What is a mouse-drag in the context of `Win32::Console`? - perl

What would constitute a mouse-drag when using the Input method from Win32::Console?
use Win32::Console qw(STD_INPUT_HANDLE ENABLE_MOUSE_INPUT);
my $con_in = Win32::Console->new(STD_INPUT_HANDLE);
$con_in->Mode(ENABLE_MOUSE_INPUT);
sub getch {
my ( $arg ) = #_;
my #event = $con_in->Input();
my $event_type = shift( #event );
if ( defined $event_type and $event_type == 2 ) {
my( $x, $x, $button_state, $control_key, $event_flags ) = #event;
my $button_drag = ?;
return handle_mouse( $x, $y, $button_state, $button_drag, $arg );
}
}
The getch on Linux looks like this:
sub getch {
my ( $arg ) = #_;
my $c = ReadKey 0;
if ( $c eq "\e" ) {
my $c = ReadKey 0.10;
# ...
if ( $c eq '[' ) {
my $c = ReadKey 0;
# ...
if ( $c eq 'M' ) {
# On button press, xterm sends CSI M C b C x C y (6 characters).
my $event_type = ord( ReadKey 0 ) - 32;
my $x = ord( ReadKey 0 ) - 32;
my $y = ord( ReadKey 0 ) - 32;
my $button_drag = ( $event_type & 0x20 ) >> 5;
my $button_pressed;
my $low3bits = $event_type & 0x03;
if ( $low3bits == 0x03 ) {
$button_pressed = 0;
} else {
if ( $event_type & 0x40 ) {
$button_pressed = $low3bits + 4;
} else {
$button_pressed = $low3bits + 1;
}
}
return handle_mouse( $x, $y, $button_pressed, $button_drag, $arg );
}
# ...
}
}
}

I've found something, but I'm not sure whether it is right.
use Win32::Console qw(STD_INPUT_HANDLE ENABLE_MOUSE_INPUT);
my $con_in = Win32::Console->new(STD_INPUT_HANDLE);
$con_in->Mode(ENABLE_MOUSE_INPUT);
sub getch {
my ( $arg ) = #_;
my #event = $con_in->Input();
my $event_type = shift( #event );
if ( defined $event_type and $event_type == 2 ) {
my( $x, $x, $button_state, $control_key, $event_flags ) = #event;
my $button_drag = 0;
# MOUSEEVENTF_MOVE => 0x0001
$button_drag = 1 if $event_flags & MOUSEEVENTF_MOVE;
return handle_mouse( $x, $y, $button_state, $button_drag, $arg );
}
}

Related

Which modern (post-5.10) trickery can be leveraged to make a Data::Dumper::Simple work-alike work?

Several dumpers exist that can show the names of variables without requiring the programmer to explicitely repeat the name.
› perl -MData::Dumper::Simple -e'my $foo = 42; print Dumper($foo)'
$foo = 42;
The trickery is a source filter (breaks often).
› perl -MDDS -e'my $foo = 42; DumpLex $foo'
$foo = 42;
The trickery is PadWalker.
They also work to some extent with variables of other types, but slices or other complex expressions are problematic.
Which modern (post-5.10) trickery can be leveraged to make the following example dumper (as in: data structure viewer, not eval-able code producer) work? The point of emphasis is to always print nice names, to accept multiple expressions, and no need for changing expressions with an extra reference level.
use 5.020; use Syntax::Construct qw(%slice);
use strictures;
use Acme::Hypothetical::Dumper 'd';
my %foo = (
Me => 'person',
You => 'beloved one',
Them => 'space aliens',
);
d %foo, $foo{'Me'}, #foo{qw(You Me)}, %foo{qw(You Me)};
# %foo = ('Me' => 'person', 'Them' => 'space aliens', 'You' => 'beloved one');
# $foo{'Me'} = 'person';
# #foo{qw(You Me)} = ('beloved one', 'person');
# %foo{qw(You Me)} = ('Me' => 'person', 'You' => 'beloved one');
my #bar = qw(Me You Them);
d #bar, $bar[0], #bar[2, 1], %bar[2, 1];
# #bar = ('Me', 'You', 'Them');
# $bar[0] = 'Me';
# #bar[2, 1] = ('Them', 'You');
# %bar[2, 1] = (2 => 'Them', 1 => 'You');
use LWP::UserAgent qw();
my $ua = LWP::UserAgent->new;
d $ua->{ssl_opts}{verify_hostname};
# $ua->{ssl_opts}{verify_hostname} = 1;
Whitespace in the output doesn't perfectly match your examples, but this is pretty close...
use v5.14;
use strict;
use warnings;
BEGIN {
package Acme::Hypothetical::Dumper;
use Keyword::Simple;
use PPR;
use Data::Dumper;
use B 'perlstring';
sub import {
my ( $class, $fname ) = ( shift, #_ );
$fname ||= 'd';
Keyword::Simple::define $fname => sub {
my $code = shift;
my ( #ws, #vars, #ws2 );
while ( $$code =~ / ^ ((?&PerlOWS)) ((?&PerlTerm)) ((?&PerlOWS)) $PPR::GRAMMAR /x ) {
my $len = length( $1 . $2 . $3 );
push #ws, $1;
push #vars, $2;
push #ws2, $3;
substr( $$code, 0, $len ) = '';
$$code =~ s/ ^ (?&PerlComma) $PPR::GRAMMAR //x;
}
my $newcode = perlstring( $class ) . '->d(';
while ( #vars ) {
my $var = shift #vars;
$newcode .= sprintf(
'%s%s,[%s],%s',
shift( #ws ),
perlstring( $var ),
$var,
shift( #ws2 ),
);
}
$newcode .= ');';
substr( $$code, 0, 0 ) = $newcode;
return;
};
}
our $OUTPUT = \*STDERR;
sub d {
my ( $class, #args ) = ( shift, #_ );
while ( #args ) {
my ( $label, $value ) = splice( #args, 0, 2 );
my $method = 'dump_list';
if ( $label =~ /^\$/ ) {
$method = 'dump_scalar';
$value = $value->[0];
}
elsif ( $label =~ /^\%/ ) {
$method = 'dump_hash';
}
printf { $OUTPUT } "%s = %s;\n", $label, $class->$method( $value );
}
}
sub dump_scalar {
my ( $class, $value ) = ( shift, #_ );
local $Data::Dumper::Terse = 1;
local $Data::Dumper::Indent = 0;
return Dumper( $value );
}
sub dump_list {
my ( $class, $value ) = ( shift, #_ );
my $dumped = $class->dump_scalar( $value );
$dumped =~ s/\[/(/;
$dumped =~ s/\]/)/;
return $dumped;
}
sub dump_hash {
my ( $class, $value ) = ( shift, #_ );
my $dumped = $class->dump_scalar( { #$value } );
$dumped =~ s/\{/(/;
$dumped =~ s/\}/)/;
return $dumped;
}
$INC{'Acme/Hypothetical/Dumper.pm'} = __FILE__;
};
use Acme::Hypothetical::Dumper 'd';
my %foo = (
Me => 'person',
You => 'beloved one',
Them => 'space aliens',
);
d %foo, $foo{'Me'}, #foo{qw(You Me)}, %foo{qw(You Me)};
my #bar = qw(Me You Them);
d #bar, $bar[0], #bar[2, 1], %bar[2, 1];
use LWP::UserAgent qw();
my $ua = LWP::UserAgent->new;
d $ua->{ssl_opts}{verify_hostname};

Perl invalid version format error

I am trying to finish a homework for my class. After I installed a API::Twitter module using CPAN, the Net::Twitter module seems to be affected and can no longer be used.
I tried to run this code in my Mac terminal:
song_hanlun_hw8.pl
#!/usr/bin/perl
use Net::Twitter;
use JSON;
use LWP::Simple;
use XML::Bare;
use Data::Dumper;
# keys for twitter
$consumer_key = "key";
$consumer_secret = "key";
$token = "key-key";
$token_secret = "key";
# keys for sentiment analysis
$apikey = "key";
# As of 13-Aug-2010, Twitter requires OAuth for authenticated requests
my $nt = Net::Twitter->new(
traits => [qw/API::RESTv1_1/],
# traits => [qw/API::Search/],
consumer_key => $consumer_key,
consumer_secret => $consumer_secret,
access_token => $token,
access_token_secret => $token_secret,
);
#enter a term you want to search for
$search_term = "Halloween";
$count = 100;
my $r = $nt->search( { q => $search_term, count => $count } );
# print Dumper $r;
for my $status ( #{ $r->{statuses} } ) {
push #tweets, $status->{text};
}
$nextMaxId = $r->{search_metadata}->{next_results};
$nextMaxId =~ s/\?max_id=//g;
$nextMaxId =~ s/&q=Halloween&count=100&include_entities=1//g;
# print $nextMaxId;
my $r2 = $nt->search( { q => $search_term, count => $count, max_id => $nextMaxId } );
for my $status ( #{ $r2->{statuses} } ) {
push #tweets, $status->{text};
}
foreach $tweet ( #tweets ) {
# $tweet = $tweets[1];
$return = get "http://gateway-a.watsonplatform.net/calls/text/TextGetTextSentiment?apikey=$apikey&text=$tweet&outputMode=xml";
$bare = new XML::Bare( text => $return );
$root = $bare->parse();
$sentiment = $root->{results}->{docSentiment}->{score}->{value};
push #sentiments, $sentiment;
# print "$sentiment";
}
$m100to75 = 0;
$m75to50 = 0;
$m50to25 = 0;
$m25to0 = 0;
$p0to25 = 0;
$p25to50 = 0;
$p50to75 = 0;
$p75to100 = 0;
foreach $sent ( #sentiments ) {
# smaller than zero. four ranges.
if ( $sent >= -1 && $sent < -0.75 ) {
$m100to75++;
}
if ( $sent >= -0.75 && $sent < -0.5 ) {
$m75to50++;
}
if ( $sent >= -0.5 && $sent < -0.25 ) {
$m50to25++;
}
if ( $sent >= -0.25 && $sent < 0 ) {
$m25to0++;
}
#bigger than zero. four ranges.
if ( $sent >= 0 && $sent < 0.25 ) {
$p0to25++;
}
if ( $sent >= 0.25 && $sent < 0.5 ) {
$p25to50++;
}
if ( $sent >= 0.5 && $sent < 0.75 ) {
$p50to75++;
}
if ( $sent >= 0.75 && $sent < 1 ) {
$p75to100++;
}
}
# print "$m100to75\n$m75to50\n$m50to25\n$m25to0\n$p0to25\n$p25to50\n$p50to75\n$p75to100\n";
print "tweets sentiment score summary histogram:\n";
print "-1.00 to -0.75: ";
for ( $i = 0 ; $i < $m100to75 ; $i++ ) {
print "*";
}
print "\n";
print "-0.75 to -0.50: ";
for ( $i = 0 ; $i < $m75to50 ; $i++ ) {
print "*";
}
print "\n";
print "-0.50 to -0.25: ";
for ( $i = 0 ; $i < $m50to25 ; $i++ ) {
print "*";
}
print "\n";
print "-0.25 to -0.00: ";
for ( $i = 0 ; $i < $m25to0 ; $i++ ) {
print "*";
}
print "\n";
print "+0.00 to +0.25: ";
for ( $i = 0 ; $i < $p0to25 ; $i++ ) {
print "*";
}
print "\n";
print "+0.25 to +0.50: ";
for ( $i = 0 ; $i < $p25to50 ; $i++ ) {
print "*";
}
print "\n";
print "+0.50 to +0.75: ";
for ( $i = 0 ; $i < $p50to75 ; $i++ ) {
print "*";
}
print "\n";
print "+0.75 to +1.00: ";
for ( $i = 0 ; $i < $p75to100 ; $i++ ) {
print "*";
}
print "\n";
But the terminal returns
Invalid version format (version required) at /Library/Perl/5.18/Module/Runtime.pm line 386.
BEGIN failed--compilation aborted at /Library/Perl/5.18/Net/Twitter.pm line 3.
Compilation failed in require at song_hanlun_hw8.pl line 3.
BEGIN failed--compilation aborted at song_hanlun_hw8.pl line 3.
Could someone tell me how to fix this?
After some Google search I highly believe downgrading the Module::Runtime would solve this issue. But I could't find how.
Eventually I solved this issue by delete the Runtime module file in perl folder and reinstalled the Net::Twitter module, which also reinstalled the Runtime that Net::Twitter depends on.

perl blowing up in sequence alignment by dynamic programming

I'm comparing a reference sequence of size 5500 bases and query sequence of size 3600, using dynamic programming (semi global alignment), in fact I don't know much about complexity and performance and the code is blowing up and giving me the error "out of memory". Knowing that it works normally on smaller sequences, my question is: This behavior is normal or I might have another problem in code ?if it's normal any hint to solve this problem ? Thanks in advance.
sub semiGlobal {
my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = #_;
# initialization: first row to 0 ;
my #matrix;
$matrix[0][0]{score} = 0;
$matrix[0][0]{pointer} = "none";
for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
$matrix[0][$j]{score} = 0;
$matrix[0][$j]{pointer} = "none";
}
for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
$matrix[$i][0]{score} = $GAP * $i;
$matrix[$i][0]{pointer} = "up";
}
# fill
my $max_i = 0;
my $max_j = 0;
my $max_score = 0;
print "seq2: ".length($seq2);
print "seq1: ".length($seq1);
for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
my ( $diagonal_score, $left_score, $up_score );
# calculate match score
my $letter1 = substr( $seq1, $j - 1, 1 );
my $letter2 = substr( $seq2, $i - 1, 1 );
if ( $letter1 eq $letter2 ) {
$diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} + $MATCH;
}
else {
$diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} + $MISMATCH;
}
# calculate gap scores
$up_score = $matrix[ $i - 1 ][$j]{score} + $GAP;
$left_score = $matrix[$i][ $j - 1 ]{score} + $GAP;
# choose best score
if ( $diagonal_score >= $up_score ) {
if ( $diagonal_score >= $left_score ) {
$matrix[$i][$j]{score} = $diagonal_score;
$matrix[$i][$j]{pointer} = "diagonal";
}
else {
$matrix[$i][$j]{score} = $left_score;
$matrix[$i][$j]{pointer} = "left";
}
}
else {
if ( $up_score >= $left_score ) {
$matrix[$i][$j]{score} = $up_score;
$matrix[$i][$j]{pointer} = "up";
}
else {
$matrix[$i][$j]{score} = $left_score;
$matrix[$i][$j]{pointer} = "left";
}
}
# set maximum score
if ( $matrix[$i][$j]{score} > $max_score ) {
$max_i = $i;
$max_j = $j;
$max_score = $matrix[$i][$j]{score};
}
}
}
my $align1 = "";
my $align2 = "";
my $j = $max_j;
my $i = $max_i;
while (1) {
if ( $matrix[$i][$j]{pointer} eq "none" ) {
$stseq1 = $j;
last;
}
if ( $matrix[$i][$j]{pointer} eq "diagonal" ) {
$align1 .= substr( $seq1, $j - 1, 1 );
$align2 .= substr( $seq2, $i - 1, 1 );
$i--;
$j--;
}
elsif ( $matrix[$i][$j]{pointer} eq "left" ) {
$align1 .= substr( $seq1, $j - 1, 1 );
$align2 .= "-";
$j--;
}
elsif ( $matrix[$i][$j]{pointer} eq "up" ) {
$align1 .= "-";
$align2 .= substr( $seq2, $i - 1, 1 );
$i--;
}
}
$align1 = reverse $align1;
$align2 = reverse $align2;
return ( $align1, $align2, $stseq1 ,$max_j);
}
One way to possibly solve the problem is to tie the #matrix with a file. However, this will dramatically slow down the program. Consider this:
sub semiGlobal {
use Tie::Array::CSV;
tie my #matrix, 'Tie::Array::CSV', 'temp.txt'; # Don't forget to add your own error handler.
my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = #_;
# initialization: first row to 0 ;
$matrix[0][0] = '0 n';
for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
$matrix[0][$j] = '0 n';
}
for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
my $score = $GAP * $i;
$matrix[$i][0] = join ' ',$score,'u';
}
#print Dumper(\#matrix);
# fill
my $max_i = 0;
my $max_j = 0;
my $max_score = 0;
print "seq2: ".length($seq2)."\n";
print "seq1: ".length($seq1)."\n";
for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
my ( $diagonal_score, $left_score, $up_score );
# calculate match score
my $letter1 = substr( $seq1, $j - 1, 1 );
my $letter2 = substr( $seq2, $i - 1, 1 );
my $score = (split / /, $matrix[ $i - 1 ][ $j - 1 ])[0];
if ( $letter1 eq $letter2 ) {
$diagonal_score = $score + $MATCH;
}
else {
$diagonal_score = $score + $MISMATCH;
}
# calculate gap scores
$up_score = (split / /,$matrix[ $i - 1 ][$j])[0] + $GAP;
$left_score = (split / /,$matrix[$i][ $j - 1 ])[0] + $GAP;
# choose best score
if ( $diagonal_score >= $up_score ) {
if ( $diagonal_score >= $left_score ) {
$matrix[$i][$j] = join ' ',$diagonal_score,'d';
}
else {
$matrix[$i][$j] = join ' ', $left_score, 'l';
}
}
else {
if ( $up_score >= $left_score ) {
$matrix[$i][$j] = join ' ', $up_score, 'u';
}
else {
$matrix[$i][$j] = join ' ', $left_score, 'l';
}
}
# set maximum score
if ( (split / /, $matrix[$i][$j])[0] > $max_score ) {
$max_i = $i;
$max_j = $j;
$max_score = (split / /, $matrix[$i][$j])[0];
}
}
}
my $align1 = "";
my $align2 = "";
my $stseq1;
my $j = $max_j;
my $i = $max_i;
while (1) {
my $pointer = (split / /, $matrix[$i][$j])[1];
if ( $pointer eq "n" ) {
$stseq1 = $j;
last;
}
if ( $pointer eq "d" ) {
$align1 .= substr( $seq1, $j - 1, 1 );
$align2 .= substr( $seq2, $i - 1, 1 );
$i--;
$j--;
}
elsif ( $pointer eq "l" ) {
$align1 .= substr( $seq1, $j - 1, 1 );
$align2 .= "-";
$j--;
}
elsif ( $pointer eq "u" ) {
$align1 .= "-";
$align2 .= substr( $seq2, $i - 1, 1 );
$i--;
}
}
$align1 = reverse $align1;
$align2 = reverse $align2;
untie #matrix; # Don't forget to add your own error handler.
unlink 'temp.txt'; # Don't forget to add your own error handler.
return ( $align1, $align2, $stseq1 ,$max_j);
}
You can still use your original sub for short sequences, and switch to this sub for long ones.
I think that #j_random_hacker and #Ashalynd are on the right track regarding using this algorithm in most Perl implementations. The datatypes you're using are going to use more memory that absolutely needed for the calculations.
So this is "normal" in that you should expect to see this kind of memory usage for how you've written this algorithm in perl. You may have other problems in surrounding code that are using a lot of memory but this algorithm will hit your memory hard with large sequences.
You can address some of the memory issues by changing the datatypes that you're using as #Ashalynd suggests. You could try changing the hash which holds score and pointer into an array and changing the string pointers into integer values. Something like this might get you some benefit while still maintaining readability:
use strict;
use warnings;
# define constants for array positions and pointer values
# so the code is still readable.
# (If you have the "Readonly" CPAN module you may want to use it for constants
# instead although none of the downsides of the "constant" pragma apply in this code.)
use constant {
SCORE => 0,
POINTER => 1,
DIAGONAL => 0,
LEFT => 1,
UP => 2,
NONE => 3,
};
...
sub semiGlobal2 {
my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = #_;
# initialization: first row to 0 ;
my #matrix;
# score and pointer are now stored in an array
# using the defined constants as indices
$matrix[0][0][SCORE] = 0;
# pointer value is now a constant integer
$matrix[0][0][POINTER] = NONE;
for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
$matrix[0][$j][SCORE] = 0;
$matrix[0][$j][POINTER] = NONE;
}
for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
$matrix[$i][0][SCORE] = $GAP * $i;
$matrix[$i][0][POINTER] = UP;
}
... # continue to make the appropriate changes throughout the code
However, when I tested this I didn't get a huge benefit when attempting to align a 3600 char string in a 5500 char string of random data. I programmed my code to abort when it consumed more than 2GB of memory. The original code aborted after 23 seconds while the one using constants and an array instead of a hash aborted after 32 seconds.
If you really want to use this specific algorithm I'd check out the performance of Algorithm::NeedlemanWunsch. It doesn't look like it's very mature but it may have addressed your performance issues. Otherwise look into writing an Inline or Perl XS wrapper around a C implementation

Clean way to write OLE code in Perl?

I'm working with automating the creation of a Word document in Perl by means of Win32::OLE. My current code looks like this, and it's leaving instances of WINWORD.EXE in memory:
my $range = $select->Range;
my $table = $doc->Tables->Add( $range, scalar #rows, scalar #{ $rows[0] } );
for my $rownum ( 0 .. $#rows ) {
for my $colnum ( 0 .. $#{ $rows[$rownum] } ) {
my #cellpos = ( $rownum + 1, $colnum + 1 );
my $data = $rows[$rownum][$colnum];
$table->Cell(#cellpos)->Range->{'Text'} = $data;
1;
}
}
However, if I were to refactor my code per the Microsoft recommendation for Visual Studio .NET, it would look like this:
my $range = $select->Range;
my $tables = $doc->Tables;
my $table = $tables->Add( $range, scalar #rows, scalar #{ $rows[0] } );
for my $rownum ( 0 .. $#rows ) {
for my $colnum ( 0 .. $#{ $rows[$rownum] } ) {
my #cellpos = ( $rownum + 1, $colnum + 1 );
my $data = $rows[$rownum][$colnum];
my $cell = $table->Cell(#cellpos);
my $cell_range = $cell->Range;
$cell_range->{'Text'} = $data;
}
}
That code does the job, but it's awfully "noisy" to my mind. Is there a cleaner way to do this?
It can be improved marginally. There is no need for the #cellpos and $data variables, and it is tidier to extract a reference to the current element of #rows for use within the inner loop.
my $range = $select->Range;
my $tables = $doc->Tables;
my $table = $tables->Add( $range, scalar #rows, scalar #{ $rows[0] } );
for my $rownum ( 0 .. $#rows ) {
my $cols = $rows[$rownum];
for my $colnum ( 0 .. $#$cols ) {
my $cell = $table->Cell($rownum + 1, $colnum + 1);
my $cell_range = $cell->Range;
$cell_range->{Text} = $cols->[$colnum];
}
}

How to get all feature in a range from a GFF3 file in Perl?

I would like to write a Perl function that gets a GFF3 filename and a range (i.e. 100000 .. 2000000). and returns a reference to an array containing all names/accessions of genes found in this range.
I guess using bioperl will make sense, but I have very little experience with it. I can write a script that parses a GFF3 by my self, but if using bioperl (or another packagae) is not too complicated - I'd rather reuse their code.
use Bio::Tools::GFF;
my $range_start = 100000;
my $range_end = 200000;
my #features_in_range = ( );
my $gffio = Bio::Tools::GFF->new(-file => $gff_file, -gff_version => 3);
while (my $feature = $gffio->next_feature()) {
## What about features that are not contained within the coordinate range but
## do overlap it? Such features won't be caught by this check.
if (
($feature->start() >= $range_start)
&&
($feature->end() <= $range_end)
) {
push #features_in_range, $feature;
}
}
$gffio->close();
DISCLAIMER: Naive implementation. I just banged that out, it's had no testing. I won't even guarantee it compiles.
You do want to use BioPerl for this, using possibly the Bio::Tools::GFF module.
You should really ask on the BioPerl mailing list. It's very friendly and the subscribers are very knowledgeable -- they'll definitely be able to help you. And once you do get an answer (and if you don't get one here first), I suggest answering your own question here with the answer so we can all benefit!
The following function takes a hash of targets and ranges and returns a function that will iterate over all targets that overlap any of the ranges. The targets should be a reference to an array of references:
my $targets =
[
[
$start,
$end,
],
...,
]
The ranges should be a reference to an array of hashes:
my $ranges =
[
{
seqname => $seqname,
source => $source,
feature => $feature,
start => $start,
end => $end,
score => $score,
strand => $strand,
frame => $frame,
attribute => $attribute,
},
...,
]
You can, of course, just pass a single target.
my $brs_iterator
= binary_range_search( targets => $targets, ranges => $ranges );
while ( my $gff_line = $brs_iterator->() ) {
# do stuff
}
sub binary_range_search {
my %options = #_;
my $targets = $options{targets} || croak 'Need a targets parameter';
my $ranges = $options{ranges} || croak 'Need a ranges parameter';
my ( $low, $high ) = ( 0, $#{$ranges} );
my #iterators = ();
TARGET:
for my $range (#$targets) {
RANGE_CHECK:
while ( $low <= $high ) {
my $try = int( ( $low + $high ) / 2 );
$low = $try + 1, next RANGE_CHECK
if $ranges->[$try]{end} < $range->[0];
$high = $try - 1, next RANGE_CHECK
if $ranges->[$try]{start} > $range->[1];
my ( $down, $up ) = ($try) x 2;
my %seen = ();
my $brs_iterator = sub {
if ( $ranges->[ $up + 1 ]{end} >= $range->[0]
and $ranges->[ $up + 1 ]{start} <= $range->[1]
and !exists $seen{ $up + 1 } )
{
$seen{ $up + 1 } = undef;
return $ranges->[ ++$up ];
}
elsif ( $ranges->[ $down - 1 ]{end} >= $range->[0]
and $ranges->[ $down - 1 ]{start} <= $range->[1]
and !exists $seen{ $down - 1 }
and $down > 0 )
{
$seen{ $down - 1 } = undef;
return $ranges->[ --$down ];
}
elsif ( !exists $seen{$try} ) {
$seen{$try} = undef;
return $ranges->[$try];
}
else {
return;
}
};
push #iterators, $brs_iterator;
next TARGET;
}
}
# In scalar context return master iterator that iterates over the list of range iterators.
# In list context returns a list of range iterators.
return wantarray
? #iterators
: sub {
while (#iterators) {
if ( my $range = $iterators[0]->() ) {
return $range;
}
shift #iterators;
}
return;
};
}