Start time always getting same in Perl for my script - perl

I have made a script to extract the content of log files and to calculate the time difference if the task is complete.
Suppose I have four jobs and each job has thre individual tasks, so far I need the start of each task, and just print it.
Everything is fine except when I try to initialise to make it convenient, by using $j, $l which are used as sort of two-dimensional array.
The problem is at the output where I get the same "Started at" for each job.
The values of $counter and $l should be the root cause.
Can anyone help? I tried my best and am sort of newbie.
sub getdate {
my $line = $_[0];
($hrs, $min) = split(':', $line, 3);
return $hrs, $min;
}
print FILE "<html><head>\n";
print FILE "<title>CGI Test</title>\n";
print FILE "</head>\n";
print FILE "<body>";
print FILE "<font size=\"5\" color=\"#008080\" face=\"Tahoma\"><b><u><br>";
print FILE "PBI Batch for 22/02/2013";
print FILE "</font></b></u><br><br><br>";
my $i = 0;
my $j = 0;
my $l = 0;
my #sample;
#print FILE "<h4>";
foreach $header (<COLLECTION>) {
chomp($header);
($heading, $filepath) = split(',', $header);
#$two[$i]="<font size=\"3\"color=\"#008000\" face=\"Tahoma\"><b><u><br>";
#$two[$i]="<font size=\"3\" color=".$color." face=\"Tahoma\"><b><u><br>";
$two[$i] .= $heading;
#$two[$i] .= "</font></u></b><br>";
#print FILE "<font size=\"3\" color=\"#008000\" face=\"Tahoma\"><b><u><br>";
# print FILE $two[$i];
#print FILE $heading;
#print FILE "</font></u></b><br>";
#print $filepath."\n";
open(MYFILE1, $filepath) or die 'Could nont openfile';
my $counter;
foreach $list (<MYFILE1>) {
chomp($list);
($file, $path) = split(',', $list);
#print FILE $file;
my #secondstart;
my #secondend;
my $secondcounter = 0;
#print FILE "valueofllllllllllllllllllllllllllll".$l;
foreach $counter ($file) {
print FILE "valueofllllllllllllllllllllllllllll" . $l;
$l++;
$sample[$j][$l] = $counter;
print FILE "secCOUNTER " . $secondcounter;
$secondcounter++;
}
print FILE " space";
open(MYFILE, $path) or die 'ERRROR';
my $count = 0;
foreach $line (<MYFILE>) {
my #endtime;
$flag = 'false';
#$counter++;
$count++;
print FILE $count . "========";
if ($count == 1) {
($hrs, $min) = getdate($line);
$starttime[$j][$l] = ($hrs * 60) + $min;
}
else {
($hrs, $min) = split(':', $line, 3);
if ($line =~ m/End of Procedure/) {
$flag = 'true';
$endtime[$j][$l] = $hrs . $min;
$endtime[$j][$l] = ($hrs * 60) + $min;
}
else {
$endtime[$j][$l] = ($hrs * 60) + $min;
}
}
$duration[$j][$l] = $endtime[$j][$l] - $starttime[$j][$l];
}
# print $flag;
#print FILE $file." : ";
#print FILE "value of ".$j."and".$l;
$startstatus[$j][$l] = "Started at" . $starttime[$j][$l];
$durationstatus[$j][$l] = "&nbspDuration is " . $duration[$j][$l] . "m";
# print FILE "Started at".$starttime;
# print FILE "&nbspDuration is ".$duration."m";
# print FILE "<br>";
close(MYFILE);
}
my $valueofl = $l;
#print FILE "vlaeeofl".$valueofl;
print "valueofllllllllllllllllllllllllllll" . $l;
$l = 0;
if ($flag eq 'true') {
$status = 'Completed';
$color = '#008000';
print FILE "<font size=\"3\" color="
. $color
. " face=\"Tahoma\"><b><u><br>"
. $two[$i]
. "</font></u></b><br>";
print FILE $status . "<br>";
while ($l <= $valueofl) {
#print $j."and".$l;
# print "valueofllllllllllllllllllllllllllll".$l;
print FILE $sample[$j][$l] . "&nbsp&nbsp&nbsp&nbsp";
print FILE $startstatus[$j][$l] . "&nbsp&nbsp&nbsp&nbsp";
print FILE $durationstatus[$j][$l] . "<br>";
$l++;
}
# print FILE $startstatus[$j][0];
# print FILE $durationstatus[$j][0];
}
else {
#print "valueofllllllllllllllllllllllllllll".$l;
#print $j."and".$l;
$status = 'In Progress';
$color = 'blue';
print FILE "<font size=\"3\" color="
. $color
. " face=\"Tahoma\"><b><u><br>"
. $two[$i]
. "</font></u></b><br>"
. $status;
}
$i++;
$j++;
}
print FILE "</body>";
print FILE "</html>";
close(FILE);
close(MYFILE1)

This is a shocking piece of Perl. You must always start you program with use strict and use warnings, and declare all variables as close as possible to their first point of use using my. That is the most basic form of debugging, and it is only polite to do this at the very least before asking other people for help.
The problem is likely to lie in your for statement
foreach $counter ($file) { ... }
which will execute the body of the loop just once, with $content set to the value of $file. I can't imagine what you meant it to do.

Related

Perl Script is giving error uninialized varilable access

Code runs sometimes, sometimes gives error on linux host.
Need to check why has is not printing,
Error, messages: Use of uninitialized value in sprintf at ./fa_list.pl line 139, line
Can someone check, why I'm getting error?
use Getopt::Long;
my $sid = '9999';
my $Fa_VSan_Map = 'Fa_VSan_Map';
sub usage {
my $message = $_[0];
if (defined $message && length $message) {
$message .= "\n"
unless $message =~ /\n$/;
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command -sid xxx -outf FA_Mapping\n" .
"Where -sid: is primary SID to show mappings.\n" .
" -outf: Output File prefix.\n" .
" -Reserved...\n"
);
die("\n")
}
GetOptions( 'sid=i' => \$sid, 'outf=s' => \$Fa_VSan_Map) or
usage("Invalid commmand line options.");
print($sid);
my $outf = "$Fa_VSan_Map$sid.csv";
my $outf1 = "Fa_VSan_Map1$sid.csv";
my ($mydir,$dir_port,$dir_port_wwpn,$FaWWPN);
my (%FA,%FAH,%FAC,%VSAN);
my ($wwpn,$host,$port,$fcid,$logged,$fab);
# 50:00:09:72:08:4b:05:89, => cdc02-core1-1.yyyyy.xxxx.com,CISCO,fc3/12,VS251,50:00:09:72:08:4b:05:89,,8,Active
# cdc02-core-1-2.yyyyy.xxxx.com,CISCO,fc1/29,VS251,50:00:09:73:00:1c:e1:1c,,8,Active
sub LoadVSAN()
{
my $vsanf = "VSAN$sid.csv";
print ($vsanf);
open (VSAN, "<", $vsanf) or die "Could not open $!";
while (<VSAN>) {
if (/Active/) {
my #array = split /,/;
print (#array);
my $key = $array[4];
$key =~ s/://g;
my #line_arranged = ($array[3],$array[2],$array[0],$array[6],$array[7]);
$VSAN{$key} = \#line_arranged;
print($key, ": ", #{$VSAN{$key}}, "\n");
}
}
close VSAN;
}
LoadVSAN;
# foreach my $key (%VSAN) {
# print(${VSAN{$key}}[0]); print("\n");
# ${$VSAN{$FaWwpn}}[0]
# }
open (OUT, ">", $outf) or die "Could not open $outf $!";
open( OUT1, ">",$outf1) or die "Could not open $outf1 $!";
my $sidtxt = "sidcfg.fa$sid.txt";
my $cmd = 'symcfg -sid ' . $sid . ' list -fa all -v > ' . $sidtxt;
system($cmd);
open ( SYM, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while ( <SYM>) {
chomp ;
if (/Director Identification:/) {
$mydir = $_;
$mydir =~ s/\s+Director Identification: //;
$mydir =~ s/FA-//;
}
elsif (/Director Port:/) {
$port = $_;
$port =~ s/\s+Director Port: //;
$dir_port = sprintf '%04d_%03s_%03d', int($sid), $mydir, int($port);
}
elsif (/WWN Port Name/) {
$wwpn = $_;
$wwpn =~ s/\s+WWN Port Name\s+: //;
$dir_port_wwpn = sprintf '%s,%s', $dir_port, $wwpn;
$FA{$dir_port} = $wwpn;
}
}
close(SYM);
$sidtxt = 'symaccess.ll.' . $sid . '.txt';
$cmd = 'symaccess -sid ' . $sid . ' list logins > ' . $sidtxt;
#print($cmd);
system($cmd );
open ( SYM, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while ( <SYM>) {
chomp ;
if (/Director Identification/) {
$mydir = $_;
$mydir =~ s/Director Identification\s+:\s+//;
$mydir =~ s/FA-//;
}
elsif (/Director Port/) {
$port = $_;
$port =~ s/Director Port\s+:\s+//;
$dir_port = sprintf '%04d_%03s_%03d', int($sid),$mydir, int($port);
}
elsif (/Fibre/) {
($wwpn,undef, $host,$port,$fcid,$logged,$fab) = split;
my $host_port;
if( lc($host) eq 'null') {
$host_port = substr($wwpn,10,6);
}
else {
$host_port = $host . '_' . $port . '_' . substr($wwpn,12,4);
}
if (exists $FAH{$dir_port}) {
$FAH{$dir_port} .= ':' . $host_port;
$FAC{$dir_port} += 1;
} else {
$FAH{$dir_port} = $host_port;
$FAC{$dir_port} = 1;
}
if ( $logged eq "Yes") {
my $line = sprintf ( '%s,%s,%s,%s', $dir_port, $FA{$dir_port}, $host_port, $fcid);
print (OUT1 $line . "\n");
}
}
}
print OUT "Fa,FaWWPN,VSan,HostCount,PERCENT_BUSY,HostNames\n";
my $PERCENT_BUSY=10.0;
foreach my $fa ( keys %FAC) {
my $formula = '=VLOOKUP(B2,Sheet1!A$2:F$600,6,FALSE)';
my $FaWwpn = lc($FA{$fa});
#print($FaWwpn . ": " . $VSAN{$FaWwpn}->[0] . "\n" );
## Below is line 139
my $line = sprintf ('%s,%s,%s,%s,%3.2f,%s', $fa, $FaWwpn, ${$VSAN{$FaWwpn}}[0], $FAC{$fa}, $PERCENT_BUSY, lc($FAH{$fa}));
print OUT $line . "\n";
#print $line . "\n";
}
close(SYM);
I believe there is problem with lc($FAH{$fa}).
have you checked you initialized $FAH in your code ?

stockholm to fasta format - include accession id in every header

Hello I've multiple sequences in stockholm format, at the top of every alignment there is a accession ID, for ex: '#=GF AC PF00406' and '//' --> this is the end of the alignment. When I'm converting the stockholm format to fasta format I need PF00406 in the header of every sequence of the particular alignment. Some times there will be multiple stockholm alignments in one file. I tried to modify the following perl script, it gave me bizarre results, any help will be greatly appreciated.
my $columns = 60;
my $gapped = 0;
my $progname = $0;
$progname =~ s/^.*?([^\/]+)$/$1/;
my $usage = "Usage: $progname [<Stockholm file(s)>]\n";
$usage .= " [-h] print this help message\n";
$usage .= " [-g] write gapped FASTA output\n";
$usage .= " [-s] sort sequences by name\n";
$usage .= " [-c <cols>] number of columns for FASTA output (default is $columns)\n";
# parse cmd-line opts
my #argv;
while (#ARGV) {
my $arg = shift;
if ($arg eq "-h") {
die $usage;
} elsif ($arg eq "-g") {
$gapped = 1;
} elsif ($arg eq "-s"){
$sorted = 1;
} elsif ($arg eq "-c") {
defined ($columns = shift) or die $usage;
} else {
push #argv, $arg;
}
}
#ARGV = #argv;
my %seq;
while (<>) {
next unless /\S/;
next if /^\s*\#/;
if (/^\s*\/\//) { printseq() }
else {
chomp;
my ($name, $seq) = split;
#seq =~ s/[\.\-]//g unless $gapped;
$seq{$name} .= $seq;
}
}
printseq();
sub printseq {
if($sorted){
foreach $key (sort keys %seq){
print ">$key\n";
for (my $i = 0; $i < length $seq{$key}; $i += $columns){
print substr($seq{$key}, $i, $columns), "\n";
}
}
} else{
while (my ($name, $seq) = each %seq) {
print ">$name\n";
for (my $i = 0; $i < length $seq; $i += $columns) {
print substr ($seq, $i, $columns), "\n";
}
}
}
%seq = ();
}
Depending on the how much variation there is in the line with the accessionID, you might need to modify the regex, but this works for your example file
my %seq;
my $aln;
while (<>) {
if ($_ =~ /#=GF AC (\w+)/) {
$aln = $1;
}
elsif ($_ =~ /^\s*\/\/\s*$/){
$aln = '';
}
next unless /\S/;
next if /^\s*\#/;
if (/^\s*\/\//) { printseq() }
else {
chomp;
my ($name, $seq) = split;
$name = $name . ' ' . $aln;
$seq{$name} .= $seq;
}
}
printseq();

perl variable scope using while loop

I have an ftp upload from my client machine to my server running consistently as a means of backup, occasionally if the connection becomes corrupt the upload will stall, the solution to this is to remove the "corrupt file" from the server, then the client resumes and the file is uploaded next time the client runs. This script is to remove the file if it has never occured before, or check the time stamp if it has been deleted in the past and check that this is a new occurence. Then delete if required.
the line in the logfile will be like:
Sun May 11 02:38:46 2010 [pid 17116] [ftp] FAIL UPLOAD: Client "192.168.179.58", "/Dan/Example.file", 0.00Kbyte/sec
and once written to the filelist it looks like this:
Sun May 11 02:38:46 - /Dan/Example.file
Below you can see where the scope problem lies within the read_filelist() sub-routine.
Please see the solution so far:
#!/usr/bin/perl
use warnings;
use strict;
use Data::Dumper qw(Dumper);
#open /var/log/vsftpd.log read only, and /var/log/vsftpd.log R/W + append
open my $logfile, '<', '/var/log/vsftpd.log' # 3 arg open is safer
or die "could not open file: $!"; # checking for errors is good
open my $filelist, '+<', '/scripts/filelist'
or die "could not open file: $!";
my #rid;
my #filename;
my #deletedfile;
my $int = -1;
my #time;
my #hourcompare;
my #splittime;
my #filelisttime;
my #splitfilelisttime;
my #filelistfile;
my #filelistarray;
my $fileexists = 0;
#Define read_filelist()
sub read_filelist{
my ($filename, $hour, $min, $sec, $filelist) = #_;
while (<$filelist>){
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename){
my $fileexists = 1;
if ($hour >= $fhour){
if($min >= $fmin){
if($sec > $fsec){
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
}
}
#open vsftp log and look for lines that include "FAIL UPLOAD" print those lines to a file
while (<$logfile>) {
$int = $int + 1;
if (index($_, "FAIL UPLOAD:") != -1){
#rid = split /\s+"/, $_;
#filename = split /",/, $rid[2];
#time = split /\s+201/, $rid[0];
}
$deletedfile[$int] = $filename[0];
if ($filename[0] ne $deletedfile[$int-1]){
print $filelist $time[0]." - ".$filename[0]."\n";
}
#convert the timestamp into integers for comparison
#hourcompare = split /\s+/, $time[0];
#splittime = split /:/, $hourcompare[3];
my $sec = $splittime[2]+0;
my $min = $splittime[1]+0;
my $hour = $splittime[0]+0;
#itterate through '/scripts/filelist'
read_filelist($filename[0], $hour, $min, $sec, $filelist);
if ($fileexists = 0){
system ("rm", "-fv", "/home/desktop"."$filename[0]");
}
}
close $filelist;
close $logfile;
the variables pass to the read_filelist() sub no problem, but when I start the while() loop all passed variables become uninitialized:
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
print "$filename" . "\n";
}
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
This returns the following:
Use of uninitialized value in string eq at removefailed.pl line 39, <$filelist> line 1.
Use of uninitialized value $filename in string eq at removefailed.pl line 39, <$filelist> line 1.
log: 0:0:0
file: 2:38:46
Use of uninitialized value $filename in string at removefailed.pl line 52, <$filelist> line 1.
However if I move the prints outside of the while loop it works, but obviously I can only compare them with the last line of the filelist.
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
}
print "$filename" . "\n";
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
I get the following output:
file: 2:38:46
log: 2:38:46
/Dan/Example.file
Any help with this would be much appreciated, please let me know if you need any further information?
I have solved this problem using Hash's. I think this was caused because the filelist was already open in the logfile read.
Anyhow I created a global Hash:
my %logfilelines;
passed all the assorted relevant lines to it from the logfile:
$logfilelines{$filename[0].":".$hour.":".$min.":".$sec}++
Then within the read_file() sub I run through %logfilelines; and compare the filename\ time etc. I will have to rebuild the time comparison as it is wrong but atleast I am making progress now. see the new subroutine below in case you are curious:
sub read_filelist{
#my ($filename, $hour, $min, $sec, $filelist) = #_;
my $fint = -1;
my #filelines;
my #filelistlines;
foreach my $line (keys %logfilelines) {
open my $filelist2, '<', 'c:\scripts\filelist'
or die "could not open file: $!";
$fint = $fint + 1;
$filelines[$fint] = $line;
#filelistlines = split /:/, $filelines[$fint];
my $filename = $filelistlines[0];
my $hour = $filelistlines[1]+0;
my $min = $filelistlines[2]+0;
my $sec = $filelistlines[3]+0;
while (<$filelist2>){
my #filelisttime = split /\s+/, $_;
my #splitfilelisttime = split /:/, $filelisttime[3];
my #filelistfile = split /-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
chomp $filelistfile[1];
if ($filelistfile[1] eq $filename){
# my $fileexists = 1;
print "log: "."$hour".":"."$min".":"."$sec"." $filename"."\n";
print "file: "."$fhour".":"."$fmin".":"."$fsec"."\n";
if ($min > $fmin || $hour > $fhour){
# if($min >= $fmin ||$hour >= $fhour){
# if($sec > $fsec){
#system ("rm", "-fv", "/home/desktop"."$filename");
print "success"." $filename";
# }
# }
}
}
}
}

How convert text into XML using perl?

input text file contain the following:
....
ponies B-pro
were I-pro
used I-pro
A O
report O
of O
indirect B-cd
were O
. O
...
output XML file
<sen>
<base id="pro">
<w id="1">ponies</w>
<w id="2">were</w>
<w id="3">were</w>
</base>A report of
<base id="cd">indirect</base> were
</sen>
i want to make an XML file by reading the text file, B- means the begining of my tag and I- means an include words inside the tag while "O" means outside the base tag which means it only exist in the tag.
i try the following codes:
#!/usr/local/bin/perl -w
open(my $f, "input.txt") or die "Can't";
open(my $o, ">output.xml") or die "Can't";
my $c;
sub read_line {
my $fh = shift;
if ($fh and my $line = <$fh>) {
chomp($line);
my #words = split(/\t/, $line);
my $word = $words[0];
my $group = $words[1];
if($word eq "."){
return;
}
else{
if($group ne 'O'){
my #b = split(/\-/, $group);
if($b[0] eq 'B'){
my $e = "<e id=\"";
$e .= " . $b[1] . "\">";
$e .= $word . "</e>";
return $e;
}
if($b[0] eq 'I'){
my $w = "<w id=\"";
$w .= $c . "\">";
$w .= $word . "</w>";
$c++;
return $w;
}
}
else{
$c = 2;
return $word;
}
}
}
return;
}
sub get_text(){
my $txt = "";
my $r = read_line($f);
while($r){
if($r =~ m/[[:punct:]]/){
chop($txt);
$txt .= " " . $r . " ";
}
else{
$txt .= $r . " ";
}
$r = read_line($f);
}
chop($txt);
return "<sen>" . $txt . ".</sen>";
}
instead im getting as output:
<sen>
<base id="pro"> ponies </base>
<w id="2">were</w>
<w id="3">were</w>
A report of
<base id="cd">indirect</base> were
</sen>
i really need help.
Thanks
Writing XML "by hand" will only get you in trouble. Use a module from CPAN.
In your case, I would first put the data in a proper Perl data structure (maybe a hash containing some arrays, or something similar) and then using a module (i.e. XML::Simple for starters) to output to a file.
As Javs said, you want to use a module rather than do this by hand. For your purposes, since you have mixed content, I recommend XML::LibXML. Here is an example I made to test that you can indeed to mixed content like you've got:
use XML::LibXML;
my $doc = XML::LibXML::Document->new();
my $root = $doc->createElement('html');
$doc->setDocumentElement($root);
my $body = $doc->createElement('body');
$root->appendChild($body);
my $link = $doc->createElement('a');
$link->setAttribute('href', 'http://google.com');
$link->appendText('Google');
$body->appendChild($link);
$body->appendText('Inline Text');
print $doc->toString;

Why does perl "hash of lists" do this?

I have a hash of lists that is not getting populated.
I checked that the block at the end that adds to the hash is in fact being called on input. It should either add a singleton list if the key doesn't exist, or else push to the back of the list (referenced under the right key) if it does.
I understand that the GOTO is ugly, but I've commented it out and it has no effect.
The problem is that when printhits is called, nothing is printed, as if there are no values in the hash. I also tried each (%genomehits), no dice.
THANKS!
#!/usr/bin/perl
use strict;
use warnings;
my $len = 11; # resolution of the peaks
#$ARGV[0] is input file
#$ARGV[1] is call number
# optional -s = spread number from call
# optional -o specify output file name
my $usage = "see arguments";
my $input = shift #ARGV or die $usage;
my $call = shift #ARGV or die $usage;
my $therest = join(" ",#ARGV) . " ";
print "the rest".$therest."\n";
my $spread = 1;
my $output = $input . ".out";
if ($therest =~ /-s\s+(\d+)\s/) {$spread = $1;}
if ($therest =~ /-o\s+(.+)\s/) {$output = $1;}
# initialize master hash
my %genomehits = ();
foreach (split ';', $input) {
my $mygenename = "err_naming";
if ($_ =~ /^(.+)-/) {$mygenename = $1;}
open (INPUT, $_);
my #wiggle = <INPUT>;
&singlegene(\%genomehits, \#wiggle, $mygenename);
close (INPUT);
}
&printhits;
#print %genomehits;
sub printhits {
foreach my $key (%genomehits) {
print "key: $key , values: ";
foreach (#{$genomehits{$key}}) {
print $_ . ";";
}
print "\n";
}
}
sub singlegene {
# let %hash be the mapping hash
# let #mygene be the gene to currently process
# let $mygenename be the name of the gene to currently process
my (%hash) = %{$_[0]};
my (#mygene) = #{$_[1]};
my $mygenename = $_[2];
my $chromosome;
my $leftbound = -2;
my $rightbound = -2;
foreach (#mygene) {
#print "Doing line ". $_ . "\n";
if ($_ =~ "track" or $_ =~ "output" or $_ =~ "#") {next;}
if ($_ =~ "Step") {
if ($_ =~ /chrom=(.+)\s/) {$chromosome = $1;}
if ($_ =~ /span=(\d+)/) {$1 == 1 or die ("don't support span not equal to one, see wig spec")};
$leftbound = -2;
$rightbound = -2;
next;
}
my #line = split /\t/, $_;
my $pos = $line[0];
my $val = $line[-1];
# above threshold for a call
if ($val >= $call) {
# start of range
if ($rightbound != ($pos - 1)) {
$leftbound = $pos;
$rightbound = $pos;
}
# middle of range, increment rightbound
else {
$rightbound = $pos;
}
if (\$_ =~ $mygene[-1]) {goto FORTHELASTONE;}
}
# else reinitialize: not a call
else {
FORTHELASTONE:
# typical case, in an ocean of OFFs
if ($rightbound != ($pos-1)) {
$leftbound = $pos;
}
else {
# register the range
my $range = $rightbound - $leftbound;
for ($spread) {
$leftbound -= $len;
$rightbound += $len;
}
#print $range . "\n";
foreach ($leftbound .. $rightbound) {
my $key = "$chromosome:$_";
if (not defined $hash{$key}) {
$hash{$key} = [$mygenename];
}
else { push #{$hash{$key}}, $mygenename; }
}
}
}
}
}
You are passing a reference to %genomehits to the function singlegene, and then copying it into a new hash when you do my (%hash) = %{$_[0]};. You then add values to %hash which goes away at the end of the function.
To fix it, use the reference directly with arrow notation. E.g.
my $hash = $_[0];
...
$hash->{$key} = yadda yadda;
I think it's this line:
my (%hash) = %{$_[0]};
You're passing in a reference, but this statement is making a copy of your hash. All additions you make in singlegene are then lost when you return.
Leave it as a hash reference and it should work.
PS - Data::Dumper is your friend when large data structures are not behaving as expected. I'd sprinkle a few of these in your code...
use Data::Dumper; print Dumper \%genomehash;