What is causing the error with declaring my $value in Perl for loop? - perl

I am writing code to parse sales information from an auto-generated Excel report. I am able to pull the data from each cell of a row and print it to a text file. The issue begins when I attempt to loop over the rows. The spreadsheet is set up with data in every other row, so I attempted to create a for loop that steps through the rows two at a time, but I get an error:
syntax error at C:\taylor\perl\test_parse.pl line 128, near ")
my "
Global symbol "$value" requires explicit package name (did you forget to declare "my $value"?) at C:\taylor\perl\test_parse.pl line 128.
Global symbol "$value" requires explicit package name (did you forget to declare "my $value"?) at C:\taylor\perl\test_parse.pl line 129.
Global symbol "$value" requires explicit package name (did you forget to declare "my $value"?) at C:\taylor\perl\test_parse.pl line 168.
Execution of C:\taylor\perl\test_parse.pl aborted due to compilation errors."
I have looked into moving the variables out of the scope of the if block but am unsure if this is possible.
#!/usr/bin/perl
# Intended to walk every other row of 'Sheet1' (row indexes 15, 17, 19, ...),
# clean up 18 cell values per row and append them to imports.csv as one
# comma-separated line.
use warnings;
use strict;
use Spreadsheet::ParseXLSX;
use Spreadsheet::ParseExcel;
use Spreadsheet::XLSX;
use Date::Format;
my $filename = "c:/taylor/perl/DCS8.xlsx";
my $files = "C:\\Taylor\\perl\\imports.csv";
#Parse excel file
my $parser = Spreadsheet::ParseXLSX->new();
my $workbook = $parser->parse("$filename");
# $i advances by 2, so the first get_cell argument takes the values 15, 17, ... 215.
for (my $i=0; $i <= 200; $i+=2) {
my $worksheet1 = $workbook->worksheet('Sheet1');
# Fetch the cell objects for this row. The variable names appear to follow
# spreadsheet column letters (index 0 = A, 3 = D, 5 = F, ...).
# NOTE(review): the last four names ($af, $ah, $aj, $ao) do not line up with
# indexes 29-35 under that scheme - confirm the intended columns.
my $a = $worksheet1->get_cell(15+$i,0);
my $d = $worksheet1->get_cell(15+$i,3);
my $f = $worksheet1->get_cell(15+$i,5);
my $h = $worksheet1->get_cell(15+$i,7);
my $j = $worksheet1->get_cell(15+$i,9);
my $l = $worksheet1->get_cell(15+$i,11);
my $n = $worksheet1->get_cell(15+$i,13);
my $p = $worksheet1->get_cell(15+$i,15);
my $r = $worksheet1->get_cell(15+$i,17);
my $t = $worksheet1->get_cell(15+$i,19);
my $v = $worksheet1->get_cell(15+$i,21);
my $x = $worksheet1->get_cell(15+$i,23);
my $z = $worksheet1->get_cell(15+$i,25);
my $ab = $worksheet1->get_cell(15+$i,27);
my $af = $worksheet1->get_cell(15+$i,29);
my $ah = $worksheet1->get_cell(15+$i,31);
my $aj = $worksheet1->get_cell(15+$i,33);
my $ao = $worksheet1->get_cell(15+$i,35);
# True when at least one of the 18 cells exists and holds a non-empty value.
if (( defined $a and $a->value() ne "")
or ( defined $d and $d->value() ne "")
or ( defined $f and $f->value() ne "")
or ( defined $h and $h->value() ne "")
or ( defined $j and $j->value() ne "")
or ( defined $l and $l->value() ne "")
or ( defined $n and $n->value() ne "")
or ( defined $p and $p->value() ne "")
or ( defined $r and $r->value() ne "")
or ( defined $t and $t->value() ne "")
or ( defined $v and $v->value() ne "")
or ( defined $x and $x->value() ne "")
or ( defined $z and $z->value() ne "")
or ( defined $ab and $ab->value() ne "")
or ( defined $af and $af->value() ne "")
or ( defined $ah and $ah->value() ne "")
or ( defined $aj and $aj->value() ne "")
or ( defined $ao and $ao->value() ne ""))
# NOTE(review): the condition above ends without an opening '{'. Perl requires
# a block after if (...), so parsing fails at the 'my $value' that follows.
# NOTE(review): because the tests are joined with 'or', any single cell may
# still be undefined when ->value() is called on it below.
# s/[_, -]//g removes underscores, commas, spaces and hyphens.
# s/[^a-zA-Z0-9,]//g removes everything except ASCII letters, digits and commas.
my $value = $a->value();
$value =~ s/[_, -]//g;
my $value2 = $d->value();
$value2 =~ s/[_, -]//g;
my $value3 = $f->value();
$value3 =~ s/[_, -]//g;
my $value4 = $h->value();
$value4 =~ s/[_, -]//g;
my $value5 = $j->value();
$value5 =~ s/[_, -]//g;
my $value6 = $l->value();
$value6 =~ s/[_, -]//g;
my $value7 = $n->value();
$value7 =~ s/[^a-zA-Z0-9,]//g;
my $value8 = $p->value();
$value8 =~ s/[_, -]//g;
my $value9 = $r->value();
$value9 =~ s/[_, -]//g;
my $value10 = $t->value();
$value10 =~ s/[_, -]//g;
my $value11 = $v->value();
$value11 =~ s/[_, -]//g;
my $value12 = $x->value();
$value12 =~ s/[_, -]//g;
my $value13 = $z->value();
$value13 =~ s/[_, -]//g;
my $value14 = $ab->value();
$value14 =~ s/[_, -]//g;
my $value15 = $af->value();
$value15 =~ s/[_, -]//g;
my $value16 = $ah->value();
$value16 =~ s/[^a-zA-Z0-9,]//g;
my $value17 = $aj->value();
$value17 =~ s/[^a-zA-Z0-9,]//g;
my $value18 = $ao->value();
$value18 =~ s/[^a-zA-Z0-9,]//g;
# This inner $files shadows the file-level $files declared before the loop (same path).
my $files = "C:\\Taylor\\perl\\imports.csv";
# NOTE(review): unlink sits inside the loop, so it is attempted before every
# append - presumably it was meant to run once, before the loop starts.
unlink ($files);
# Append mode; the result of open is not checked.
open (OUTFILE, ">>$files");
print OUTFILE "$value,$value2,$value3,$value4,$value5,$value6,$value7,$value8,$value9,$value10,$value11,$value12,$value13,$value14,$value15,$value16,$value17,$value18\n";
# This brace closes the for loop; the if above has no braces of its own.
}
Ideally this would loop through the get cells adding 2 rows each time, instead it seems when the loop begins it breaks.

The solution is already given in the comments, but it might help to make it clearer for anyone else who comes across this problem in the future.
The code contains an if statement that was written like this (simplified massively):
if ($some_condition)
# do something
But in Perl, the code attached to an if statement needs to be in a code block. So it should be written like this:
if ($some_condition) {
# do something
}

Related

Get value from next N rows of a file

I'm having trouble reading the lines that follow the one I'm currently matching ($lines[0]) inside the following foreach loop:
# Asker's first attempt: scan every logrotate_*.log file and print
# "Y/M/D;time;action;user" for each Authentication record found.
my $IN_DIR = "/tmp/appo/log"; # Input Directories
my $jumprow = '<number of row to skip>'; # This is a value
foreach my $INPUT ( glob( "$IN_DIR/logrotate_*.log" ) ) {
    open( my $fh, '<', $INPUT ) or die $!;
    while ( <$fh> ) {
        next unless $. > $jumprow;
        # NOTE(review): with the default $/ each read returns a single line, so
        # this split yields one element and $lines[ $i + 2 ] / $lines[ $i + 3 ]
        # below are never populated.
        my @lines = split /\n/;
        my $i = 0;
        foreach my $lines ( @lines ) {
            if ( $lines[$i] =~ m/\A#\d.\d.+#\d{4}\s\d{2}\s\d{2}\s\d{2}:\d{2}:\d{2}:\d{3}#\+\d+#\w+#\/\w+\/\w+\/Authentication/ ) {
                # Shows only LOGIN/LOGOUT access type and exclude GUEST users
                # NOTE(review): && binds tighter than ||, so the Guest test only
                # applies to the Logout alternative.
                if ( $lines[ $i + 2 ] =~ m/Login/ || $lines[ $i + 2 ] =~ m/Logout/ && $lines[ $i + 3 ] !~ m/Guest/ ) {
                    my ( $y, $m, $d, $time ) = $lines[$i] =~ /\A#\d.\d.+#(\d{4})\s(\d{2})\s(\d{2})\s(\d{2}:\d{2}:\d{2}:\d{3})/;
                    my ( $action ) = $lines[ $i + 2 ] =~ /\A(\w+)/;
                    my ( $user ) = $lines[ $i + 3 ] =~ /\w+:\s(.+)/;
                    print "$y/$m/$d;$time;$action;$user\n";
                }
            }
            else {
                # This next skips the $i++ below for non-matching elements.
                next; # Is this next technically necessary according to you?
            }
            $i++;
        }
    }
    close( $fh );
}
Perhaps the Tie::File module could help me:
# Asker's second attempt: expose each log file as an array through Tie::File
# and scan it for Authentication records.
my $IN_DIR = "/tmp/appo/log"; # Input Directories
my $jumprow = '<number of row to skip>'; # This is a value
foreach my $INPUT ( glob( "$IN_DIR/logrotate_*.log" ) ) {
    # NOTE(review): the ';' ending the tie line detaches the "or die" below,
    # which is a syntax error as written. O_RDONLY also has to be imported
    # (Tie::File's documentation uses "use Fcntl 'O_RDONLY'"), and @lines is
    # not declared with my.
    tie @lines, 'Tie::File', $INPUT, mode => O_RDONLY;
    or die $!;
    # NOTE(review): no filehandle is read in this version, so $. presumably
    # does not hold a line number of $INPUT here - confirm.
    my $i = $.;
    next unless $i > $jumprow;
    foreach my $lines ( @lines ) {
        if ( $lines[$i] =~ m/\A#\d.\d.+#\d{4}\s\d{2}\s\d{2}\s\d{2}:\d{2}:\d{2}:\d{3}#\+\d+#\w+#\/\w+\/\w+\/Authentication/ ) {
            # Shows only LOGIN/LOGOUT access type and exclude GUEST users
            if ( $lines[ $i + 2 ] =~ m/Login/ || $lines[ $i + 2 ] =~ m/Logout/ && $lines[ $i + 3 ] !~ m/Guest/ ) {
                my ( $y, $m, $d, $time ) = $lines[$i] =~ /\A#\d.\d.+#(\d{4})\s(\d{2})\s(\d{2})\s(\d{2}:\d{2}:\d{2}:\d{3})/;
                my ( $action ) = $lines[ $i + 2 ] =~ /\A(\w+)/;
                my ( $user ) = $lines[ $i + 3 ] =~ /\w+:\s(.+)/;
                print "$y/$m/$d;$time;$action;$user\n";
            }
        }
        else {
            # This next skips the $i++ below for non-matching elements.
            next; # Is this next technically necessary according to you?
        }
        $i++;
    }
}
Could you tell me if my declaration with Tie::File is correct or not?
This is only a part of my master script as indicated in following guide mcve
Actually without tie, my master scripts works only with $lines[0], it doesn't take value from $lines[$i+2] or $lines[$i+3]
It looks like you're getting very lost here. I've written a working program that processes the data you showed in your previous question; it should at least form a stable basis for you to continue your work. I think it's fairly straightforward, but ask if there's anything that's not obvious in the Perl documentation
# Print "Y/M/D;time;action;user" for every Login/Logout record of a non-Guest
# user found in the logrotate_*.log files under IN_DIR. Each file is read in
# paragraph mode, so one record (a run of non-blank lines) is handled per read.
use strict;
use warnings 'all';
use feature 'say';
use autodie; # Handle IO failures automatically

use constant IN_DIR => '/tmp/appo/log';

chdir IN_DIR; # Change to input directory
              # Status handled by autodie

for my $file ( glob 'logrotate_*.log' ) {

    # Per-file heading: the name underlined with dashes.
    say $file;
    say '-' x length $file;
    say "";

    open my $fh, '<', $file; # Status handled by autodie

    local $/ = ""; # Enable block mode

    while ( <$fh> ) {

        my @lines = split /\n/;

        # Lines 0, 2 and 3 of a record are inspected below; shorter blocks
        # cannot match and would only trigger "uninitialized" warnings.
        next unless @lines >= 4;

        # Header line: "#<d.d>...#YYYY MM DD HH:MM:SS:mmm"
        next unless $lines[0] =~ /
            ^
            \# \d.\d .+?
            \# (\d\d\d\d) \s (\d\d) \s (\d\d)
            \s
            ( \d\d : \d\d : \d\d : \d\d\d )
        /x;
        my ( $y, $m, $d, $time ) = ($1, $2, $3, $4);
        $time =~ s/.*\K:/./; # Change decimal point to dot for seconds

        next unless $lines[2] =~ /^(Log(?:in|out))/;
        my $action = $1;

        next unless $lines[3] =~ /^User:\s+(.*\S)/ and $1 ne 'Guest';
        my $user = $1;

        print "$y/$m/$d;$time;$action;$user\n";
    }

    say "";
}
output
logrotate_0.0.log
-----------------
2018/05/24;11:05:04.011;Login;USER4
2018/05/24;11:04:59.410;Login;USER4
2018/05/24;11:05:07.100;Logout;USER3
2018/05/24;11:07:21.314;Login;USER2
2018/05/24;11:07:21.314;Login;USER2
2018/05/26;10:48:02.458;Logout;USER2
2018/05/28;10:00:25.000;Logout;USER0
logrotate_1.0.log
-----------------
2018/05/29;10:09:45.969;Login;USER4
2018/05/29;11:51:06.541;Login;USER1
2018/05/30;11:54:03.906;Login;USER4
2018/05/30;11:59:59.156;Logout;USER3
2018/05/30;08:32:11.348;Login;USER4
2018/05/30;11:09:54.978;Login;USER2
2018/06/01;08:11:30.008;Logout;USER2
2018/06/01;11:11:29.658;Logout;USER1
2018/06/02;12:05:00.465;Logout;USER9
2018/06/02;12:50:00.065;Login;USER9
2018/05/24;10:43:38.683;Login;USER1

perl replace characters in a string but retain special character or space

I would like to create a program that replaces characters and retains the special characters. An example input and output is shown below.
Here's what I did so far:
# Asker's attempt: shift every letter of each word forward by the word's length.
# Sigils are restored here; the logic is left as posted.
$sentence = userinput;
@words = split(/ /, $sentence);
# NOTE(review): "@words.length" is not an element count in Perl - it
# concatenates the array's size with length($_).
for ($i = 0; $i < @words.length; $i ++){
    $words[$i] =~ s/\W//g;
    @characters = split(//, $words[$i]);
    #print $words[$i] . "\n";
    $wordlength = length($words[$i]);
    for ($j = 0; $j < @characters.length; $j ++){
        $char = $characters[$j];
        # Increment the letter $wordlength times, wrapping z -> a and Z -> A.
        for ($x = 0; $x < $wordlength; $x++){
            $char++;
            if ($char eq "aa"){
                $char = "a";
            }
            elsif ($char eq "AA"){
                $char = "A";
            }
        }
        print $char;
        # NOTE(review): "=" assigns 0 here, so this condition is always false.
        if ($x = 0){
            $output[$i] = $char;
        }
        else {
            # NOTE(review): join with only a separator and no list returns ''.
            $output[$i] = join ($char);
        }
    }
    print $output[$i];
}
Input:
Hi! how are you doing?
Output:
Jk! krz duh brx itnsl?
A couple of things in your code don't make sense:
Missing use strict; use warnings;.
All variables are global (you should be using my to create variables)
@foo.length is not the number of elements in the array @foo. It's the number of elements in the array @foo concatenated with the number of characters in $_ (because arrays in scalar context return their length, . concatenates strings, and length works on $_ by default).
join ($char) always returns the empty string: You're joining an empty list (no elements) using $char as a separator.
Here's an attempt to fix all of these issues:
use strict;
use warnings;

# Read one line and rotate every letter forward by the length of the word it
# belongs to, wrapping within its own case. Non-letters are left untouched.
my $sentence = readline;
$sentence =~ s{([A-Za-z]+)}{
    my $word    = $1;
    my $shift   = length($word);
    my $rotated = '';
    for my $letter (split //, $word) {
        my $base = ord($letter =~ /^[A-Z]/ ? 'A' : 'a');
        $rotated .= chr((ord($letter) - $base + $shift) % 26 + $base);
    }
    $rotated;
}eg;
print $sentence;
I think what you are doing is rot3 encoding, but if so then your example is wrong
# ROT3: move every ASCII letter three places forward, wrapping within its case.
my $sentence = 'Hi! how are you doing?';
$sentence =~ tr/A-Za-z/D-ZA-Cd-za-c/;
print "$sentence\n";
output
Kl! krz duh brx grlqj?
which is similar, but not identical to
Jk! krz duh brx itnsl?

stockholm to fasta format - include accession id in every header

Hello, I have multiple sequences in Stockholm format. At the top of every alignment there is an accession ID, e.g. '#=GF AC PF00406', and '//' marks the end of the alignment. When I convert the Stockholm format to FASTA format, I need PF00406 in the header of every sequence of that particular alignment. Sometimes there will be multiple Stockholm alignments in one file. I tried to modify the following Perl script, but it gave me bizarre results; any help will be greatly appreciated.
# Convert Stockholm alignments (files named on the command line, or STDIN) to
# FASTA. Sequence fragments are accumulated per name in %seq and flushed by
# printseq() at every "//" terminator and once more at end of input.
my $columns = 60;   # residues per FASTA output line
my $gapped = 0;     # set by -g
my $sorted = 0;     # set by -s; read by printseq()
my $progname = $0;
$progname =~ s/^.*?([^\/]+)$/$1/;   # keep only the part after the last '/'
my $usage = "Usage: $progname [<Stockholm file(s)>]\n";
$usage .= " [-h] print this help message\n";
$usage .= " [-g] write gapped FASTA output\n";
$usage .= " [-s] sort sequences by name\n";
$usage .= " [-c <cols>] number of columns for FASTA output (default is $columns)\n";
# parse cmd-line opts
my @argv;   # non-option arguments, i.e. the input file names
while (@ARGV) {
    my $arg = shift;
    if ($arg eq "-h") {
        die $usage;
    } elsif ($arg eq "-g") {
        $gapped = 1;
    } elsif ($arg eq "-s"){
        $sorted = 1;
    } elsif ($arg eq "-c") {
        defined ($columns = shift) or die $usage;
    } else {
        push @argv, $arg;
    }
}
# Hand the remaining arguments back to @ARGV so that <> reads those files.
@ARGV = @argv;
my %seq;
while (<>) {
    next unless /\S/;       # skip blank lines
    next if /^\s*\#/;       # skip Stockholm markup/comment lines
    if (/^\s*\/\//) { printseq() }
    else {
        chomp;
        my ($name, $seq) = split;
        # NOTE(review): as written the next line is a comment, so -g has no
        # effect and gap characters are always kept; it presumably read
        # "$seq =~ ..." originally - confirm before re-enabling.
        #seq =~ s/[\.\-]//g unless $gapped;
        $seq{$name} .= $seq;
    }
}
printseq();
# Write every sequence collected in the file-level %seq as a FASTA record
# (">name" followed by the residues wrapped at $columns characters), in name
# order when $sorted is true and in hash order otherwise, then empty %seq.
sub printseq {
    my @names = keys %seq;
    @names = sort @names if $sorted;

    for my $name (@names) {
        my $residues = $seq{$name};
        print ">$name\n";
        my $offset = 0;
        while ($offset < length $residues) {
            print substr($residues, $offset, $columns), "\n";
            $offset += $columns;
        }
    }

    %seq = ();
}
Depending on how much variation there is in the line with the accession ID, you might need to modify the regex, but this works for your example file:
# Main loop of the Stockholm-to-FASTA converter, extended so that every
# sequence name carries the accession of the alignment it belongs to
# (taken from that alignment's "#=GF AC <accession>" line).
my %seq;
my $aln = '';   # accession of the alignment being read; '' until one is seen
while (<>) {
    # Remember the accession; tolerate any amount of whitespace between fields.
    $aln = $1 if /#=GF\s+AC\s+(\w+)/;

    next unless /\S/;       # skip blank lines
    next if /^\s*\#/;       # skip markup lines (including the AC line itself)

    if (/^\s*\/\//) {
        # "//" ends the alignment: flush it and forget its accession.
        printseq();
        $aln = '';
        next;
    }

    chomp;
    my ($name, $seq) = split;
    # Append the accession only when one is known, so headers of alignments
    # without an AC line do not end in a stray space.
    $name .= " $aln" if length $aln;
    $seq{$name} .= $seq;
}
printseq();

Pattern Matching in perl

I want to parse some information from the file.
Information in the file:
Rita_bike_house_Sha9
Rita_bike_house
I want to have output like this:
$a = Rita_bike_house and $b = Sha9,
$a = Rita_bike_house and $b = "original"
In order to get that I have used the below code:
$name = @_; # This @_ has all the information from the file that I have shown above.
# NOTE(review): assigning @_ to a scalar stores the number of arguments, not their text.
#For matching pattern Rita_bike_house_Sha9
# NOTE(review): the pattern has no capture groups, so in list context a
# successful match returns (1) and $b is left undefined.
($a, $b) = $name =~ /\w\d+/;
if ($a ne "" and $b ne "" ) { return ($a,$b) }
# this statement does not work at all as its first condition
# before the end is not satisfied.
Is there any way where I can store "Rita_bike_house" in $a and "Sha9" in $b? I think my regexp is missing with something. Can you suggest anything?
Please don't use the variables $a and $b in your code. There are used by sort and will confuse you.
Try:
# For each DATA line, split off a final "_"-separated field that contains a
# digit; lines without such a field are reported with the tag "original".
while ( defined( my $record = <DATA> ) ) {
    chomp $record;
    my ( $stem, $tag ) = $record =~ m{ \A ( \w+ ) _ ( [^_]* \d [^_]* ) \z }msx;
    if ( defined $tag ) {
        print "\$a = $stem and \$b = $tag\n";
    }
    else {
        print "\$a = $record and \$b = \"original\"\n";
    }
}
__DATA__
Rita_bike_house_Sha9
Rita_bike_house
Not very nice, but the next:
use strict;
use warnings;

# For each non-blank DATA line, treat the last "_"-separated field as a tag
# when it contains a digit; otherwise report the tag as "original".
while ( my $line = <DATA> ) {
    chomp $line;
    next if $line =~ /^\s*$/;
    my @parts = split /_/, $line;
    # A ternary replaces "my ... if ...", whose behaviour perlsyn documents as
    # undefined; the @parts test covers lines made only of underscores.
    my $tag  = ( @parts && $parts[-1] =~ /\d/ ) ? pop @parts : '"original"';
    my $stem = join '_', @parts;
    print "\$a = $stem and \$b = $tag,\n";
}
__DATA__
Rita_bike_house_Sha9
Rita_bike_house
prints:
$a = Rita_bike_house and $b = Sha9,
$a = Rita_bike_house and $b = "original",
If you are sure that the pattern which is required will always be similar to 'Sha9' and also it will appear at the end then just do a greedy matching....
# Read filename.txt and split each line into a leading part ($a) and an
# optional trailing "_<letters><digits>" tag ($b, "original" when absent).
open my $fh, '<', "filename.txt" or die $!;
my @data = <$fh>;
close $fh;   # close the handle itself; close(<FILE>) would read from it instead
#my $line = "Rita_bike_house_Sha9";
foreach my $line (@data)
{
    chomp($line);
    if ($line =~ m/(.*?)(_([a-zA-Z]+[0-9]+))?$/)
    {
        $a = $1;
        $b = defined $3 ? $3 : "original";
    }
}

Why does perl "hash of lists" do this?

I have a hash of lists that is not getting populated.
I checked that the block at the end that adds to the hash is in fact being called on input. It should either add a singleton list if the key doesn't exist, or else push to the back of the list (referenced under the right key) if it does.
I understand that the GOTO is ugly, but I've commented it out and it has no effect.
The problem is that when printhits is called, nothing is printed, as if there are no values in the hash. I also tried each (%genomehits), no dice.
THANKS!
#!/usr/bin/perl
# Reads one or more wiggle files (names joined with ';' in the first argument),
# passes each to singlegene() together with the shared %genomehits hash, and
# finally prints the hash with printhits().
use strict;
use warnings;
my $len = 11; # resolution of the peaks
#$ARGV[0] is input file
#$ARGV[1] is call number
# optional -s = spread number from call
# optional -o specify output file name
my $usage = "see arguments";
my $input = shift @ARGV or die $usage;
my $call = shift @ARGV or die $usage;
# The trailing space lets the option patterns below require \s after a value.
my $therest = join(" ",@ARGV) . " ";
print "the rest".$therest."\n";
my $spread = 1;
my $output = $input . ".out";
if ($therest =~ /-s\s+(\d+)\s/) {$spread = $1;}
if ($therest =~ /-o\s+(.+)\s/) {$output = $1;}
# initialize master hash
my %genomehits = ();
foreach (split ';', $input) {
    # The gene name is everything before the last '-' in the file name.
    my $mygenename = "err_naming";
    if ($_ =~ /^(.+)-/) {$mygenename = $1;}
    open (INPUT, $_) or die "Cannot open $_: $!";
    my @wiggle = <INPUT>;
    &singlegene(\%genomehits, \@wiggle, $mygenename);
    close (INPUT);
}
&printhits;
#print %genomehits;
# Print every key of the file-level %genomehits followed by the
# ';'-terminated values stored under it, one key per line, in key order.
sub printhits {
    # Iterate over the keys only: a bare %genomehits in list context yields
    # keys and values interleaved.
    foreach my $key (sort keys %genomehits) {
        print "key: $key , values: ";
        foreach my $hit (@{ $genomehits{$key} }) {
            print $hit . ";";
        }
        print "\n";
    }
}
# Scan the wiggle lines of one gene. When a run of consecutive positions whose
# value is >= the file-level $call ends, widen it by $len on both sides and
# add the gene name under the key "chromosome:position" for every position.
#   $_[0]  reference to the mapping hash
#   $_[1]  reference to the array of wiggle lines
#   $_[2]  name of the gene being processed
# Also reads the file-level $call, $len and $spread.
sub singlegene {
    # let %hash be the mapping hash
    # let @mygene be the gene to currently process
    # let $mygenename be the name of the gene to currently process
    # NOTE(review): this copies the caller's hash into a new lexical %hash, so
    # everything stored below goes into the copy, which is discarded on return.
    my (%hash) = %{$_[0]};
    my (@mygene) = @{$_[1]};
    my $mygenename = $_[2];
    my $chromosome;
    my $leftbound = -2;
    my $rightbound = -2;
    foreach (@mygene) {
        #print "Doing line ". $_ . "\n";
        # Skip track/output lines and lines containing '#'.
        if ($_ =~ "track" or $_ =~ "output" or $_ =~ "#") {next;}
        # A "...Step" declaration line starts a new section: pick up the
        # chromosome name and reset the current range.
        if ($_ =~ "Step") {
            if ($_ =~ /chrom=(.+)\s/) {$chromosome = $1;}
            if ($_ =~ /span=(\d+)/) {$1 == 1 or die ("don't support span not equal to one, see wig spec")};
            $leftbound = -2;
            $rightbound = -2;
            next;
        }
        # Data line: first tab-separated field is the position, last the value.
        my @line = split /\t/, $_;
        my $pos = $line[0];
        my $val = $line[-1];
        # above threshold for a call
        if ($val >= $call) {
            # start of range
            if ($rightbound != ($pos - 1)) {
                $leftbound = $pos;
                $rightbound = $pos;
            }
            # middle of range, increment rightbound
            else {
                $rightbound = $pos;
            }
            # NOTE(review): \$_ is a reference, so this matches its stringified
            # address against the last line - presumably meant to compare $_.
            if (\$_ =~ $mygene[-1]) {goto FORTHELASTONE;}
        }
        # else reinitialize: not a call
        else {
            FORTHELASTONE:
            # typical case, in an ocean of OFFs
            if ($rightbound != ($pos-1)) {
                $leftbound = $pos;
            }
            else {
                # register the range
                my $range = $rightbound - $leftbound;
                # NOTE(review): "for ($spread)" runs its body exactly once;
                # presumably it was meant to repeat $spread times.
                for ($spread) {
                    $leftbound -= $len;
                    $rightbound += $len;
                }
                #print $range . "\n";
                foreach ($leftbound .. $rightbound) {
                    my $key = "$chromosome:$_";
                    if (not defined $hash{$key}) {
                        $hash{$key} = [$mygenename];
                    }
                    else { push @{$hash{$key}}, $mygenename; }
                }
            }
        }
    }
}
You are passing a reference to %genomehits to the function singlegene, and then copying it into a new hash when you do my (%hash) = %{$_[0]};. You then add values to %hash which goes away at the end of the function.
To fix it, use the reference directly with arrow notation. E.g.
my $hash = $_[0];
...
$hash->{$key} = yadda yadda;
I think it's this line:
my (%hash) = %{$_[0]};
You're passing in a reference, but this statement is making a copy of your hash. All additions you make in singlegene are then lost when you return.
Leave it as a hash reference and it should work.
PS - Data::Dumper is your friend when large data structures are not behaving as expected. I'd sprinkle a few of these in your code...
use Data::Dumper; print Dumper \%genomehits;   # dump the hash the script actually declares