I have an FTP upload from my client machine to my server that runs continuously as a means of backup. Occasionally, if the connection becomes corrupt, the upload stalls. The solution is to remove the "corrupt file" from the server; the client then resumes and the file is uploaded the next time the client runs. This script removes the file if the failure has never occurred before; if the file has been deleted in the past, it checks the timestamp to confirm that this is a new occurrence, and then deletes the file if required.
the line in the logfile will be like:
Sun May 11 02:38:46 2010 [pid 17116] [ftp] FAIL UPLOAD: Client "192.168.179.58", "/Dan/Example.file", 0.00Kbyte/sec
and once written to the filelist it looks like this:
Sun May 11 02:38:46 - /Dan/Example.file
Below you can see where the scope problem lies within the read_filelist() sub-routine.
Please see the solution so far:
#!/usr/bin/perl
use warnings;
use strict;
use Data::Dumper qw(Dumper);
use Fcntl qw(SEEK_SET SEEK_END);

# Remove stalled vsftpd uploads so that the client re-sends them on its next run.
#
# Every "FAIL UPLOAD" entry in the vsftpd log names a file that may have been
# left half-written on the server.  The filelist remembers which failures have
# already been handled, one record per line:
#     Sun May 11 02:38:46 - /Dan/Example.file
# A file is removed when it has no record yet, or when the log entry is later
# in the day than the newest record for that file.  Each removal is recorded.

# Open the vsftpd log read-only and the filelist for reading and writing.
open my $logfile, '<', '/var/log/vsftpd.log'
    or die "could not open /var/log/vsftpd.log: $!";
open my $filelist, '+<', '/scripts/filelist'
    or die "could not open /scripts/filelist: $!";

# Set to 1 by read_filelist() when the file already has a record.
my $fileexists = 0;

# read_filelist($filename, $hour, $min, $sec, $filelist)
#
# Look $filename up in the filelist and remove the stalled upload again when
# the log entry ($hour:$min:$sec) is later than the newest recorded failure.
# Only the time of day is compared, because the records carry no year.
#
# Side effect: sets the file-level $fileexists to 1 when a record is found.
# Returns 1 when the file was removed, 0 otherwise.
sub read_filelist {
    my ($filename, $hour, $min, $sec, $filelist) = @_;
    return 0 unless defined $filename;

    # The handle is shared by every call, so start from the top each time;
    # otherwise only the first call would see any records.
    seek $filelist, 0, SEEK_SET
        or die "could not rewind /scripts/filelist: $!";

    my $latest;    # newest recorded time of day for $filename, in seconds
    while (my $record = <$filelist>) {
        chomp $record;

        # "Sun May 11 02:38:46 - /Dan/Example.file" -> (timestamp, path)
        my ($stamp, $listed) = split /\s+-\s+/, $record, 2;
        next unless defined $listed and $listed eq $filename;

        $fileexists = 1;    # not "my": the caller reads this flag

        next unless $stamp =~ /(\d+):(\d+):(\d+)/;
        my $recorded = ($1 * 60 + $2) * 60 + $3;
        $latest = $recorded if !defined $latest or $recorded > $latest;
    }
    return 0 unless defined $latest;

    # Compare whole times; testing hour, minute and second independently
    # would reject 03:00:00 as "not later" than 02:38:46.
    my $logged = ($hour * 60 + $min) * 60 + $sec;
    return 0 unless $logged > $latest;

    system('rm', '-fv', "/home/desktop$filename");
    return 1;
}

# Walk the vsftpd log and handle every failed upload.
while (my $entry = <$logfile>) {
    next if index($entry, 'FAIL UPLOAD:') == -1;

    # ... FAIL UPLOAD: Client "192.168.179.58", "/Dan/Example.file", 0.00Kbyte/sec
    # Splitting on whitespace + quote leaves the file name at the start of
    # field 2 and the timestamp at the start of field 0.
    my @quoted = split /\s+"/, $entry;
    next unless defined $quoted[2];
    my ($filename) = split /",/, $quoted[2];
    next unless defined $filename and length $filename;

    # The name comes from a remote client; never follow ".." out of the
    # upload directory.
    next if $filename =~ m{(?:\A|/)\.\.(?:/|\z)};

    # Keep "Sun May 11 02:38:46"; the year and everything after it is dropped.
    next unless $quoted[0] =~ /\A(.*?(\d+):(\d+):(\d+))/;
    my ($stamp, $hour, $min, $sec) = ($1, $2 + 0, $3 + 0, $4 + 0);

    # Iterate through '/scripts/filelist'.
    $fileexists = 0;
    my $removed = read_filelist($filename, $hour, $min, $sec, $filelist);

    # Never seen before: remove it unconditionally.
    if ($fileexists == 0) {
        system('rm', '-fv', "/home/desktop$filename");
        $removed = 1;
    }

    # Record the removal after the scan, so the new record cannot match itself.
    if ($removed) {
        seek $filelist, 0, SEEK_END
            or die "could not seek in /scripts/filelist: $!";
        print {$filelist} "$stamp - $filename\n";
    }
}
close $filelist or die "could not close /scripts/filelist: $!";
close $logfile;
the variables pass to the read_filelist() sub no problem, but when I start the while() loop all passed variables become uninitialized:
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
print "$filename" . "\n";
}
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
This returns the following:
Use of uninitialized value in string eq at removefailed.pl line 39, <$filelist> line 1.
Use of uninitialized value $filename in string eq at removefailed.pl line 39, <$filelist> line 1.
log: 0:0:0
file: 2:38:46
Use of uninitialized value $filename in string at removefailed.pl line 52, <$filelist> line 1.
However if I move the prints outside of the while loop it works, but obviously I can only compare them with the last line of the filelist.
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
}
print "$filename" . "\n";
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
I get the following output:
file: 2:38:46
log: 2:38:46
/Dan/Example.file
Any help with this would be much appreciated, please let me know if you need any further information?
I have solved this problem using hashes. I think it was caused by the filelist already being open while the logfile was being read.
Anyhow I created a global Hash:
my %logfilelines;
passed all the assorted relevant lines to it from the logfile:
$logfilelines{$filename[0].":".$hour.":".$min.":".$sec}++
Then within the read_filelist() sub I run through %logfilelines and compare the filename, time, etc. I will have to rebuild the time comparison as it is wrong, but at least I am making progress now. See the new subroutine below in case you are curious:
sub read_filelist{
#my ($filename, $hour, $min, $sec, $filelist) = #_;
my $fint = -1;
my #filelines;
my #filelistlines;
foreach my $line (keys %logfilelines) {
open my $filelist2, '<', 'c:\scripts\filelist'
or die "could not open file: $!";
$fint = $fint + 1;
$filelines[$fint] = $line;
#filelistlines = split /:/, $filelines[$fint];
my $filename = $filelistlines[0];
my $hour = $filelistlines[1]+0;
my $min = $filelistlines[2]+0;
my $sec = $filelistlines[3]+0;
while (<$filelist2>){
my #filelisttime = split /\s+/, $_;
my #splitfilelisttime = split /:/, $filelisttime[3];
my #filelistfile = split /-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
chomp $filelistfile[1];
if ($filelistfile[1] eq $filename){
# my $fileexists = 1;
print "log: "."$hour".":"."$min".":"."$sec"." $filename"."\n";
print "file: "."$fhour".":"."$fmin".":"."$fsec"."\n";
if ($min > $fmin || $hour > $fhour){
# if($min >= $fmin ||$hour >= $fhour){
# if($sec > $fsec){
#system ("rm", "-fv", "/home/desktop"."$filename");
print "success"." $filename";
# }
# }
}
}
}
}
Related
I have two files: one is the user's input file and the other is the original config file. After comparing the two files, I need to apply add/delete/replace operations to my original config file.
user's input file: (showing line by line)
add:L28A:Z:W #add--> DID ID --> Bin ID
del:L28C:B:Q:X:
rpl:L38A:B:M:D:
original input file
L28A:B:Q:M:X:
L28C:B:Q:M:X:
L38A:B:Q:M:X:
Based on the user's input file, the first line performs an add, the second a delete, and the third a replace.
so output for original input txt file should show:
L28A:B:Q:M:X:Z:W
L28C:M:
L38A:B:M:D:
but my code is showing :
L28A:B:Q:M:X:
L28C:B:Q:M:X:
L38A:B:Q:M:X:
L28A:B:Q:M:X:Z:W
L28C:M:
L38A:B:M:D:
how can i replace above three lines with new modify lines?
use strict;
use warnings;
use File::Copy;
use vars qw($requestfile $requestcnt $configfile $config2cnt $my3file $myfile3cnt $new_file $new_filecnt #output);
my $requestfile = "DID1.txt"; #user's input file
my $configfile = "DID.txt"; #original config file
my $new_file = "newDID.txt";
readFileinString($requestfile, \$requestcnt);
readFileinString($configfile, \$config2cnt);
copy($configfile, $new_file) or die "The copy operation failed: $!";
while ($requestcnt =~ m/^((\w){3})\:([^\n]+)$/mig) #Each line from user request
{
my $action = $1;
my $requestFullLine = $3;
while ($requestFullLine =~ m/^((\w){4})\:([^\n]+)$/mig) #Each line from user request
{
my $DID = $1; #DID
my $requestBinList = $3; #Bin List in user request
#my #First_values = split /\:/, $requestBinList;
if ($config2cnt =~ m/^$DID\:([^\n]+)$/m) #configfile
{
my $ConfigFullLine = $1; #Bin list in config
my $testfile = $1;
my #First_values = split /\:/, $ConfigFullLine;
my #second_values = split /\:/, $requestBinList;
foreach my $sngletter(#second_values) # Each line from user request
{
if( grep {$_ eq "$sngletter"} #First_values)
{
print " $DID - $sngletter - Existing bin..\n\n";
}
else
{
print "$DID - $sngletter - Not existing bin..\n\n";
}
}
print "Choose option 1.Yes 2.No\n";
my $option = <STDIN>;
if ($option == 1) {
open(DES,'>>',$configfile) or die $!;
if($action eq 'add')
{
$ConfigFullLine =~ s/$/$requestBinList/g;
my $add = "$DID:$ConfigFullLine";
print DES "$add\n" ;
print"New Added Bin Valu $add\n\n";
}
if ( $action eq 'del')
{
foreach my $sngletter(#second_values){
$ConfigFullLine =~ s/$sngletter://g;
}
print DES "$DID:$ConfigFullLine\n";
print "New Deleted Bin Value $DID:$ConfigFullLine\n\n";
}
if ( $action eq 'rpl')
{
my $ConfigFullLine = $requestBinList;
my $replace = "$DID:$ConfigFullLine";
print DES "$replace\n";
print"Replace Bin Value $replace\n\n";
}
}
elsif ($option == 2)
{
print"Start from begining\n";
}
else
{
print "user chose invalid process or input is wrong\n";
}
}
else
{
print "New DID $DID detected\n";}
}
}
# readFileinString($path, \$buffer)
#
# Read the whole of $path into the scalar that $buffer refers to.
# Dies with the file name and the OS error when the file cannot be opened
# or read.
sub readFileinString
{
    my $File   = shift;
    my $string = shift;
    use File::Basename;    # import kept in case other code in the file relies on it

    # Three-argument open with a lexical handle: a name that starts with
    # '>' or '|' can no longer be mistaken for an open mode.
    open(my $in, '<', $File) or die "\nFailed Reading File: [$File]\n\tReason: $!";

    local $/;              # slurp mode: one read returns the entire file
    my $content = <$in>;
    die "\nFailed Reading File: [$File]\n\tReason: $!" unless defined $content;

    $$string = $content;
    close($in);
}
The problem is here:
open(DES,'>>',$configfile) or die $!;
You open your file for appending. So you get the original data, followed by your edited data.
Update: It appears that you have a working solution now, but I thought it might be interesting to show you how I would write this.
This program is a Unix filter. That is, it reads from STDIN and writes to STDOUT. I find that far more flexible than hard-coded filenames. You also don't have to explicitly open files - which saves time :-)
It also takes a command-line option, -e, telling it which file contains the edit definitions. So it is called like this (assuming we've called the program edit_files):
$ edit_files -e edit_definitions.txt < your_input_file > your_output_file
And here's the code.
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
my %opts;
getopts('e:', \%opts);
my %edits = read_edits($opts{e});
while (<>) {
chomp;
my ($key, $val) = split /:/, $_, 2; #/ stop faulty syntax highlight
if (!exists $edits{$key}) {
print "$_\n";
next;
}
my $edit = $edits{$key};
if ($edit->[0] eq 'add') {
print "$_$edit->[1]\n";
} elsif ($edit->[0] eq 'del') {
$val =~ s/$_:// for split /:/, $edit->[1]; #/
print "$key:$val\n";
} elsif ($edit->[0] eq 'rpl') {
print "$key:$edit->[1]\n";
} else {
warn "$edit->[0] is an invalid edit type\n";
next;
}
}
# read_edits($file)
#
# Parse the edit-definition file and return a hash (as a flat key/value list)
# that maps each record key to a two-element array ref [ $type, $value ]:
#     add:L28A:Z:W   ->   L28A => [ 'add', 'Z:W' ]
# Everything from a '#' to the end of a line is a comment; blank lines are
# skipped.  Dies when no file name is given or the file cannot be opened.
sub read_edits {
  my $file = shift;
  die "No edit definition file given (use -e FILE)\n" unless defined $file;

  open my $edit_fh, '<', $file or die "Cannot open edit file '$file': $!";

  my %edits;
  while (<$edit_fh>) {
    chomp;

    # Remove comments
    s/\s*#.*//;
    next unless /\S/;

    # A limit of 3 keeps the colon-separated value list in one piece.
    my ($type, $key, $val) = split /:/, $_, 3;
    next unless defined $key;

    $edits{$key} = [ $type, defined $val ? $val : '' ];
  }
  close $edit_fh;

  # Without an explicit return the sub yields the (empty) value of the
  # finished while loop and the caller never sees any edits.
  return %edits;
}
The code runs sometimes, and sometimes gives an error on a Linux host.
I need to find out why the hash is not printing.
Error message: Use of uninitialized value in sprintf at ./fa_list.pl line 139, line
Can someone check why I'm getting this error?
use Getopt::Long;
my $sid = '9999';
my $Fa_VSan_Map = 'Fa_VSan_Map';
sub usage {
my $message = $_[0];
if (defined $message && length $message) {
$message .= "\n"
unless $message =~ /\n$/;
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command -sid xxx -outf FA_Mapping\n" .
"Where -sid: is primary SID to show mappings.\n" .
" -outf: Output File prefix.\n" .
" -Reserved...\n"
);
die("\n")
}
GetOptions( 'sid=i' => \$sid, 'outf=s' => \$Fa_VSan_Map) or
usage("Invalid commmand line options.");
print($sid);
my $outf = "$Fa_VSan_Map$sid.csv";
my $outf1 = "Fa_VSan_Map1$sid.csv";
my ($mydir,$dir_port,$dir_port_wwpn,$FaWWPN);
my (%FA,%FAH,%FAC,%VSAN);
my ($wwpn,$host,$port,$fcid,$logged,$fab);
# 50:00:09:72:08:4b:05:89, => cdc02-core1-1.yyyyy.xxxx.com,CISCO,fc3/12,VS251,50:00:09:72:08:4b:05:89,,8,Active
# cdc02-core-1-2.yyyyy.xxxx.com,CISCO,fc1/29,VS251,50:00:09:73:00:1c:e1:1c,,8,Active
# LoadVSAN()
#
# Load the rows of "VSAN<sid>.csv" that contain "Active" into %VSAN.
# The key is the port WWPN (field 4) in lower case with the colons removed,
# so that it matches the lc() lookup used when the report is written.
# Each value is an array ref holding fields 3, 2, 0, 6 and 7 of the row,
# in that order.  Dies when the file cannot be opened.
sub LoadVSAN()
{
    my $vsanf = "VSAN$sid.csv";
    print ($vsanf);
    open (my $vsan_fh, "<", $vsanf) or die "Could not open $vsanf: $!";
    while (my $row = <$vsan_fh>) {
        next unless $row =~ /Active/;

        my @array = split /,/, $row;
        next unless defined $array[4];    # row too short to carry a WWPN
        print (@array);

        (my $key = lc $array[4]) =~ s/://g;
        my @line_arranged = ($array[3],$array[2],$array[0],$array[6],$array[7]);
        $VSAN{$key} = \@line_arranged;
        print($key, ": ", @{$VSAN{$key}}, "\n");
    }
    close $vsan_fh;
}
LoadVSAN;
# foreach my $key (%VSAN) {
# print(${VSAN{$key}}[0]); print("\n");
# ${$VSAN{$FaWwpn}}[0]
# }
open (OUT, ">", $outf) or die "Could not open $outf $!";
open( OUT1, ">",$outf1) or die "Could not open $outf1 $!";
my $sidtxt = "sidcfg.fa$sid.txt";
my $cmd = 'symcfg -sid ' . $sid . ' list -fa all -v > ' . $sidtxt;
system($cmd);
open ( SYM, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while ( <SYM>) {
chomp ;
if (/Director Identification:/) {
$mydir = $_;
$mydir =~ s/\s+Director Identification: //;
$mydir =~ s/FA-//;
}
elsif (/Director Port:/) {
$port = $_;
$port =~ s/\s+Director Port: //;
$dir_port = sprintf '%04d_%03s_%03d', int($sid), $mydir, int($port);
}
elsif (/WWN Port Name/) {
$wwpn = $_;
$wwpn =~ s/\s+WWN Port Name\s+: //;
$dir_port_wwpn = sprintf '%s,%s', $dir_port, $wwpn;
$FA{$dir_port} = $wwpn;
}
}
close(SYM);
$sidtxt = 'symaccess.ll.' . $sid . '.txt';
$cmd = 'symaccess -sid ' . $sid . ' list logins > ' . $sidtxt;
#print($cmd);
system($cmd );
open ( SYM, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while ( <SYM>) {
chomp ;
if (/Director Identification/) {
$mydir = $_;
$mydir =~ s/Director Identification\s+:\s+//;
$mydir =~ s/FA-//;
}
elsif (/Director Port/) {
$port = $_;
$port =~ s/Director Port\s+:\s+//;
$dir_port = sprintf '%04d_%03s_%03d', int($sid),$mydir, int($port);
}
elsif (/Fibre/) {
($wwpn,undef, $host,$port,$fcid,$logged,$fab) = split;
my $host_port;
if( lc($host) eq 'null') {
$host_port = substr($wwpn,10,6);
}
else {
$host_port = $host . '_' . $port . '_' . substr($wwpn,12,4);
}
if (exists $FAH{$dir_port}) {
$FAH{$dir_port} .= ':' . $host_port;
$FAC{$dir_port} += 1;
} else {
$FAH{$dir_port} = $host_port;
$FAC{$dir_port} = 1;
}
if ( $logged eq "Yes") {
my $line = sprintf ( '%s,%s,%s,%s', $dir_port, $FA{$dir_port}, $host_port, $fcid);
print (OUT1 $line . "\n");
}
}
}
# Write the per-FA summary: one CSV row for every director port that has at
# least one host login.
print OUT "Fa,FaWWPN,VSan,HostCount,PERCENT_BUSY,HostNames\n";
my $PERCENT_BUSY=10.0;
foreach my $fa ( keys %FAC) {
    # A port can have logins without a WWPN in %FA or without a matching row
    # in %VSAN.  Passing those undefined values to sprintf is what raises
    # "Use of uninitialized value in sprintf", so substitute empty fields.
    my $FaWwpn = defined $FA{$fa} ? lc($FA{$fa}) : '';
    my $vsan   = (exists $VSAN{$FaWwpn} && defined $VSAN{$FaWwpn}[0])
               ? $VSAN{$FaWwpn}[0]
               : '';
    my $hosts  = defined $FAH{$fa} ? lc($FAH{$fa}) : '';

    my $line = sprintf ('%s,%s,%s,%s,%3.2f,%s', $fa, $FaWwpn, $vsan, $FAC{$fa}, $PERCENT_BUSY, $hosts);
    print OUT $line . "\n";
}
close(SYM);
# Buffered write errors only surface when the handle is closed.
close(OUT)  or die "Could not close $outf $!";
close(OUT1) or die "Could not close $outf1 $!";
I believe there is a problem with lc($FAH{$fa}).
Have you checked that $FAH{$fa} is initialized in your code?
Hello I've multiple sequences in stockholm format, at the top of every alignment there is a accession ID, for ex: '#=GF AC PF00406' and '//' --> this is the end of the alignment. When I'm converting the stockholm format to fasta format I need PF00406 in the header of every sequence of the particular alignment. Some times there will be multiple stockholm alignments in one file. I tried to modify the following perl script, it gave me bizarre results, any help will be greatly appreciated.
my $columns = 60;
my $gapped = 0;
my $progname = $0;
$progname =~ s/^.*?([^\/]+)$/$1/;
my $usage = "Usage: $progname [<Stockholm file(s)>]\n";
$usage .= " [-h] print this help message\n";
$usage .= " [-g] write gapped FASTA output\n";
$usage .= " [-s] sort sequences by name\n";
$usage .= " [-c <cols>] number of columns for FASTA output (default is $columns)\n";
# parse cmd-line opts
my #argv;
while (#ARGV) {
my $arg = shift;
if ($arg eq "-h") {
die $usage;
} elsif ($arg eq "-g") {
$gapped = 1;
} elsif ($arg eq "-s"){
$sorted = 1;
} elsif ($arg eq "-c") {
defined ($columns = shift) or die $usage;
} else {
push #argv, $arg;
}
}
#ARGV = #argv;
my %seq;
while (<>) {
next unless /\S/;
next if /^\s*\#/;
if (/^\s*\/\//) { printseq() }
else {
chomp;
my ($name, $seq) = split;
#seq =~ s/[\.\-]//g unless $gapped;
$seq{$name} .= $seq;
}
}
printseq();
# Print every sequence collected in %seq as FASTA, wrapping the residues at
# $columns characters per line, then empty %seq for the next alignment.
# Names come out sorted when $sorted is set, otherwise in hash order.
sub printseq {
    my @names = $sorted ? (sort keys %seq) : (keys %seq);
    foreach my $name (@names) {
        my $residues = $seq{$name};
        print ">$name\n";
        my $offset = 0;
        while ($offset < length $residues) {
            print substr($residues, $offset, $columns), "\n";
            $offset += $columns;
        }
    }
    %seq = ();
}
Depending on the how much variation there is in the line with the accessionID, you might need to modify the regex, but this works for your example file
my %seq;
my $aln;
while (<>) {
if ($_ =~ /#=GF AC (\w+)/) {
$aln = $1;
}
elsif ($_ =~ /^\s*\/\/\s*$/){
$aln = '';
}
next unless /\S/;
next if /^\s*\#/;
if (/^\s*\/\//) { printseq() }
else {
chomp;
my ($name, $seq) = split;
$name = $name . ' ' . $aln;
$seq{$name} .= $seq;
}
}
printseq();
I have made a script to extract the content of log files and to calculate the time difference if the task is complete.
Suppose I have four jobs and each job has thre individual tasks, so far I need the start of each task, and just print it.
Everything is fine except when I try to initialise to make it convenient, by using $j, $l which are used as sort of two-dimensional array.
The problem is at the output where I get the same "Started at" for each job.
The values of $counter and $l should be the root cause.
Can anyone help? I tried my best and am sort of newbie.
# getdate($line)
#
# Return the first two colon-separated fields of $line -- the hour and the
# minute of a line that starts with "HH:MM:..." -- as a two-element list.
sub getdate {
    my ($line) = @_;

    # Lexicals: the result is handed back only through the return value and
    # no longer overwrites the package-level $hrs and $min.
    my ($hours, $minutes) = split /:/, $line, 3;
    return ($hours, $minutes);
}
print FILE "<html><head>\n";
print FILE "<title>CGI Test</title>\n";
print FILE "</head>\n";
print FILE "<body>";
print FILE "<font size=\"5\" color=\"#008080\" face=\"Tahoma\"><b><u><br>";
print FILE "PBI Batch for 22/02/2013";
print FILE "</font></b></u><br><br><br>";
my $i = 0;
my $j = 0;
my $l = 0;
my #sample;
#print FILE "<h4>";
foreach $header (<COLLECTION>) {
chomp($header);
($heading, $filepath) = split(',', $header);
#$two[$i]="<font size=\"3\"color=\"#008000\" face=\"Tahoma\"><b><u><br>";
#$two[$i]="<font size=\"3\" color=".$color." face=\"Tahoma\"><b><u><br>";
$two[$i] .= $heading;
#$two[$i] .= "</font></u></b><br>";
#print FILE "<font size=\"3\" color=\"#008000\" face=\"Tahoma\"><b><u><br>";
# print FILE $two[$i];
#print FILE $heading;
#print FILE "</font></u></b><br>";
#print $filepath."\n";
open(MYFILE1, $filepath) or die 'Could nont openfile';
my $counter;
foreach $list (<MYFILE1>) {
chomp($list);
($file, $path) = split(',', $list);
#print FILE $file;
my #secondstart;
my #secondend;
my $secondcounter = 0;
#print FILE "valueofllllllllllllllllllllllllllll".$l;
foreach $counter ($file) {
print FILE "valueofllllllllllllllllllllllllllll" . $l;
$l++;
$sample[$j][$l] = $counter;
print FILE "secCOUNTER " . $secondcounter;
$secondcounter++;
}
print FILE " space";
open(MYFILE, $path) or die 'ERRROR';
my $count = 0;
foreach $line (<MYFILE>) {
my #endtime;
$flag = 'false';
#$counter++;
$count++;
print FILE $count . "========";
if ($count == 1) {
($hrs, $min) = getdate($line);
$starttime[$j][$l] = ($hrs * 60) + $min;
}
else {
($hrs, $min) = split(':', $line, 3);
if ($line =~ m/End of Procedure/) {
$flag = 'true';
$endtime[$j][$l] = $hrs . $min;
$endtime[$j][$l] = ($hrs * 60) + $min;
}
else {
$endtime[$j][$l] = ($hrs * 60) + $min;
}
}
$duration[$j][$l] = $endtime[$j][$l] - $starttime[$j][$l];
}
# print $flag;
#print FILE $file." : ";
#print FILE "value of ".$j."and".$l;
$startstatus[$j][$l] = "Started at" . $starttime[$j][$l];
$durationstatus[$j][$l] = " Duration is " . $duration[$j][$l] . "m";
# print FILE "Started at".$starttime;
# print FILE " Duration is ".$duration."m";
# print FILE "<br>";
close(MYFILE);
}
my $valueofl = $l;
#print FILE "vlaeeofl".$valueofl;
print "valueofllllllllllllllllllllllllllll" . $l;
$l = 0;
if ($flag eq 'true') {
$status = 'Completed';
$color = '#008000';
print FILE "<font size=\"3\" color="
. $color
. " face=\"Tahoma\"><b><u><br>"
. $two[$i]
. "</font></u></b><br>";
print FILE $status . "<br>";
while ($l <= $valueofl) {
#print $j."and".$l;
# print "valueofllllllllllllllllllllllllllll".$l;
print FILE $sample[$j][$l] . "    ";
print FILE $startstatus[$j][$l] . "    ";
print FILE $durationstatus[$j][$l] . "<br>";
$l++;
}
# print FILE $startstatus[$j][0];
# print FILE $durationstatus[$j][0];
}
else {
#print "valueofllllllllllllllllllllllllllll".$l;
#print $j."and".$l;
$status = 'In Progress';
$color = 'blue';
print FILE "<font size=\"3\" color="
. $color
. " face=\"Tahoma\"><b><u><br>"
. $two[$i]
. "</font></u></b><br>"
. $status;
}
$i++;
$j++;
}
print FILE "</body>";
print FILE "</html>";
close(FILE);
close(MYFILE1)
This is a shocking piece of Perl. You must always start your program with use strict and use warnings, and declare all variables as close as possible to their first point of use using my. That is the most basic form of debugging, and it is only polite to do this at the very least before asking other people for help.
The problem is likely to lie in your for statement
foreach $counter ($file) { ... }
which will execute the body of the loop just once, with $counter set to the value of $file. I can't imagine what you meant it to do.
I am working on some genome data and I have 2 files ->
File1
A1 1 10
A1 15 20
A2 2 11
A2 13 16
File2
>A1
CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA
AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT
>A2
GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA
AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC
CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT
In file 1, 2nd and 3rd column represents the indexes in File2. So I want that, if character in column1 of file1 matches with character followed by symbol (>) in file2 , then from next line of that file2 give back the substring according to indexes in col2 and col3 of file1. (sorry, I know its complicated) Here is the desire output ->
Output
>A1#1:10
CTATTATTTA
>A1#15:20
ACCTA
>A2#2:11
TCTGCACAGC
>A2#13:16
GCTT
I know if I have only 1 string I can take out sub-string very easily ->
#ARGV or die "No input file specified";
open $first, '<',$ARGV[0] or die "Unable to open input file: $!";
$string="GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT";
while (<$first>)
{
#cols = split /\s+/;
$co=$cols[1]-1;
$length=$cols[2]-$co;
$fragment = substr $string, $co, $length;
print ">",$cols[0],"#",$cols[1],":",$cols[2],"\n",$fragment,"\n";
}
but here my problem is when should I input my second file and how should I match the character in col1 (of file1) with character in file2 (followed by > symbol) and then how to get substring?
I wasnt sure if they were all one continuous line or separate lines.
I set it up as continuous for now.
Basically, read the 2nd file as master.
Then you can process as many index files as you need.
You can use hash of arrays to help with the indexing.
push #{$index{$key}}, [$start,$stop];
use strict;
use warnings;

# Print the sub-sequences selected by one or more index files.
# The master file holds the named sequences; each index line is
# "<name> <start> <stop>" with 1-based, inclusive positions.
my $master_file = "dna_master.txt";

# Require at least one index file.  ($#ARGV is the last index, so testing it
# for truth would accept exactly one argument and nothing else.)
unless (@ARGV) {
    print "Usage: $0 [filename(s)]\n";
    exit 1;
}

my %Data = read_master($master_file);

foreach my $index_file (@ARGV) {
    my %Index = read_index($index_file);
    foreach my $key (sort keys %Index) {
        unless (defined $Data{$key}) {
            warn "No sequence named '$key' in $master_file; skipping\n";
            next;
        }
        foreach my $range (@{ $Index{$key} }) {
            my ($start, $stop) = @$range;
            print ">$key#$start:$stop\n";

            # substr() is 0-based and takes a length, not an end position.
            my $pos   = $start - 1;
            my $count = $stop - $start + 1;
            print substr($Data{$key}, $pos, $count) . "\n";
        }
    }
}
# read_file($file)
#
# Return the non-blank lines of $file, each with its newline and any leading
# or trailing whitespace removed.  Dies when the file cannot be opened.
sub read_file {
    my $file = shift;
    my @lines;

    # Lexical handle and three-argument open: the name cannot be read as an
    # open mode, and the handle cannot collide with another FILE in the program.
    open(my $fh, '<', $file) or die "Error: cannot open $file\n$!";

    # A named line variable leaves the caller's $_ untouched.
    while (my $line = <$fh>) {
        chomp $line;                 # remove newline
        $line =~ s/^\s+|\s+$//g;     # strip lead/trail whitespace
        next if $line eq '';         # skip blanks
        push @lines, $line;
    }
    close $fh;
    return @lines;
}
sub read_index {
my $file = shift;
my #lines = read_file($file);
my %index;
foreach (#lines) {
my ($key,$start,$stop) = split /\s+/;
push #{$index{$key}}, [$start,$stop];
}
return %index;
}
sub read_master {
my $file = shift;
my %master;
my $key;
my #lines = read_file($file);
foreach (#lines) {
if ( m{^>(\w+)} ) { $key = $1 }
else { $master{$key} .= $_ }
}
return %master;
}
Load File2 in a Hash, with A1, A2... as keys, and the DNA sequence as value. This way you can get the DNA sequence easily.
This 2nd update turns the master file into a hash of arrays as well.
This treats each row in the 2nd file as individual sequences.
use strict;
use warnings;

# Print the sub-sequences selected by one or more index files.
# Every row below a ">name" header in the master file is treated as a
# separate sequence; each index line is "<name> <start> <stop>" with
# 1-based, inclusive positions.
my $master_file = "dna_master.txt";

# Require at least one index file.  ($#ARGV is the last index, so testing it
# for truth would accept exactly one argument and nothing else.)
unless (@ARGV) {
    print "Usage: $0 [filename(s)]\n";
    exit 1;
}

my %Data = read_master($master_file);

foreach my $index_file (@ARGV) {
    my %Index = read_index($index_file);
    foreach my $key (sort keys %Index) {
        unless (defined $Data{$key}) {
            warn "No sequence named '$key' in $master_file; skipping\n";
            next;
        }
        foreach my $range (@{ $Index{$key} }) {
            my ($start, $stop) = @$range;
            print ">$key#$start:$stop\n";

            # substr() is 0-based and takes a length, not an end position.
            my $pos   = $start - 1;
            my $count = $stop - $start + 1;
            foreach my $seq (@{ $Data{$key} }) {
                print substr($seq, $pos, $count) . "\n";
            }
        }
    }
}
# read_file($file)
#
# Return the non-blank lines of $file, each with its newline and any leading
# or trailing whitespace removed.  Dies when the file cannot be opened.
sub read_file {
    my $file = shift;
    my @lines;

    # Lexical handle and three-argument open: the name cannot be read as an
    # open mode, and the handle cannot collide with another FILE in the program.
    open(my $fh, '<', $file) or die "Error: cannot open $file\n$!";

    # A named line variable leaves the caller's $_ untouched.
    while (my $line = <$fh>) {
        chomp $line;                 # remove newline
        $line =~ s/^\s+|\s+$//g;     # strip lead/trail whitespace
        next if $line eq '';         # skip blanks
        push @lines, $line;
    }
    close $fh;
    return @lines;
}
sub read_index {
my $file = shift;
my #lines = read_file($file);
my %index;
foreach (#lines) {
my ($key,$start,$stop) = split /\s+/;
push #{$index{$key}}, [$start,$stop];
}
return %index;
}
sub read_master {
my $file = shift;
my %master;
my $key;
my #lines = read_file($file);
foreach (#lines) {
if ( m{^>(\w+)} ) { $key = $1 }
else { push #{ $master{$key} }, $_ }
}
return %master;
}
Output:
>A1#1:10
CTATTATTTA
AAGTGTGTTA
>A1#15:20
ACCTAC
ATTAAT
>A2#2:11
TCTGCACAGC
ACCCCCCCCT
AAACCCCAAA
>A2#13:16
GCTT
CCCC
ACAA