perl date fetch script assistance - perl

I was just having a go at perl. I was trying to extract the date and time from a line in a text file that begins with
Date: 05/Feb/2017 21:30:00 PST - 06/Feb/2017 06:00:00 PST
I have managed to scavenge a script but Im trying to modify it so it extracts the above to
start date - 2017-02-05 21:30:00
end date - 2017-02-06 06:00:00
any help would be appreciated
#Is it a reg date
} elsif (!$start && $line[$i] =~ /^Date: (.+?) - (.+)$/i) {
$regstartdate = $1;
$regenddate = $2;
} elsif (!$start && $line[$i] =~ /^Date: (.+)$/i) {
#Monday 3rd July 2006
#Friday 30th June 2006 01:00 Hrs EST
$regstartdate = $1;
eval{
$start = &dateconv($regstartdate);
$end = $start;
1;
}
or do{
warn "Could not process start date: $regstartdate";
};
} elsif (!$start && $line[$i] =~ /^Time:\s*(\d+:\d+)[\s\-]+\s+(\d+:\d+)\s+[hH][rR][sS]\s+(\w+)$/i) {
$tz = $3;
$twestartdate = $regstartdate . " ".$1." Hrs ".$3;
$tweenddate = $regenddate . " ".$2." Hrs ".$3;
$start = &dateconv($regstartdate);
$end = &dateconv($regenddate);
} elsif (!$start && $line[$i] =~ /^Time:\s*(\d+:\d+)[\s*\-]+\s*(\d+:\d+)\s+(\w+)$/i) {
$tz = $3;
$regstartdate = $regstartdate . " ".$1." Hrs ".$3;
$regenddate = $regenddate . " ".$2." Hrs ".$3;
$start = &dateconv($regstartdate);
$end = &dateconv($regenddate);
#Is it a reg Time
} elsif (!$start && $line[$i] =~ /^Interruption Time: (\d+:\d+)[\s\-]+(\d+:\d+) (\w+)/i) {
$regstartdate = $regstartdate . " " . $1 . " Hrs " . $3;
$regenddate = $regenddate . " " . $2 . " Hrs " . $3;
$start = &dateconv($regstartdate);
$end = &dateconv($regenddate);
#Is is the start time ?
} elsif (!$start && $line[$i] =~ /^Duration:\s*START DATE (.*)/i) {
$start = &dateconv($1);
#Is it the end time ?
} elsif (!$end && $line[$i] =~ /^END DATE (.*)/i) {
my $endtime = $1;
$end = &dateconv($endtime);
$endtime =~ m/(\w{1,2}ST)/g;
$tz = $1;

Offering a solution using Time::Piece which I prefer - both because I like the interface, but also because it's core as of perl 5.9.5
#!/usr/bin/env perl
use strict;
use warnings;
use Time::Piece;
my $input_format = '%d/%b/%Y %H:%M:%S';
my $output_format = '%Y-%m-%d %H:%M:%S';
while (<DATA>) {
if ( my ( $start_str, $end_str ) = m/Date: (.*) PST - (.*) PST/ ) {
my $start = Time::Piece->strptime( $start_str, $input_format );
my $end = Time::Piece->strptime( $end_str, $input_format );
print "Start:\t", $start->strftime($output_format), "\n";
print "End: \t", $end ->strftime($output_format), "\n";
}
}
__DATA__
Date: 05/Feb/2017 21:30:00 PST - 06/Feb/2017 06:00:00 PST
The reason I like it is because the $start and $end in the above, are 'time objects' so you can do things like:
print $start -> epoch;
print "Elapsed time: ", $end - $start, "s\n";

If you use the DateTime module, parsing and formatting those date strings is trivial:
use strict;
use warnings;
use 5.010;
use DateTime;
use DateTime::Format::Strptime;
my $fmt = DateTime::Format::Strptime->new(pattern => '%d/%b/%Y %H:%M:%S');
my $line = 'Date: 05/Feb/2017 21:30:00 PST - 06/Feb/2017 06:00:00 PST';
$line =~ s/^Date: //;
my #dates = split(/\s*-\s*/, $line);
for my $date (#dates) {
my $dt = $fmt->parse_datetime($date);
say $dt->ymd . ' ' . $dt->hms;
}
Produces:
2017-02-05 21:30:00
2017-02-06 06:00:00
You can adjust the formatting as you see fit.

Related

Perl Date Comparison Query

I'm trying to output "not ok" if the date provided within an input file is greater than one day from "today" using Perl version 5.8.8.
Initializing with "./code.sh < test.txt" works fine, when test.txt contains the following data:
07/02/2020 08/02/2020
When I rehash the code below to try an use "today's date" as a variable, and only have one date within the input file I get the following error:
Use of uninitialized value in concatenation (.) or string at code.sh line 27, <> line 1
Working code (with two dates in the input file):
#!/usr/bin/perl
use strict;
use warnings;
use Time::Piece;
#my $date = localtime->strftime('%d/%m/%Y');
#print "$date";
my $format = '%d/%m/%Y';
while (<>) {
chomp;
my ($str1, $str2) = split;
# my ($date, $str2) = split;
# my $dt1 = Time::Piece->strptime($date, $format);
my $dt1 = Time::Piece->strptime($str1, $format);
my $dt2 = Time::Piece->strptime($str2, $format);
# print "$date / $str2 ";
print "$str1 / $str2 ";
if ($dt2->julian_day - $dt1->julian_day ==1) {
print "ok\n";
} else {
print "not ok\n";
}
}
Broken code (with one date within the input file):
#!/usr/bin/perl
use strict;
use warnings;
use Time::Piece;
my $date = localtime->strftime('%d/%m/%Y');
print "$date";
my $format = '%d/%m/%Y';
while (<>) {
chomp;
# my ($str1, $str2) = split;
my ($date, $str2) = split;
my $dt1 = Time::Piece->strptime($date, $format);
# my $dt1 = Time::Piece->strptime($str1, $format);
my $dt2 = Time::Piece->strptime($str2, $format);
print "$date / $str2 ";
# print "$str1 / $str2 ";
if ($dt2->julian_day - $dt1->julian_day ==1) {
print "ok\n";
} else {
print "not ok\n";
}
}
Not quite sure what I'm doing wrong...
Any help is appreciated
Please pay more attention when you type your code, your sample had a few miss-types
#!/usr/bin/perl
#
# vim: ai:ts=4:sw=4
#
use strict;
use warnings;
use feature 'say';
use Time::Piece;
my $format = '%d/%m/%Y';
my $date = localtime->strftime($format);
say "Today is: $date\n";
my #str_dates = <DATA>;
chomp(#str_dates);
my $date1 = Time::Piece->strptime($str_dates[0], $format);
my $date2 = Time::Piece->strptime($str_dates[1], $format);
my $days_diff = $date2->julian_day - $date1->julian_day;
my $msg = ($days_diff == 1) ? "ok" :"not ok";
say "$date2 :: $date1 => $msg";
say "$date2 :: $date1 = $days_diff day(s) apart";
__DATA__
07/02/2020
08/02/2020
Output
Today is: 07/02/2020
Sat Feb 8 00:00:00 2020 :: Fri Feb 7 00:00:00 2020 => ok
Sat Feb 8 00:00:00 2020 :: Fri Feb 7 00:00:00 2020 = 1 day(s) apart
NOTE: I would recommend if you use:
Windows OS
notepad++
Linux OS vim
vim better use with options: syntax on, ai,ts=4,sw=4

match string between columns using perl

I want to compare a string in column A with that in column B for every row and print a third column that highlights the differences.
Column A Column B
uuaaugcuaauugugauaggggu uuaaugcuaauugugauaggggu
uuaaugcuaauugugauagggguu uuaaugcuaauugugauaggggu
uuaaugcuaauugugauagggguuu uuaaugcuaauugugauaggggu
Desired Result:
Column A Column B Column C
uuaaugcuaauugugauaggggu uuaaugcuaauugugauaggggu ********************
uuaaugcuaauugugauagggguu uuaaugcuaauugugauaggggu ********************u
uuaaugcuaauugugauagggguuu uuaaugcuaauugugauaggggu ********************uu
I have an example script that might work, but how do I do this for every row in the data frame?
use strict;
use warnings;
my $string1 = 'AAABBBBBCCCCCDDDDD';
my $string2 = 'AEABBBBBCCECCDDDDD';
my $result = '';
for(0 .. length($string1)) {
my $char = substr($string2, $_, 1);
if($char ne substr($string1, $_, 1)) {
$result .= "**$char**";
} else {
$result .= $char;
}
}
print $result;
Using bruteforce and substr
use strict;
use warnings;
while (<DATA>) {
my ($str1, $str2) = split;
my $len = length $str1 < length $str2 ? length $str1 : length $str2;
for my $i (0..$len-1) {
my $c1 = substr $str1, $i, 1;
my $c2 = substr $str2, $i, 1;
if ($c1 eq $c2) {
substr $str1, $i, 1, '*';
substr $str2, $i, 1, '*';
}
}
printf "%-30s %s\n", $str1, $str2;
}
__DATA__
Column_A Column_B
uuaaugcuaauugugauaggggu uuaaugcuaauugugauaggggu
uuaaugcuaauugugauagggguu uuaaugcuaauugugauaggggu
uuaaugcuaauugugauagggguuu uuaaugcuaauugugauaggggu
AAABBBBBCCCCCDDDDD AEABBBBBCCECCDDDDD
Outputs:
*******A *******B
*********************** ***********************
***********************u ***********************
***********************uu ***********************
*A********C******* *E********E*******
Alternative using XOR
It's also possible to use ^ to find the intersection between two strings.
The following performs the same as the above:
while (<DATA>) {
my ($str1, $str2) = split;
my $intersection = $str1 ^ $str2;
while ($intersection =~ /(\0+)/g) {
my $len = length $1;
my $pos = pos($intersection) - $len;
substr $str1, $pos, $len, '*' x $len;
substr $str2, $pos, $len, '*' x $len;
}
printf "%-30s %s\n", $str1, $str2;
}
I could not resist to provide a modified Miller's solution with regular expressions
use strict;
use warnings;
while (<DATA>) {
my $masked_str1 ="";
my $masked_str2 ="";
my ($str1, $str2) = split;
my $intersection = $str1 ^ $str2;
while ($intersection =~ /(\x00+)/g) {
my $mask = $intersection;
$mask =~ s/\x00/1/g;
$mask =~ s/[^1]/0/g;
while ( $mask =~ /\G(.)/gc ) { # traverse the mask
my $bit = $1;
if ( $str1 =~ /\G(.)/gc ) { # traverse the string1 to be masked
$masked_str1 .= $bit ? '_' : $1;
}
if ( $str2 =~ /\G(.)/gc ) { # traverse the string2 to be masked
$masked_str2 .= $bit ? '_' : $1;
}
}
}
print "=" x 80;
printf "\n%-30s %s\n", $str2, $str1; # Minimum length 30 char, left-justified
printf "%-30s %s\n", $str1, $str2;
printf "%-30s %s\n\n", $masked_str1, $masked_str2;
}

perl variable scope using while loop

I have an ftp upload from my client machine to my server running consistently as a means of backup, occasionally if the connection becomes corrupt the upload will stall, the solution to this is to remove the "corrupt file" from the server, then the client resumes and the file is uploaded next time the client runs. This script is to remove the file if it has never occured before, or check the time stamp if it has been deleted in the past and check that this is a new occurence. Then delete if required.
the line in the logfile will be like:
Sun May 11 02:38:46 2010 [pid 17116] [ftp] FAIL UPLOAD: Client "192.168.179.58", "/Dan/Example.file", 0.00Kbyte/sec
and once written to the filelist it looks like this:
Sun May 11 02:38:46 - /Dan/Example.file
Below you can see where the scope problem lies within the read_filelist() sub-routine.
Please see the solution so far:
#!/usr/bin/perl
use warnings;
use strict;
use Data::Dumper qw(Dumper);
#open /var/log/vsftpd.log read only, and /var/log/vsftpd.log R/W + append
open my $logfile, '<', '/var/log/vsftpd.log' # 3 arg open is safer
or die "could not open file: $!"; # checking for errors is good
open my $filelist, '+<', '/scripts/filelist'
or die "could not open file: $!";
my #rid;
my #filename;
my #deletedfile;
my $int = -1;
my #time;
my #hourcompare;
my #splittime;
my #filelisttime;
my #splitfilelisttime;
my #filelistfile;
my #filelistarray;
my $fileexists = 0;
#Define read_filelist()
sub read_filelist{
my ($filename, $hour, $min, $sec, $filelist) = #_;
while (<$filelist>){
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename){
my $fileexists = 1;
if ($hour >= $fhour){
if($min >= $fmin){
if($sec > $fsec){
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
}
}
#open vsftp log and look for lines that include "FAIL UPLOAD" print those lines to a file
while (<$logfile>) {
$int = $int + 1;
if (index($_, "FAIL UPLOAD:") != -1){
#rid = split /\s+"/, $_;
#filename = split /",/, $rid[2];
#time = split /\s+201/, $rid[0];
}
$deletedfile[$int] = $filename[0];
if ($filename[0] ne $deletedfile[$int-1]){
print $filelist $time[0]." - ".$filename[0]."\n";
}
#convert the timestamp into integers for comparison
#hourcompare = split /\s+/, $time[0];
#splittime = split /:/, $hourcompare[3];
my $sec = $splittime[2]+0;
my $min = $splittime[1]+0;
my $hour = $splittime[0]+0;
#itterate through '/scripts/filelist'
read_filelist($filename[0], $hour, $min, $sec, $filelist);
if ($fileexists = 0){
system ("rm", "-fv", "/home/desktop"."$filename[0]");
}
}
close $filelist;
close $logfile;
the variables pass to the read_filelist() sub no problem, but when I start the while() loop all passed variables become uninitialized:
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
print "$filename" . "\n";
}
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
This returns the following:
Use of uninitialized value in string eq at removefailed.pl line 39, <$filelist> line 1.
Use of uninitialized value $filename in string eq at removefailed.pl line 39, <$filelist> line 1.
log: 0:0:0
file: 2:38:46
Use of uninitialized value $filename in string at removefailed.pl line 52, <$filelist> line 1.
However if I move the prints outside of the while loop it works, but obviously I can only compare them with the last line of the filelist.
sub read_filelist {
my ($filename, $hour, $min, $sec, $filelist) = #_;
print "log: " . "$hour" . ":" . "$min" . ":" . "$sec" . "\n";
while (<$filelist>) {
#filelisttime = split /\s+/, $_;
#splitfilelisttime = split /:/, $filelisttime[3];
#filelistfile = split /\s+-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
if ($filelistfile[2] eq $filename) {
my $fileexists = "T";
if ($hour >= $fhour) {
if($min >= $fmin) {
if($sec > $fsec) {
system ("rm", "-fv", "/home/desktop"."$filename");
}
}
}
}
print "file: " . "$fhour" . ":" . "$fmin" . ":" . "$fsec" . "\n";
}
print "$filename" . "\n";
}
read_filelist($filename[0], $hour, $min, $sec, $filelist);
I get the following output:
file: 2:38:46
log: 2:38:46
/Dan/Example.file
Any help with this would be much appreciated, please let me know if you need any further information?
I have solved this problem using Hash's. I think this was caused because the filelist was already open in the logfile read.
Anyhow I created a global Hash:
my %logfilelines;
passed all the assorted relevant lines to it from the logfile:
$logfilelines{$filename[0].":".$hour.":".$min.":".$sec}++
Then within the read_file() sub I run through %logfilelines; and compare the filename\ time etc. I will have to rebuild the time comparison as it is wrong but atleast I am making progress now. see the new subroutine below in case you are curious:
sub read_filelist{
#my ($filename, $hour, $min, $sec, $filelist) = #_;
my $fint = -1;
my #filelines;
my #filelistlines;
foreach my $line (keys %logfilelines) {
open my $filelist2, '<', 'c:\scripts\filelist'
or die "could not open file: $!";
$fint = $fint + 1;
$filelines[$fint] = $line;
#filelistlines = split /:/, $filelines[$fint];
my $filename = $filelistlines[0];
my $hour = $filelistlines[1]+0;
my $min = $filelistlines[2]+0;
my $sec = $filelistlines[3]+0;
while (<$filelist2>){
my #filelisttime = split /\s+/, $_;
my #splitfilelisttime = split /:/, $filelisttime[3];
my #filelistfile = split /-\s+/, $_;
my $fsec = $splitfilelisttime[2]+0;
my $fmin = $splitfilelisttime[1]+0;
my $fhour = $splitfilelisttime[0]+0;
chomp $filelistfile[1];
if ($filelistfile[1] eq $filename){
# my $fileexists = 1;
print "log: "."$hour".":"."$min".":"."$sec"." $filename"."\n";
print "file: "."$fhour".":"."$fmin".":"."$fsec"."\n";
if ($min > $fmin || $hour > $fhour){
# if($min >= $fmin ||$hour >= $fhour){
# if($sec > $fsec){
#system ("rm", "-fv", "/home/desktop"."$filename");
print "success"." $filename";
# }
# }
}
}
}
}

formatting output data to an excel file

This program gets numeric values from the web for each of the values in my #values array
I want these values to be printed out in a table which looks like
il9 il8 il7
2012 v1 b1
2011 v2 b2
2010 v3 b3
.
.
2000 v12 b12
where v1 .. v12 are values for the first variable in #values etc. here is my program please help me structure it. Is there an escape character that could take me back to the first line of the program in perl
thanks
#!/usr/bin/perl -w
use strict;
use LWP::UserAgent;
use URI;
my $browser = LWP::UserAgent->new;
$browser->timeout(10);
$browser->env_proxy;
open(OUT, ">out");
my $i = 2013;
while ($i-- > 2000){print OUT "$i\n"}
my $a = 2013 ;
my $base = 'http://webtools.mf.uni-lj.si/public/summarisenumbers.php';
my #values = ('il9', 'il8', 'il6' );
foreach my $value (#values) {
print OUT "$value \n"
while ($a-- > 2000){
my $b = $a + 1;
my $c = $b + 1;
my $query = '?query=('.$value.')'.$a.'[dp] NOT '.$b.'[dp] NOT '.$c.'[dp]';
my $add = $base.$query;
#my $url = URI->new($add);
#my $response = $browser->get($url);
#if($response->is_success) {print OUT $response->decoded_content;}
#else {die $response->status_line};
print OUT "$query\n";
} $a = 2013; print OUT
}
close(OUT);
Pay more attention to formatting/indentation and variable naming - it will help you a lot.
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
my $base_url = 'http://webtools.mf.uni-lj.si/public/summarisenumbers.php';
my #values = ( 'il9', 'il8', 'il6' );
my $stat_data = {};
my $browser = LWP::UserAgent->new;
$browser->timeout(10);
$browser->env_proxy;
for my $value ( #values ) {
for my $year ( 2010 .. 2013 ) {
my $query = '?query=(' . $value . ')' . $year .'[dp] NOT ' . ($year+1) . '[dp] NOT ' . ($year+2) .'[dp]';
my $url = "$base_url$query";
my $response = $browser->get( $url );
if( $response->is_success ) {
## store the fetched content in a hash structure
$stat_data->{$year}->{$value} = $response->decoded_content;
}
else {
die $response->status_line;
}
}
}
## print the header
print "\t", join( "\t", #values ), "\n";
## print the data by the years in reverse order
for my $year ( reverse sort keys %{$stat_data} ) {
print $year;
for my $value ( #values ) {
print "\t", $stat_data->{$year}->{$value};
}
print "\n";
}

perl Schwartzian Transform?

i had script:
# N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
# N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
# N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
# N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
# N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
open( CONSULTS, "confile" );
#scons = <CONSULTS>;
close CONSULTS;
my %is_trade_id_unique;
foreach ( reverse sort consort #scons ) {
chomp;
#print $_. "\n";
if ( $_ =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
my ( $trade_id, $date, $month, $row_num ) = ( $1, $2, $3, $4 );
if ( !$is_trade_id_unique{$trade_id} ) {
print $_. "\n";
$is_trade_id_unique{$trade_id} = 1;
}
#print $_."\n";
}
}
#N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
sub consort {
$aa = $a;
$bb = $b;
# save our variables because our sort routine affects them. If I "chomp $a"
# that will actually change the line seen in the foreach loop that calls this.
chomp $aa;
chomp $bb;
$aa =~ s/^ *//;
$bb =~ s/^ *//;
my %months = (
FY => 0,
Jan => 1,
Feb => 2,
Mar => 3,
Apr => 4,
May => 5,
Jun => 6,
Jul => 7,
Aug => 8,
Sep => 9,
Oct => 10,
Nov => 11,
Dec => 12,
);
my ( $trade_id, $date, $month, $row_num );
my ( $btrade_id, $bdate, $bmonth, $brow_num );
if ( $aa =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
( $trade_id, $date, $month, $row_num ) = ( $1, $2, $months{$3}, $4 );
}
if ( $bb =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
( $btrade_id, $bdate, $bmonth, $brow_num ) =
( $1, $2, $months{$3}, $4 );
}
$trade_id cmp $btrade_id
|| $month <=> $bmonth
|| $date <=> $bdate
|| $row_num <=> $brow_num;
}
and i rwrite this script to
#!/usr/bin/perl
use strict;
use warnings;
#use Smart::Comments;
use constant RegExp_parse_name => qr/(\w+)_(\d{2})-(\w{3})-(\d{4})_(\d+)/;
#qr/([A-Z0-9]+_\d+_[A-Z0-9]+)_(\d+)-([A-Z][a-z]{2})-(20\d{2})_(\d+)[.]xml/;
#create month hash
my #month = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
my %months;
foreach my $index ( 0 .. $#month ) { $months{ $month[$index] } = $index }
#generate tmp array for special sort
my #tmp_scons;
while ( my $str = <DATA> ) {
chomp($str);
my ( $trade_id, $date, $month, $year, $row_num ) =
$str =~ RegExp_parse_name;
$trade_id or next;
$month = $months{$month};
push #tmp_scons, [ "$trade_id:$year-$month-$date:$row_num", $str ];
}
my #scons = map $_->[1], sort { $a cmp $b } #tmp_scons;
### #tmp_scons:#tmp_scons
### #scons:#scons
### %months:%months
my %is;
foreach my $str (#scons) {
my ( $trade_id, $date, $month, $year, $row_num ) =
$str =~ RegExp_parse_name;
if ( !$is{$trade_id} ) {
print "$str\n";
}
$is{$trade_id}++;
#print "$str\n";
}
__DATA__
N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
but it doesn't sort correctly what the matter?
In this line:
my #scons = map $_->[1], sort { $a cmp $b } #tmp_scons;
You are sorting the transformed data, and then pulling out the origional. However, in your sort block, when you write $a cmp $b you are comparing the array references, so perl is doing something like 'ARRAY(0x123412)' cmp 'ARRAY(0x234234)' rather than looking at your transformed data, which is in the first element of that array.
Rewrite the line as follows:
my #scons = map $_->[1], sort { $a->[0] cmp $b->[0] } #tmp_scons;
And you will be correctly sorting on the transformed value.
#as a result
#!/usr/bin/env perl
######################################
# $URL: http://mishin.narod.ru $
# $Date: 2011-09-14 19:53:20 +0300 (Web, 14 Sep 2011) $
# $Author: mishin nikolay $
# $Revision: 1.02 $
# $Source: get_latest.pl $
# $Description: Sort trades and get latest $
##############################################################################
use strict;
use warnings;
use utf8;
use Data::Dumper;
use Carp;
use English qw(-no_match_vars);
our $VERSION = '0.01';
my $RGX_SHORT_MESS = qr/^(\w+)_(\d{2})-(\w{3})-(\d{4})_(\d+)/smo;
my $RGX_LONG_MESS = qr/^message[.](\w+)[.](\w+)_(\d{2})-(\w{3})-(\d{4})/smo;
#create month hash
my %months;
# two symbol for correct literal matching
#months{qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec )} =
( '00' .. '11' );
my ( $result, $index );
my $file = shift; #'file_names.txt';
open my $fh, q{<}, $file or croak "unable to open:$file $ERRNO";
process_data($fh); #my #file_names = <$fh>;
close $fh or croak "unable to close: $file $ERRNO";
sub process_data {
my ($fh) = #_;
while ( my $str = <$fh> ) {
chomp $str;
my $search_str = $str;
my $trade_id;
if ( $search_str =~ s/$RGX_SHORT_MESS/$4-$months{$3}-$2:$5/sm ) {
$trade_id = $1;
}
elsif ( $search_str =~ s/$RGX_LONG_MESS/$5-$months{$4}-$3:$1/sm ) {
$trade_id = $2;
}
else { next }
# so, from now we are search BIGGEST value & ignore less
next
if ( exists $index->{$trade_id}
&& ( $index->{$trade_id} gt $search_str ) );
$index->{$trade_id} = $search_str;
$result->{$trade_id} = $str;
}
# $result
foreach ( reverse sort keys %{$result} ) {
print $result->{$_} . "\n";
}
return;
}
__DATA__
N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
message.110530033311A4259348AS26.A4259348AS_26_SWOPT_01-Jul-2011.xml
message.110530033311A4259348AS26.A4259348AS_26_SWOPT_31-May-2011.xml
A4259348AS_26_SWOPT_29-Jun-2011_74754662.xml