Unable to find it out duplicate - perl - perl

I am traversing all files to get the desired one in some directory tree recursively, as soon as i am getting that files i doing some operation on them but before doing the operation i need to check whether i have performed operation on this file or not if yes then don't do it again else continue :
But the prob is, i am unable to find the way to check the condition :(
Here is my code :
use strict;
use warnings;
use autodie;
use File::Find 'find';
use File::Spec;
use Data::Printer;
my ( $root_path, $id ) = #ARGV;
our $anr_name;
opendir my ($dh), $root_path;
my #dir_list = grep -d, map File::Spec->catfile( $root_path, $_ ), grep { not /\A\.\.?\z/ } readdir $dh;
closedir $dh;
my $count;
for my $dir (#dir_list) {
find(
sub {
return unless /traces[_d]*/;
my $file = $_;
my #all_anr;
#print "$file\n\n";
my $file_name = $File::Find::name;
open( my $fh, "<", $file ) or die "cannot open file:$!\n";
my #all_lines = <$fh>;
my $i = 0;
foreach my $check (#all_lines) {
if ( $i < 10 ) {
if ( $check =~ /Cmd line\:\s+com\.android\..*/ ) {
$anr_name = $check;
my #temp = split( ':', $anr_name );
$anr_name = $temp[1];
push( #all_anr, $anr_name );
#print "ANR :$anr_name\n";
my $chk = check_for_dublicate_anr(#all_anr);
if ( $chk eq "1" ) {
# performed some action
}
}
$i++;
} else {
close($fh);
last;
}
}
},
$dir
);
}
sub check_for_dublicate_anr {
my #anrname = #_;
my %uniqueAnr = ();
foreach my $item (#anrname) {
unless ( $uniqueAnr{$item} ) {
# if we get here, we have not seen it before
$uniqueAnr{$item} = 1;
return 1;
}
}
}

You can simplify things with Path::Class and Path::Class::Rule:
use 5.010;
use warnings;
use Path::Class;
use Path::Class::Rule;
my $root = ".";
my #dirs = grep { -d $_ } dir($root)->children();
my $iter = Path::Class::Rule->new->file->name(qr{traces[_d]*})->iter(#dirs);
my $seen;
while ( my $file = $iter->() ) {
for ( $file->slurp( chomp => 1 ) ) {
next unless /Cmd line:\s+(com\.android\.\S*)/;
do_things( $file, $1 ) unless $seen->{$1}++;
}
}
sub do_things {
my ( $file, $str ) = #_;
say "new $str in the $file";
}

Related

Perl output format

I'm reading a log file and grouping it based on the 'Program' name and in turn its ID.
LOG FILE
------------------------------------------
DEV: COM-1258
Program:Testing
Reviewer:Jackie
Description:New Entries
rev:r145201
------------------------------------------
QA: COM-9696
Program:Testing
Reviewer:Poikla
Description:Some random changes
rev:r112356
------------------------------------------
JIRA: COM-1234
Program:Development
Reviewer:John Wick
Description:Genral fix
rev:r345676
------------------------------------------
JIRA:COM-1234
Program:Development
Reviewer:None
Description:Updating Received
rev:r909276
------------------------------------------
JIRA: COM-6789
Program:Testing
Reviewer:Balise Mat
Description:Audited
rev:r876391
------------------------------------------
JIRA: COM-8585
Program:Testing
Reviewer:Gold frt
Description: yet to be reviewed
rev:r565639
The code I have,
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Terse = 1;
my $file = "log.txt";
open FH, $file or die "Couldn't open file: [$!]\n";
my $data = {};
my $hash = {};
while (<FH>)
{
my $line = $_;
chomp $line;
if ($line =~ m/(-){2,}/)
{
my $program = $hash->{Program} || '';
my $jira = $hash->{JIRA} || $hash->{QA} || $hash->{DEV} ||
+'';
if ($program && $jira)
{
push #{$data->{$program}{$jira}}, $hash;
$hash = {};
}
}
else
{
if ($line =~ m/:/)
{
my ($key, $value) = split /:\s*/, $line;
$hash->{$key} = $value;
}
elsif ($line =~ m#/# && exists $hash->{Files})
{
$hash->{Files} .= "\n$line";
}
}
}
print 'data = ' . Dumper($data);
foreach my $prg (sort keys %{$data})
{
print "===========================================================
+=\n";
print " PROGRAM : $prg
+ \n";
print "===========================================================
+=\n";
foreach my $jira (sort keys %{$data->{$prg}})
{
print "******************\n";
print "JIRA ID : $jira\n";
print "******************\n";
foreach my $hash (#{$data->{$prg}{$jira}})
{
foreach my $key (keys %{$hash})
{
# print the data except Program and JIRA
next if $key =~ m/(Program|JIRA|DEV|QA)/;
print " $key => $hash->{$key}\n";
}
print "\n";
}
}
}
I have a requirement to print the output in the below format and currently unable to do so with my logic, any ideas would be really helpful.
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
I would write this
use strict;
use warnings 'all';
use List::Util 'uniq';
my $file = 'log.txt';
open my $fh, $file or die "Couldn't open file: [$!]\n";
my #data;
{
my %item;
while ( <$fh> ) {
chomp;
if ( eof or /\-{2,}/ ) {
push #data, { %item } if keys %item;
%item = ();
}
else {
my ( $key, $value ) = split /\s*:\s*/;
next unless $value;
$item{$key} = $value;
$item{jira} = $key if grep { $key eq $_ } qw/ JIRA DEV QA /;
}
}
}
my %data;
{
for my $item ( #data ) {
my ($prog, $jira) = #{$item}{qw/ Program jira /};
push #{ $data{$prog}{$jira} }, $item->{$jira};
}
}
for my $prog ( sort keys %data ) {
printf "PROGRAM: %s\n", $prog;
print "Change IDs:\n";
my $n = 1;
for my $jira ( qw/ JIRA QA DEV / ) {
next unless my $codes = $data{$prog}{$jira};
printf "%d.%s\n", $n++, $jira;
my $l = 'a';
printf " %s.%s\n", $l++, $_ for sort(uniq(#$codes));
}
print "\n";
}
output
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
#!/usr/bin/perl -w
use strict;
use warnings;
use Data::Dumper;
my $file = 'test';
my $hash;
my $id_hash = ();
my $line_found = 0;
my $line_count = 1;
my $ID;
my $ID_num;
open (my $FH, '<', "$file") or warn $!;
while (my $line = <$FH> ) {
chomp($line);
if ( $line =~ m/------------------------------------------/){
$line_found = 1;
$line_count++;
next;
}
if ( $line_found ) {
$line =~ m/(.*?):(.*)/;
$ID = $1;
$ID_num = $2;
$line_found = 0;
}
if ( $line =~ m/Program:(.*)/ ) {
my $pro = $1;
push #{$hash->{$pro}->{$ID}}, ($ID_num) ;
}
$line_count++;
}
close $FH;
foreach my $pro (keys %$hash){
# print Dumper($pro);
print "PROGRAM:\t$pro\nChange IDs:\n";
foreach my $ids (keys $hash->{$pro}){
print "\t1. $ids\n";
foreach my $id (values $hash->{$pro}->{$ids}){
print "\t\ta. $id\n";
}
}
}
OUTPUT
PROGRAM: Testing
Change IDs:
1. QA
a. COM-9696
1. DEV
a. COM-1258
1. JIRA
a. COM-6789
a. COM-8585
PROGRAM: Development
Change IDs:
1. JIRA
a. COM-1234
a. COM-1234
Just change the output to your need!!

How to find the class-file on case insensitive filesystem?

Simple test case (for the demonstration of the problem):
mkdir -p ./lib1/Class ./lib2/Class
touch ./lib1/Class/Name.pm ./lib2/Class/NAME.pm
So, have:
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
Need search for the right file in the case-insensitive filesystem (OS X's HFS+).
The following works on case-sensitive filesystem,
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
my #DIRS = qw(./lib1 ./lib2);
for my $class ( qw(Class::Name Class::NAME) ) {
my $file = findClassFile($class);
say $file;
}
sub findClassFile {
my($file) = #_;
$file =~ s|::|/|g;
$file .= ".pm";
for my $dir (#DIRS) {
return "$dir/$file" if( -e "$dir/$file" );
}
return undef;
}
and prints
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
on the OS X, it prints incorrectly:
./lib1/Class/Name.pm
./lib1/Class/NAME.pm
How to find on the OSX's insensitive filesystem the correct filename?
Ps: Now only comes to my mind write and recursive routine with opendir/readdir/chdir and checking the filenames what are comes from readdir. Not to shabby... Exists some more easy way?
My current solution is:
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
my #DIRS = qw(./lib1 ./lib2 /Users/me/tmp/lib3);
for my $class ( qw(Class::Name Class::NAME CLASS::name Class::Namex) ) {
my $file = findClassFile($class);
say $file // "Not found $class";
}
sub findClassFile {
my($classname) = #_;
my $file = ($classname =~ s|::|/|gr) . ".pm";
for my $dir (#DIRS) {
return "$dir/$file" if( FileExists("$dir/$file") );
}
return undef;
}
sub FileExists {
my($path) = #_;
my $curr = $path =~ m|^/| ? "/" : ".";
for my $part (split '/', $path) {
next unless $part;
opendir(my $dfd, $curr) || return undef;
my #files = grep {/^$part$/} readdir($dfd);
closedir($dfd);
return undef unless( #files );
$curr .= "/$part";
}
return $curr;
}
what prints:
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
/Users/me/tmp/lib3/CLASS/name.pm
Not found Class::Namex
so - it's working, only don't like it.. ;)

Why would my if(-f ) not be working?

When initially checking if ARGV[0] is a file or directory, it worked fine with accurate results. Then further down into sub files I try it again and it doesnt return anything. This may seem like a small silly question but Im a student and help on programming on the weekends is scarce. Sometimes a new set of eyes can help.Thanks in advance.
#!/usr/bin/perl -w
my %hash;
my $args = #ARGV;
my $dh = $ARGV[0];
if ( -f $dh ) {
words($dh);
} elsif ( -d $dh ) {
files($dh);
}
sub words {
open( my $file, '<', $_[0] ) or die 'cant open file';
while ( my $line = <$file> ) {
chomp $line;
#words = split( /\s+/, $line );
foreach $word (#words) {
$word =~ tr/A-Z/a-z/;
$hash{$word}++;
}
}
foreach $key ( keys %hash ) {
print $key. " " . $hash{$key} . "\n";
}
}
sub files {
opendir( DH, $_[0] );
my #paths = grep !/^\./, readdir(DH);
closedir(DH);
foreach (#paths) {
if ( -f $_ ) {
print $_. "\n";
}
}
}
You're missing path to your file,
if (-f "$_[0]/$_") ..
or to make it less obscure,
sub files{
my ($path) = #_;
opendir(my $DH, $path);
my #paths = grep !/^\./, readdir($DH);
closedir($DH);
foreach (#paths) {
if (-f "$path/$_") {
print "$_\n";
}
}
}
As has already been stated, the return values of readdir contain just the basename. Therefore, to do file tests, you must either chdir or include the path info explicitly.
if (-f "$_[0]/$_") {
One alternative solution is to use Path::Class or some similar module for doing Cross-platform path specification manipulation.
The following is your script rewritten using this module:
use strict;
use warnings;
use Path::Class;
my $path = shift // die "Usage: $0 <Dir or File>\n";
words($path) if -f $path;
files($path) if -d $path;
sub words {
my $file = file(shift);
my %count;
$count{ lc($_) }++ for split ' ', $file->slurp;
for my $key ( keys %count ) {
printf "%s %s\n", $key, $count{$key};
}
}
sub files {
my $dir = dir(shift);
for ( $dir->children ) {
next if $_->is_dir;
print "$_\n", $_->basename;
}
}

<DATA> prevents foreach loop from being executed, why? :)

I have two nested foreach loops. If I use this code:
foreach (#directories) {
my $actual_directory = $_;
print "\nactual directory: ".$actual_directory."\n";
foreach (#files) {
my $file_name = $_;
my $actual_file = $actual_directory.$file_name;
print $actual_file."\n";
open(DATA, $actual_file) or die "Nelze otevřít zdrojový soubor: $!\n";
my $line_number = 0;
# while (<DATA>){
# my #znaky = split(' ',$_);
# my $poradi = $znaky[0]; #poradi nukleotidu
# my $hodnota = $znaky[1]; #hodnota
# my #temp = $files_to_sum_of_lines{$actual_file};
# $temp[$line_number] += $hodnota;
# $files_to_sum_of_lines{$actual_file} = #temp;
# $line_number+=1;
# }
# close(DATA);
}
}
I got this output:
actual directory: /home/n/Plocha/counting_files/1/
/home/n/Plocha/counting_files/1/a.txt
/home/n/Plocha/counting_files/1/b.txt
actual directory: /home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/a.txt
/home/n/Plocha/counting_files/2/b.txt
However, if I uncomment "while (<DATA>){ }", I loose a.txt and b.txt, so the output looks like this:
actual directory: /home/n/Plocha/counting_files/1/
/home/n/Plocha/counting_files/1/a.txt
/home/n/Plocha/counting_files/1/b.txt
actual directory: /home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/
How can this while (<DATA>) prevent my foreach from being executed?
Any help will be appreciated. Thanks a lot.
In addition to not using DATA, try using lexical loop variables, and lexical filehandles. Also, Perl's built-in $. keeps track of line numbers for you.
for my $actual_directory (#directories) {
print "\nactual directory: ".$actual_directory."\n";
foreach my $file_name (#files) {
my $actual_file = $actual_directory.$file_name;
print $actual_file."\n";
open my $INPUT, '<', $actual_file
or die "Nelze otevřít zdrojový soubor: $!\n";
while (my $line = <$INPUT>) {
my #znaky = split(' ', $line);
my $poradi = $znaky[0]; #poradi nukleotidu
my $hodnota = $znaky[1]; #hodnota
#temp = $files_to_sum_of_lines{$actual_file};
$temp[ $. ] += $hodnota;
$files_to_sum_of_lines{$actual_file} = #temp;
}
close $INPUT;
}
}
On the other hand, I can't quite tell if there is a logic error in there. Something like the following might be useful:
#!/usr/bin/perl
use warnings; use strict;
use Carp;
use File::Find;
use File::Spec::Functions qw( catfile canonpath );
my %counts;
find(\&count_lines_in_files, #ARGV);
for my $dir (sort keys %counts) {
print "$dir\n";
my $dircounts = $counts{ $dir };
for my $file (sort keys %{ $dircounts }) {
printf "\t%s: %d\n", $file, $dircounts->{ $file };
}
}
sub count_lines_in_files {
my $file = canonpath $_;
my $dir = canonpath $File::Find::dir;
my $path = canonpath $File::Find::name;
return unless -f $path;
$counts{ $dir }{ $file } = count_lines_in_file($path);
}
sub count_lines_in_file {
my ($path) = #_;
my $ret = open my $fh, '<', $path;
unless ($ret) {
carp "Cannot open '$path': $!";
return;
}
1 while <$fh>;
my $n_lines = $.;
close $fh
or croak "Cannot close '$path': $!";
return $n_lines;
}
Perl uses __DATA__ to make a pseudo-data file at the end of the package. You can access that using the filehandle DATA, e.g. <DATA>. Is it possible that your filehandle is conflicting? Try changing the filehandle to something else and see if it works better.

DBD::CSV: How can I generate different behavior with the two f_ext-options ".csv" and ".csv/r"?

This is from the DBD::File-documentation:
f_ext
This attribute is used for setting the file extension where (CSV) files are opened. There are several possibilities.
DBI:CSV:f_dir=data;f_ext=.csv
In this case, DBD::File will open only table.csv if both table.csv and table exist in the datadir. The table will still be named table. If your datadir has files with extensions, and you do not pass this attribute, your table is named table.csv, which is probably not what you wanted. The extension is always case-insensitive. The table names are not.
DBI:CSV:f_dir=data;f_ext=.csv/r
In this case the extension is required, and all filenames that do not match are ignored.
It was not possible for me to generate different behavior with the two options ".csv/r" and ".csv". Could someone show me an example, where I can see the difference between ".csv/r" and ".csv"?
I can't seem to get it to do anything different either. The relevant section of code is
sub file2table
{
my ($data, $dir, $file, $file_is_tab, $quoted) = #_;
$file eq "." || $file eq ".." and return;
my ($ext, $req) = ("", 0);
if ($data->{f_ext}) {
($ext, my $opt) = split m/\//, $data->{f_ext};
if ($ext && $opt) {
$opt =~ m/r/i and $req = 1;
}
}
(my $tbl = $file) =~ s/$ext$//i;
$file_is_tab and $file = "$tbl$ext";
# Fully Qualified File Name
my $fqfn;
unless ($quoted) { # table names are case insensitive in SQL
opendir my $dh, $dir or croak "Can't open '$dir': $!";
my #f = grep { lc $_ eq lc $file } readdir $dh;
#f == 1 and $file = $f[0];
closedir $dh or croak "Can't close '$dir': $!";
}
$fqfn = File::Spec->catfile ($dir, $file);
$file = $fqfn;
if ($ext) {
if ($req) {
# File extension required
$file =~ s/$ext$//i or return;
}
else {
# File extension optional, skip if file with extension exists
grep m/$ext$/i, glob "$fqfn.*" and return;
$file =~ s/$ext$//i;
}
}
$data->{f_map}{$tbl} = $fqfn;
return $tbl;
} # file2table
Does this demonstrate the difference?:
sandbox % echo "a,b,c" > foo
sandbox % echo "a,b,c" > foo.csv
sandbox % echo "a,b,c" > bar
sandbox % echo "a,b,c" > baz.csv
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv")->tables'
"merijn".baz
"merijn".bar
"merijn".foo
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv/r")->tables'
"merijn".baz
"merijn".foo
sandbox %
f_ext=.csv only makes the .csv a preference, but nor a requirement: in the first case, the file "bar" with no .csv extension is still used, but "foo.csv" is chosen over "foo". With f_ext=.csv/r", "bar" is ignored, as it has no ".csv" extension.
Now in version 0.39 of DBD::File this part looks like this:
sub file2table
{
my ($self, $meta, $file, $file_is_table, $respect_case) = #_;
$file eq "." || $file eq ".." and return; # XXX would break a possible DBD::Dir
my ($ext, $req) = ("", 0);
if ($meta->{f_ext}) {
($ext, my $opt) = split m/\//, $meta->{f_ext};
if ($ext && $opt) {
$opt =~ m/r/i and $req = 1;
}
}
# (my $tbl = $file) =~ s/$ext$//i;
my ($tbl, $dir, $user_spec_file);
if ($file_is_table and defined $meta->{f_file}) {
$tbl = $file;
($file, $dir, undef) = File::Basename::fileparse ($meta->{f_file});
$user_spec_file = 1;
}
else {
($tbl, $dir, undef) = File::Basename::fileparse ($file, $ext);
$user_spec_file = 0;
}
-d File::Spec->catdir ($meta->{f_dir}, $dir) or
croak (File::Spec->catdir ($meta->{f_dir}, $dir) . ": $!");
!$respect_case and $meta->{sql_identifier_case} == 1 and # XXX SQL_IC_UPPER
$tbl = uc $tbl;
!$respect_case and $meta->{sql_identifier_case} == 2 and # XXX SQL_IC_LOWER
$tbl = lc $tbl;
my $searchdir = File::Spec->file_name_is_absolute ($dir)
? $dir
: Cwd::abs_path (File::Spec->catdir ($meta->{f_dir}, $dir));
$searchdir eq $meta->{f_dir} and
$dir = "";
unless ($user_spec_file) {
$file_is_table and $file = "$tbl$ext";
# Fully Qualified File Name
my $cmpsub;
if ($respect_case) {
$cmpsub = sub {
my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
$fn eq $tbl and
return (lc $sfx eq lc $ext or !$req && !$sfx);
return 0;
}
}
else {
$cmpsub = sub {
my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
lc $fn eq lc $tbl and
return (lc $sfx eq lc $ext or !$req && !$sfx);
return 0;
}
}
opendir my $dh, $searchdir or croak "Can't open '$searchdir': $!";
my #f = sort { length $b <=> length $a } grep { &$cmpsub ($_) } readdir $dh;
#f > 0 && #f <= 2 and $file = $f[0];
!$respect_case && $meta->{sql_identifier_case} == 4 and # XXX SQL_IC_MIXED
($tbl = $file) =~ s/$ext$//i;
closedir $dh or croak "Can't close '$searchdir': $!";
#(my $tdir = $dir) =~ s{^\./}{}; # XXX We do not want all tables to start with ./
#$tdir and $tbl = File::Spec->catfile ($tdir, $tbl);
$dir and $tbl = File::Spec->catfile ($dir, $tbl);
my $tmpfn = $file;
if ($ext) {
if ($req) {
# File extension required
$tmpfn =~ s/$ext$//i or return;
}
# else {
# # File extension optional, skip if file with extension exists
# grep m/$ext$/i, glob "$fqfn.*" and return;
# $tmpfn =~ s/$ext$//i;
# }
}
}
my $fqfn = File::Spec->catfile ($searchdir, $file);
my $fqbn = File::Spec->catfile ($searchdir, $tbl);
$meta->{f_fqfn} = $fqfn;
$meta->{f_fqbn} = $fqbn;
!defined $meta->{f_lockfile} && $meta->{f_lockfile} and
$meta->{f_fqln} = $meta->{f_fqbn} . $meta->{f_lockfile};
$meta->{table_name} = $tbl;
return $tbl;
} # file2table
As far as I can see, the two f_ext-options are working as expected.