Unable to get absolute path of a file from $File::Find::name

I am unable to get the absolute path of a file from $File::Find::name. It is showing an undef value as the output and I am not able to figure out why :( Can anyone please help me out with this?
The error displayed is: Use of uninitialized value $file_name in concatenation
My code:
use strict;
use warnings;
use File::Find;
use File::Path qw(make_path);
use File::Copy;
use Cwd;
use Data::Printer;
my $rootPATH = $ARGV[0];
my $id = $ARGV[1];
my @Arraypath;
my $file_name;
our $anr_name;
opendir( my $DIR, $rootPATH );
while ( my $entry = readdir $DIR ) {
next unless -d $rootPATH . '/' . $entry;
next if $entry eq '.' or $entry eq '..';
#print "Found directory $entry\n";
push( @Arraypath, ( split( "\n", $entry ) ) );
}
closedir $DIR;
my $count = 0;
foreach my $line (@Arraypath) {
my $fulllogpath = $rootPATH . "\\" . $line;
#print "$fulllogpath\n";
$count++;
start($fulllogpath);
}
sub start {
my $fulllogpath = shift;
our @content;
#print "$fulllogpath\n\n";
find( \&wanted, $fulllogpath );
sub wanted {
push @content, $_;
return;
}
foreach my $file (@content) {
# print "$file\n\n";
if ( $file =~ /traces[_d]*/ ) {
print "$file\n\n";
$file_name = $File::Find::name;
p $file_name;
print "$file_name\n";
}
}
}

Your program is very poorly laid out. It will be much simpler to write and debug code if you indent it properly and use carefully-chosen identifiers: a name like start for a subroutine is useless.
You also have unnecessary subroutine declarations which break up the program flow and make it awkward to follow.
Why do you have a couple of package variables (declared with our)? There is generally no need for them, and it is best to use lexical variables throughout, declared at an appropriate place so that all code has access to them if it needs it.
It is also preferable to use File::Spec to work with file paths, rather than manipulate them using string operators, with which it is easy to make a mistake.
The best way to manage the results of find is to work with absolute paths all the way through. It looks like you want to do more than just print the results returned by find since you load modules like Cwd and File::Copy, but without knowing what that further purpose is I cannot help you to write it.
This code removes all the subroutines and makes everything much more concise.
use strict;
use warnings;
use autodie;
use File::Find 'find';
use File::Spec;
use Data::Printer;

my ($root_path, $id) = @ARGV;

opendir my ($dh), $root_path;
my @dir_list =
    grep -d,
    map File::Spec->catfile($root_path, $_),
    grep { not /\A\.\.?\z/ } readdir $dh;
closedir $dh;

my $count;
for my $dir (@dir_list) {
    ++$count;
    find(sub {
        return unless /traces[_d]*/;
        my $file = $_;
        print "$file\n\n";
        my $file_name = $File::Find::name;
        p $file_name;
        print "$file_name\n";
    }, $dir);
}

As has already been stated, $File::Find::name is valid only within the wanted function. Not outside of it.
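A minimal sketch of that fix applied to the question's code (keeping its $fulllogpath and pattern): capture the value inside wanted, while $File::Find::name is still defined, and use the saved copies afterwards.

my @trace_files;
find(sub {
    # $File::Find::name is the full path of the current file, valid only here
    push @trace_files, $File::Find::name if /traces[_d]*/;
}, $fulllogpath);
print "$_\n" for @trace_files;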
However, I would recommend making the shift to using Path::Class and Path::Class::Rule for some simpler processing of your files in a cross platform compatible way:
use strict;
use warnings;
use Data::Printer;
use Path::Class;
use Path::Class::Rule;
my ( $root_path, $id ) = @ARGV;
my $dir = dir($root_path);
my $next = Path::Class::Rule->new->file->name(qr{traces[_d]*})->iter(
    grep { $_->is_dir() } $dir->children
);
while ( my $file = $next->() ) {
    # Accomplishes the same as your script. I suspect these print statements are mostly for debugging, though.
    print $file->basename(), "\n\n";
    p "$file";
    print "$file\n";
}

Related

Tie file not working for loops

I have a script which pulls all the .pm files in my directory, looks for a certain pattern, and changes it to the desired value. I tried Tie::File but it's not looking into the content of the files.
use File::Find;
use Data::Dumper qw(Dumper);
use Tie::File;

my @content;
find( \&wanted, '/home/idiotonperl/project/');
sub wanted {
    push @content, $File::Find::name;
    return;
}
my @content1 = grep{$_ =~ /.*.pm/} @content;
@content = @content1;

for my $absolute_path (@content) {
    my @array='';
    print $absolute_path;
    tie @array, 'Tie::File', $absolute_path or die qq{Not working};
    print Dumper @array;
    foreach my $line(@array) {
        $line=~s/PERL/perl/g;
    }
    untie @array;
}
The output is:
Not working at tiereplacer.pl line 22.
/home/idiotonperl/main/content.pm
This is not working as intended (looking into the content of all the .pm files). If I try to do the same operation on some single test file under my home directory, the content is getting replaced:
@content = 'home/idiotonperl/option.pm'
and it's working as intended.
I would not recommend using tie for that. The simple code below should do as asked.
use warnings;
use strict;
use File::Copy qw(move);
use File::Glob ':bsd_glob';

my $dir = '/home/...';
my @pm_files = grep { -f } glob "$dir/*.pm";

foreach my $file (@pm_files)
{
    my $outfile = 'new_' . $file;   # but better use File::Temp

    open my $fh,     '<', $file    or die "Can't open $file: $!";
    open my $fh_out, '>', $outfile or die "Can't open $outfile: $!";

    while (my $line = <$fh>)
    {
        $line =~ s/PERL/perl/g;
        print $fh_out $line;        # write out the line, changed or not
    }
    close $fh;
    close $fh_out;

    # Uncomment after testing, to actually overwrite the original file
    #move $outfile, $file or die "Can't move $outfile to $file: $!";
}
The glob from File::Glob allows you to specify filenames much as in the shell. See the docs for the accepted metacharacters. The :bsd_glob form is better for its treatment of spaces in filenames. †
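For instance, shell-style alternation also works (lib and t are hypothetical subdirectories here):

my @files = glob "$dir/{lib,t}/*.pm";  # .pm files in either subdirectory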
If you need to process files recursively then you indeed want a module. See File::Find::Rule
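For instance, a minimal recursive sketch with File::Find::Rule, reusing $dir from above:

use File::Find::Rule;
# every .pm file anywhere below $dir, returned as a flat list of paths
my @pm_files = File::Find::Rule->file->name('*.pm')->in($dir);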
The rest of the code does what we must do when changing file content: copy the file. The loop reads each line, changes the ones that match, and writes each line to another file. If the match fails then the s/// makes no changes to $line, so we just copy the unchanged lines as they are.
In the end we move that file to overwrite the original using File::Copy.
The new file is temporary and I suggest creating it with File::Temp.
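A sketch of that step, reusing $dir from above; creating the temporary file in the same directory keeps the final move a cheap same-filesystem rename (the template name is arbitrary):

use File::Temp qw(tempfile);
# returns a filehandle to write to and the generated name to move() later
my ($fh_out, $outfile) = tempfile('pm_edit_XXXX', DIR => $dir);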
† The glob pattern "$dir/..." allows for an injection bug for directories with particular names. While this is very unusual it is safer to use the escape sequence
my @pm_files = grep { -f } glob "\Q$dir\E/*.pm";
In this case File::Glob isn't needed since \Q escapes spaces as well.
Solution using my favorite module: Path::Tiny. Unfortunately, it isn't a core module.
use strict;
use warnings;
use Path::Tiny;
my $iter = path('/some/path')->iterator({recurse => 1});

while( my $p = $iter->() ) {
    next unless $p->is_file && $p =~ /\.pm\z/i;
    $p->edit_lines(sub {
        s/PERL/perl/;
        # add more line-editing here
    });
    # also check the path(...)->edit(...) alternative, sketched below
}
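The edit() alternative mentioned in the comment slurps the whole file into $_, runs the callback over it, and writes the result back; a one-line sketch (file name hypothetical):

path('/some/path/Module.pm')->edit(sub { s/PERL/perl/g });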
Working fine for me:
#!/usr/bin/env perl
use common::sense;
use File::Find;
use Tie::File;

my @content;
find(\&wanted, '/home/mishkin/test/t/');
sub wanted {
    push @content, $File::Find::name;
    return;
}
@content = grep{$_ =~ /.*\.pm$/} @content;

for my $absolute_path (@content) {
    my @array='';
    say $absolute_path;
    tie @array, 'Tie::File', $absolute_path or die "Not working: $!";

    for my $line (@array) {
        $line =~ s/PERL/perl/g;
    }
    untie @array;
}

Perl - empty rows while writing CSV from Excel

I want to convert Excel files to CSV files with Perl. For convenience I like to use the module File::Slurp for the read/write operations. I need it in a subfunction.
While printing to the screen, the program generates the desired output; the generated CSV files unfortunately just contain one row of semicolons, and the fields are empty.
Here is the code:
#!/usr/bin/perl
use File::Copy;
use v5.14;
use Cwd;
use File::Slurp;
use Spreadsheet::ParseExcel;

sub xls2csv {
    my $currentPath = getcwd();
    my @files = <$currentPath/stage0/*.xls>;

    for my $sourcename (@files) {
        print "Now working on $sourcename\n";

        my $outFile = $sourcename;
        $outFile =~ s/xls/csv/g;
        print "Output CSV-File: " . $outFile . "\n";

        my $source_excel = new Spreadsheet::ParseExcel;
        my $source_book  = $source_excel->Parse($sourcename)
            or die "Could not open source Excel file $sourcename: $!";

        foreach my $source_sheet_number ( 0 .. $source_book->{SheetCount} - 1 ) {
            my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];

            next unless defined $source_sheet->{MaxRow};
            next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
            next unless defined $source_sheet->{MaxCol};
            next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};

            foreach my $row_index ( $source_sheet->{MinRow} .. $source_sheet->{MaxRow} ) {
                foreach my $col_index ( $source_sheet->{MinCol} .. $source_sheet->{MaxCol} ) {
                    my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
                    if ($source_cell) {
                        print $source_cell->Value, ";";  # correct output!
                        write_file( $outFile, { binmode => ':utf8' }, $source_cell->Value, ";" );  # only one row of semicolons with empty fields!
                    }
                }
                print "\n";
            }
        }
    }
}
xls2csv();
I know it has something to do with the parameter passing in the write_file function, but I couldn't manage to fix it.
Does anybody have an idea?
Thank you very much in advance.
write_file will overwrite the file unless the append => 1 option is given. So this:
write_file( $outFile, { binmode => ':utf8' }, $source_cell->Value, ";" );
will write a new file for each new cell value. It does, however, not match your description of "one row of semicolons, fields empty", as it should leave just one value and one semicolon.
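If each call really were meant to add to the file rather than replace it, the option would be spliced in like this (a sketch; still one write per cell, so slow):

write_file( $outFile, { binmode => ':utf8', append => 1 }, $source_cell->Value, ";" );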
I am doubtful about this sentiment of yours: "For convenience I like to use the module File::Slurp". While the print statement works as it should, using File::Slurp does not. So how is that convenient?
What you should do, if you still want to use write_file is to gather all the lines to print, and then print them all at once at the end of the loop. E.g.:
$line .= $source_cell->Value . ";";  # use concatenation to build the line
...
push @out, "$line\n";                # store in array
...
write_file(...., \@out);             # print the array
Another simple option would be to use join, or to use the Text::CSV module.
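For instance, a sketch with Text::CSV, assuming the cells of each row have been gathered as one array reference per row in a hypothetical @rows:

use Text::CSV;
my $csv = Text::CSV->new({ sep_char => ';', eol => "\n" })
    or die "Cannot use Text::CSV: " . Text::CSV->error_diag;
open my $out, '>:encoding(UTF-8)', $outFile or die "Can't open $outFile: $!";
$csv->print($out, $_) for @rows;   # one properly quoted line per row
close $out;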
Well, in this particular case, File::Slurp was indeed complicating this for me. I just wanted to avoid repeating myself, which I did in the following clumsy working solution:
#!/usr/bin/perl
use warnings;
use strict;
use File::Copy;
use v5.14;
use Cwd;
use File::Basename;
use File::Slurp;
use Tie::File;
use Spreadsheet::ParseExcel;
use open qw/:std :utf8/;

# ... other functions

sub xls2csv {
    my $currentPath = getcwd();
    my @files = <$currentPath/stage0/*.xls>;
    my $fh;

    for my $sourcename (@files) {
        say "Now working on $sourcename";

        my $outFile = $sourcename;
        $outFile =~ s/xls/csv/gi;

        if ( -e $outFile ) {
            unlink($outFile) or die "Error: $!";
            print "Old $outFile deleted.";
        }

        my $source_excel = new Spreadsheet::ParseExcel;
        my $source_book  = $source_excel->Parse($sourcename)
            or die "Could not open source Excel file $sourcename: $!";

        foreach my $source_sheet_number ( 0 .. $source_book->{SheetCount} - 1 ) {
            my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];

            next unless defined $source_sheet->{MaxRow};
            next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
            next unless defined $source_sheet->{MaxCol};
            next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};

            foreach my $row_index ( $source_sheet->{MinRow} .. $source_sheet->{MaxRow} ) {
                foreach my $col_index ( $source_sheet->{MinCol} .. $source_sheet->{MaxCol} ) {
                    my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
                    if ($source_cell) {
                        print $source_cell->Value, ";";
                        open( $fh, '>>', $outFile ) or die "Error: $!";
                        print $fh $source_cell->Value, ";";
                        close $fh;
                    }
                }
                print "\n";
                open( $fh, '>>', $outFile ) or die "Error: $!";
                print $fh "\n";
                close $fh;
            }
        }
    }
}
xls2csv();
I'm actually NOT happy with it, since I'm opening and closing the files so often (I have many files with many lines); that's not very clever in terms of performance.
Currently I still don't know how to use split or Text::CSV in this case, in order to put everything into an array and to open, write and close each file only once.
Thank you for your answer, TLP.
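A sketch of that open-once restructuring, reusing the names from the code above: the handle is opened once per output file, each row is joined into one string, and there is a single print per row.

open my $fh, '>>', $outFile or die "Error: $!";
foreach my $row_index ( $source_sheet->{MinRow} .. $source_sheet->{MaxRow} ) {
    my @cells;
    foreach my $col_index ( $source_sheet->{MinCol} .. $source_sheet->{MaxCol} ) {
        my $cell = $source_sheet->{Cells}[$row_index][$col_index];
        push @cells, defined $cell ? $cell->Value : '';
    }
    print $fh join(';', @cells), "\n";   # one print per row
}
close $fh;   # one open/close per file instead of per cell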

Perl search for specific subdirectory then process

So for the program I am writing, I would like it to search through all of the subdirectories within a directory. If a subdirectory name contains a word, let's say "foo", then the program will open that subdirectory and perform a function on the files within it. Can anybody give me some help on how to go about this? It also needs to be recursive. Thanks in advance.
This can be done using the File::Find module, but I believe Path::Class is superior even though it isn't a core module and will likely need installing.
This program finds the files wanted and calls process to process them. At present the process subroutine simply prints the name of the file for testing.
use strict;
use warnings;
use Path::Class;

my $dir = dir '/path/to/root/directory';

$dir->recurse(callback => sub {
    my $node = shift;
    return if $node->is_dir;
    my $parent = $node->parent;
    if ($parent->basename =~ /foo/) {
        process($node);
    }
});

sub process {
    my $file = shift;
    print $file, "\n";
}
Update
If you prefer, this program performs the same task using File::Find.
use strict;
use warnings;
use File::Find;
use File::Basename qw/ basename /;

my $dir = '/path/to/root/directory';

find(sub {
    return unless -f;
    if (basename($File::Find::dir) =~ /foo/) {
        process($File::Find::name);
    }
}, $dir);

sub process {
    my $file = shift;
    print $file, "\n";
}
Update
As requested, here is a further solution using Path::Class::Rule for comparison. As daxim suggested the code is a little shorter.
use strict;
use warnings;
use Path::Class::Rule;

my $rule = Path::Class::Rule->new;
$rule->file->and(sub { $_->parent->basename =~ /foo/ });

my $next = $rule->iter('/path/to/root/directory');
while ( my $file = $next->() ) {
    process($file);
}

sub process {
    my $file = shift;
    print $file, "\n";
}

change the directory and grab the xml file to parse certain data in perl

I am trying to parse a specific XML file which is located in the subdirectories of one directory. For some reason I am getting an error saying the file does not exist. If the file does not exist, it should move on to the next subdirectory.
Here is my code:
use strict;
use warnings;
use Data::Dumper;
use XML::Simple;

my @xmlsearch = map { chomp; $_ } `ls`;

foreach my $directory (@xmlsearch) {
    print "$directory \n";
    chdir($directory) or die "Couldn't change to [$directory]: $!";

    my @findResults = `find -name education.xml`;
    foreach my $educationresults (@findResults) {
        print $educationresults;
        my $parser = new XML::Simple;
        my $data = $parser->XMLin($educationresults);
        print Dumper($data);
        chdir('..');
    }
}
ERROR
music/gitar/education.xml
File does not exist: ./music/gitar/education.xml
Using chdir the way you did makes the code less readable, IMO. You can use File::Find instead:
use autodie;
use feature 'say';
use File::Find;
use XML::Simple;
use Data::Dumper;

sub findxml {
    my @found;

    opendir(DIR, '.');
    my @where = grep { -d && m#^[^.]+$# } readdir(DIR);
    closedir(DIR);

    File::Find::find({wanted => sub {
        push @found, $File::Find::name if m#^education\.xml$#s && -f _;
    } }, @where);

    return @found;
}

foreach my $xml (findxml()) {
    say $xml;
    print Dumper XMLin($xml);
}
Whenever you find yourself relying on backticks to execute shell commands, you should consider whether there is a proper perl way to do it. In this case, there is.
ls can be replaced with <*>, which is a simple glob. The line:
my @array = map { chomp; $_ } `ls`;
Is just a roundabout way of saying
chomp(my @array = `ls`); # chomp takes list arguments as well
But of course the proper way is
my @array = <*>; # no chomp required
Now, the simple solution to all of this is just to do
for my $xml (<*/education.xml>) { # find the xml files one directory level down
Which will cover one level of directories, with no recursion. For full recursion, use File::Find:
use strict;
use warnings;
use File::Find;

my @list;
find( sub { push @list, $File::Find::name if /^education\.xml$/i; }, "." );

for (@list) {
    # do stuff
    # @list contains full path names of education.xml files found in subdirs,
    # e.g. ./music/gitar/education.xml
}
You should note that changing directories is not required, and in my experience, not worth the trouble. Instead of doing:
chdir($somedir);
my $data = XMLin($somefile);
chdir("..");
Simply do:
my $data = XMLin("$somedir/$somefile");

How do I use File::Find::Rule in taint mode?

I am trying to get a list of subdirectories in a given directory using something like the following:
#!/usr/bin/perl -wT
use strict;
use warnings;
use File::Find::Rule;
use Data::Dumper;
my @subdirs = File::Find::Rule->maxdepth(1)->directory->relative->in('mydir');
print Dumper(@subdirs);
However, running this gives the result:
Insecure dependency in chdir while running with -T switch
I understand that File::Find has options for dealing with taint mode, but I can’t seem to find an equivalent in File::Find::Rule. Is it possible to do the above? Should I use an alternative method for listing subdirectories? Am I completely misunderstanding something obvious that I really should understand about taint mode?
(Edit!) Okay, logic would suggest that throwing in the following would work:
->extras( {untaint => 1, untaint_pattern => $untaint_pattern, untaint_skip => 1} )
This lets you use the taint-mode features of File::Find by passing arguments directly to that module's find() function. Incidentally, File::Find mentions that one should set $untaint_pattern by using the qr// operator. For example, the default value is
$untaint_pattern = qr|^([-+@\w./]+)$|
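Spliced into the original one-liner, the call one would expect to work looks like this (illustrative only; as explained next, it does not):

my @subdirs = File::Find::Rule->extras({ untaint => 1 })
                              ->maxdepth(1)->directory->relative->in('mydir');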
However, this does not work! In fact, your issue is a known bug in File::Find::Rule. (For example, here are the CPAN and Debian bug reports.) If you would like a bugfix, then both of those bug reports have patches.
If you are in a restricted environment, one thing you can do is essentially implement the patch yourself in your code. For example, if you want to keep everything in one file, you can add the large code block below after use File::Find::Rule. Note that this is a very quick fix and may be suboptimal. If it doesn't work for you (e.g., because you have spaces in your filenames), change the pattern qr|^([-+@\w./]+)$| that is used.
Note finally that if you want your code organization to be a bit better, you may want to dump this into a separate package, maybe called MyFileFindRuleFix or something, that you always use after File::Find::Rule itself.
package File::Find::Rule;
no warnings qw(redefine);

sub in {
    my $self = _force_object shift;

    my @found;
    my $fragment = $self->_compile( $self->{subs} );
    my @subs = @{ $self->{subs} };

    warn "relative mode handed multiple paths - that's a bit silly\n"
        if $self->{relative} && @_ > 1;

    my $topdir;
    my $code = 'sub {
        (my $path = $File::Find::name) =~ s#^(?:\./+)+##;
        $path = "." if ($path eq ""); # See Debian bug #329377
        my @args = ($_, $File::Find::dir, $path);
        my $maxdepth = $self->{maxdepth};
        my $mindepth = $self->{mindepth};
        my $relative = $self->{relative};

        # figure out the relative path and depth
        my $relpath = $File::Find::name;
        $relpath =~ s{^\Q$topdir\E/?}{};
        my $depth = scalar File::Spec->splitdir($relpath);
        #print "name: \'$File::Find::name\' ";
        #print "relpath: \'$relpath\' depth: $depth relative: $relative\n";

        defined $maxdepth && $depth >= $maxdepth
            and $File::Find::prune = 1;
        defined $mindepth && $depth < $mindepth
            and return;

        #print "Testing \'$_\'\n";

        my $discarded;
        return unless ' . $fragment . ';
        return if $discarded;
        if ($relative) {
            push @found, $relpath if $relpath ne "";
        }
        else {
            push @found, $path;
        }
    }';

    #use Data::Dumper;
    #print Dumper \@subs;
    #warn "Compiled sub: '$code'\n";

    my $sub = eval "$code" or die "compile error '$code' $@";

    my $cwd = getcwd;
    # Untaint it
    if ( $cwd =~ qr|^([-+@\w./]+)$| ) {
        $cwd = $1;
    } else {
        die "Couldn't untaint \$cwd: [$cwd]";
    }

    for my $path (@_) {
        # $topdir is used for relative and maxdepth
        $topdir = $path;
        # slice off the trailing slash if there is one (the
        # maxdepth/mindepth code is fussy)
        $topdir =~ s{/?$}{}
            unless $topdir eq '/';
        $self->_call_find( { %{ $self->{extras} }, wanted => $sub }, $path );
    }
    chdir $cwd;

    return @found;
}

use warnings;
package main;