Perl: check existence of file with wildcard - perl

I am trying to use -e to check existence of a file, $name is any input specified by user, "_file_" is something fixed, and * could be anything possible. Currently it is not able to detect the file.
if (-e $name."_file_*.txt)
{
do something;
}

Why not use glob() for that?
if (my #files = glob("\Q$name\E_file_*.txt")) {
# do something
}

This is one of the way I could find the existing files with the particular name:
use strict;
use warnings;
use Cwd;
my $name = "Test";
my $curdir = getcwd();
my #txtfiles = glob "$curdir/*.txt";
foreach my $txtfile (#txtfiles)
{
if($txtfile=~m/$name\_file\_(.*?)\.txt/)
{
print "Ok...\n";
}
else { next; }
}

I would suggest you use File::Find module.
use strict;
use warnings;
use File::Find;
# this takes the function a reference and will be executed for each file in the directory.
find({ wanted => \&process, follow => 1 }, '/dir/to/search' );
sub process {
my $filename = $_;
my $filepath = $File::Find::name;
if( $filename=~m/$name\_file\_(.*?)\.txt/ ){
# file exists and do further processing
} else {
# file does not exists
}
}

Related

Perl: How to search a file named ".cfg" in a directory and all it's parent directories

How to search a file named ".cfg" in a directory and all it's parent directories
I am fetching the name as below code but i would like to know if there is any better way to do it.
Also i would like to know the recursive way to do the same.
sub get_p4_config_updir($ $)
{
my ($client_root, $cfg_file) = #_;
# Dir from where search starts - it's a client root here
my $cur_dir = $client_root;
printf("**** cur_dir: $cur_dir ****\n");
my $slashes = $cur_dir =~ y/\///;
printf("**** no of back slashes: $slashes ****\n");
while($slashes > 2) {
my ($parent_dir, $b) = $cur_dir =~ /(.*)\/(.*)/;
printf("**** parent_dir: $parent_dir, b: $b ****\n");
$slashes--;
if (-e "$cur_dir/$cfg_file") {
printf("**** File exists in dir: $cur_dir ****\n");
return $cur_dir;
}
$cur_dir = $parent_dir;
}
return "";
}
my $cfg = '.cfg';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $path = get_p4_config_updir($dir, $cfg);
if ($path ne "") {
printf("**** File exists in dir: $path ****\n");
} else {
printf("**** File not found ****\n");
}
An example using Path::Tiny:
#!/usr/bin/env perl
use warnings;
use strict;
use feature qw/say/;
use Path::Tiny;
# Returns a Path::Tiny object to the directory containing the file
# being looked for, or undef if not found.
sub get_p4_config_updir {
my ($client_root, $cfg_file) = #_;
my $dir = path($client_root)->realpath;
while (1) {
# say "Looking at $dir";
if ($dir->child($cfg_file)->exists) {
return $dir;
} elsif ($dir->is_rootdir) {
return undef;
} else {
$dir = $dir->parent;
}
}
}
my $cfg = '.cfg';
my $dir = '/user/home/wkspace/abc/def/MAIN';
say get_p4_config_updir($dir, $cfg) // "File not found";
Or a version that's similar to #rajashekar's idea of walking the directory tree by using chdir to get each directory's parent. This one uses File::chdir, which lets you localize changes to the current working directory (and restores the original when the function/scope exits), as well as providing a handy array view of the current directory and its parents that can be manipulated:
use File::chdir;
...
sub get_p4_config_updir {
my ($client_root, $cfg_file) = #_;
local $CWD = $client_root; # Magic happens here
while (1) {
# say "Looking at $CWD";
if (-e $cfg_file) {
return $CWD;
} elsif ($CWD eq "/") {
return undef;
} else {
pop #CWD; # CDs to the next parent directory
}
}
}
You can use core libraries to do this in a platform independent, readable way without having to use cwd and possibly causing action at a distance effects in the rest of your code:
#!/usr/bin/env perl
use strict;
use warnings;
use File::Spec::Functions qw(catfile rel2abs updir);
sub get_p4_config_updir
{
my ($dir, $file) = #_;
$dir = rel2abs($dir);
do {
my $path = catfile $dir => $file;
return $dir if -e $path;
return if $dir eq (my $new_dir = rel2abs(catfile $dir, updir));
$dir = $new_dir;
} while ('NOT_DONE');
return;
}
sub main {
my ($cfg, $dir) = #_;
my $path = get_p4_config_updir($dir, $cfg);
if (defined $path) {
printf("Found '%s' in '%s'\n", $cfg, $path);
}
else {
printf(
"Did not find '%s' in '%s' or any of its parent directories\n",
$cfg,
$dir,
);
}
}
main(#ARGV);
Output:
C:\Users\u\AppData\Local\Temp> perl p.pl linux.bin .
Found 'linux.bin' in 'C:\'
Why deal with pathnames, when you can walk the directory structure up with .. ?
if the file exists in the current directory return it.
else go up .. and the repeat the process.
use Cwd qw(cwd);
sub search_up {
my ($dir, $file) = #_;
chdir($dir);
while (1) {
if (-e $file) {
print "$file exists in $dir\n";
return $dir;
} elsif ($dir eq "/") {
return;
} else {
chdir("..");
$dir = cwd;
}
};
}
Please see if following code snippet complies with your requirements.
The script is looking for configuration file toward root of filesystem, found filenames are stored in an array #found.
use strict;
use warnings;
use feature 'say';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $ext = 'cfg';
my($cwd,#found);
for( split('/',$dir) ) {
$cwd .= "$_/";
push #found, glob( $cwd . "*\.$ext" );
}
if( #found ) {
say for #found;
} else {
say 'No file(s) was found';
}
exit 0;
Following code snippet is looking for configuration files away from root filesystem starting from $dir.
If any files found then they will be stored under array reference $found and then printed out on the terminal.
If no files get found then you will be informed with a message.
use strict;
use warnings;
use feature 'say';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $ext = 'cfg';
my $found = find($dir,$ext);
if( $found ) {
say for #$found;
} else {
say 'No file(s) was found';
}
exit 0;
sub find {
my $dir = shift;
my $ext = shift;
my $ret;
for( glob("$dir/*") ) {
push #$ret, $_ if /\.$ext\z/;
if( -d ) {
my $found = find($_,$ext);
push #$ret, #$found if $found;
}
}
return $ret;
}

Perl search for specific subdirectory then process

So for the program I am writing, what I would like for it to do is search through all of the subdirectories within a directory. If the subdirectory name contains a word, let's say "foo", then the program will open this subdirectory and perform a function on the files within the subdirectory. Can anybody give me some help on how to go about this? it also needs to be recursive. Thanks in advance
This can be done using the File::Find module, but I believe Path::Class is superior even though it isn't a core module and will likely need installing.
This program finds the files wanted and calls process to process them. At present the process subroutine simply prints the name of the file for testing.
use strict;
use warnings;
use Path::Class;
my $dir = dir '/path/to/root/directory';
$dir->recurse(callback => sub {
my $node = shift;
return if $node->is_dir;
my $parent = $node->parent;
if ($parent->basename =~ /foo/) {
process($node);
}
});
sub process {
my $file = shift;
print $file, "\n";
}
Update
If you prefer, this program performs the same task using File::Find.
use strict;
use warnings;
use File::Find;
use File::Basename qw/ basename /;
my $dir = '/path/to/root/directory';
find(sub {
return unless -f;
if (basename($File::Find::dir) =~ /foo/) {
process($File::Find::name);
}
}, $dir);
sub process {
my $file = shift;
print $file, "\n";
}
Update
As requested, here is a further solution using Path::Class::Rule for comparison. As daxim suggested the code is a little shorter.
use strict;
use warnings;
use Path::Class::Rule;
my $rule = Path::Class::Rule->new;
$rule->file->and(sub { $_->parent->basename =~ /foo/ });
my $next = $rule->iter('/path/to/root/directory');
while ( my $file = $next->() ) {
process($file);
}
sub process {
my $file = shift;
print $file, "\n";
}

change the directory and grab the xml file to parse certain data in perl

I am trying to parse specific XML file which is located in sub directories of one directory. For some reason i am getting error saying file does not exists. if the file does not exist it should move on to next sub directory.
HERE IS MY CODE
use strict;
use warnings;
use Data::Dumper;
use XML::Simple;
my #xmlsearch = map { chomp; $_ } `ls`;
foreach my $directory (#xmlsearch) {
print "$directory \n";
chdir($directory) or die "Couldn't change to [$directory]: $!";
my #findResults = `find -name education.xml`;
foreach my $educationresults (#findResults){
print $educationresults;
my $parser = new XML::Simple;
my $data = $parser->XMLin($educationresults);
print Dumper($data);
chdir('..');
}
}
ERROR
music/gitar/education.xml
File does not exist: ./music/gitar/education.xml
Using chdir the way you did makes the code IMO less readable. You can use File::Find for that:
use autodie;
use File::Find;
use XML::Simple;
use Data::Dumper;
sub findxml {
my #found;
opendir(DIR, '.');
my #where = grep { -d && m#^[^.]+$# } readdir(DIR);
closedir(DIR);
File::Find::find({wanted => sub {
push #found, $File::Find::name if m#^education\.xml$#s && -f _;
} }, #where);
return #found;
}
foreach my $xml (findxml()){
say $xml;
print Dumper XMLin($xml);
}
Whenever you find yourself relying on backticks to execute shell commands, you should consider whether there is a proper perl way to do it. In this case, there is.
ls can be replaced with <*>, which is a simple glob. The line:
my #array = map { chomp; $_ } `ls`;
Is just a roundabout way of saying
chomp(my #array = `ls`); # chomp takes list arguments as well
But of course the proper way is
my #array = <*>; # no chomp required
Now, the simple solution to all of this is simply to do
for my $xml (<*/education.xml>) { # find the xml files in dir 1 level up
Which will cover one level of directories, with no recursion. For full recursion, use File::Find:
use strict;
use warnings;
use File::Find;
my #list;
find( sub { push #list, $File::Find::name if /^education\.xml$/i; }, ".");
for (#list) {
# do stuff
# #list contains full path names of education.xml files found in subdirs
# e.g. ./music/gitar/education.xml
}
You should note that changing directories is not required, and in my experience, not worth the trouble. Instead of doing:
chdir($somedir);
my $data = XMLin($somefile);
chdir("..");
Simply do:
my $data = XMLin("$somedir/$somefile");

Perl Get Parent Folder Name

What is the solution to get the name of the parent directory using File::Find. I know how to get only the filename or only the directory path but I don't know how to do this for the last containing directory.
For example, if the directory is /dir_1/dir_2/dir_3/.../dir_n/*.txt I need to get the 'dir_n' name.
use strict;
use warnings;
use File::Find;
my $dir = "some_path";
find(\&file_handle, $dir);
sub file_handle {
/\.txt$/ or return;
my $fd = $File::Find::dir;
my $fn = $File::Find::name;
# ...
}
Given the directory path, you then apply File::Basename (another core module) to the path to obtain the last portion of the directory.
use strict;
use warnings;
use File::Find;
use File::Basename;
my $dir = "some_path";
find(\&file_handle, $dir);
sub file_handle {
/\.txt$/ or return;
my $fd = $File::Find::dir;
my $fn = $File::Find::name;
my $dir = basename($fd);
# ....
}
#!/usr/local/bin/perl -w
use strict;
use File::Basename;
use Cwd 'abs_path';
my $f = "../some/path/to/this_directory/and_filename";
my $d = basename(dirname(abs_path($f)));
say $d;
returns "this_directory"
You can just split and grab the second-to-last element in the array:
my $fname = "/folder/sub-folder/filename.bin";
my #parts = split('/', $fname);
if( #parts > 1 ) {
return $parts[#parts - 2];
} else {
return '/';
}
If you are willing to install non-core modules, the Path::Class can come handy:
use Path::Class;
dir("some_dir")->recurse(callback => sub {
my $file = shift;
return if $file->is_dir;
return if $file =~ /\.txt$/i;
my $fn = $file->basename;
my $fd = $file->parent;
my $dir = $file->parent->parent;
});
It gives you handy objects instead of strings and imo nice operations on them.

How can I check the extension of a file using Perl?

To my perl script, a file is passed as an arguement. The file can be a .txt file or a .zip file containing the .txt file.
I want to write code that looks something like this
if ($file is a zip) {
unzip $file
$file =~ s/zip$/txt/;
}
One way to check the extension is to do a split on . and then match the last result in the array (returned by split).
Is there some better way?
You can use File::Basename for this.
#!/usr/bin/perl
use 5.010;
use strict;
use warnings;
use File::Basename;
my #exts = qw(.txt .zip);
while (my $file = <DATA>) {
chomp $file;
my ($name, $dir, $ext) = fileparse($file, #exts);
given ($ext) {
when ('.txt') {
say "$file is a text file";
}
when ('.zip') {
say "$file is a zip file";
}
default {
say "$file is an unknown file type";
}
}
}
__DATA__
file.txt
file.zip
file.pl
Running this gives:
$ ./files
file.txt is a text file
file.zip is a zip file
file.pl is an unknown file type
Another solution is to make use of File::Type which determines the type of binary file.
use strict;
use warnings;
use File::Type;
my $file = '/path/to/file.ext';
my $ft = File::Type->new();
my $file_type = $ft->mime_type($file);
if ( $file_type eq 'application/octet-stream' ) {
# possibly a text file
}
elsif ( $file_type eq 'application/zip' ) {
# file is a zip archive
}
This way, you do not have to deal with missing/wrong extensions.
How about checking the end of the filename?
if ($file =~ /\.zip$/i) {
and then:
use strict;
use Archive::Extract;
if ($file =~ /\.zip$/i) {
my $ae = Archive::Extract->new(archive => $file);
my $ok = $ae->extract();
my $files = $ae->files();
}
more information here.
You can check the file extension using a regex match as:
if($file =~ /\.zip$/i) {
# $file is a zip file
}
I know this question is several years old, but for anyone that comes here in the future, an easy way to break apart a file path into its constituent path, filename, basename and extension is as follows.
use File::Basename;
my $filepath = '/foo/bar.txt';
my ($basename, $parentdir, $extension) = fileparse($filepath, qr/\.[^.]*$/);
my $filename = $basename . $extension;
You can test it's results with the following.
my #test_paths = (
'/foo/bar/fish.wibble',
'/foo/bar/fish.',
'/foo/bar/fish.asdf.d',
'/foo/bar/fish.wibble.',
'/fish.wibble',
'fish.wibble',
);
foreach my $this_path (#test_paths) {
print "Current path: $this_path\n";
my ($this_basename, $parentdir, $extension) = fileparse($this_path, qr/\.[^.]*$/);
my $this_filename = $this_basename . $extension;
foreach my $var (qw/$parentdir $this_filename $this_basename $extension/) {
print "$var = '" . eval($var) . "'\n";
}
print "\n\n";
}
Hope this helps.
Why rely on file extension? Just try to unzip and use appropriate exception handling:
eval {
# try to unzip the file
};
if ($#) {
# not a zip file
}
Maybe a little bit late but it could be used as an alternative reference:
sub unzip_all {
my $director = shift;
opendir my $DIRH, "$director" or die;
my #files = readdir $DIRH;
foreach my $file (#files){
my $type = `file $director/$file`;
if ($type =~ m/gzip compressed data/){
system "gunzip $director/$file";
}
}
close $DIRH;
return;
}
Here is possible to use linux file executing it from perl by the use of backticks(``). You area able to pass the path of your folder and evaluate if exists a file that is classified by file as gzip compressed.
If you do not mind using a perl module, you can use Module::Generic::File, such as:
use Module::Generic::File qw( file );
my $f = file( '/some/where/file.zip' );
if( $f->extension eq 'zip' )
{
# do something
}
Module::Generic::File has a lot of features to handle and manipulate a file.