Find the biggest file in a directory. Perl - perl

The program lists all files in a directory, their size, type and owner. In case file is a directory, owner is the owner of the biggest file in that directory (that's the problem).
use warnings;
use strict;
use diagnostics;
use feature 'say';
use File::Find;
my $dir = shift || die "Provide a directory name";
my @file;
my @owner;
my @size;
my @type;
my $i = 0;

# Print one "name:owner:type:size" line for every entry directly inside $dir.
#   type  - "l" symbolic link, "d" directory, "f" anything else
#   size  - total size of all plain files at or below the entry
#   owner - for a directory: owner of the biggest plain file found below it
#           (the directory's own owner when it contains no plain file);
#           otherwise the owner of the entry itself
for my $entry ( glob("$dir/*") )
{
    $file[$i] = $entry;
    $size[$i] = 0;    # stays 0 for empty directories and dangling links

    # Add up the plain files below $entry and remember the biggest one.
    my ( $biggest_file, $biggest_size );
    find(
        sub {
            return unless -f;
            my $bytes = ( -s _ ) || 0;
            $size[$i] += $bytes;
            if ( !defined $biggest_size || $bytes > $biggest_size )
            {
                $biggest_size = $bytes;
                $biggest_file = $File::Find::name;
            }
        },
        $entry
    );

    # -l has to be tested first because -d and -f follow symbolic links.
    my @st;
    if ( -l $entry )
    {
        $type[$i] = "l";
        @st = lstat($entry);
    }
    elsif ( -d $entry )
    {
        $type[$i] = "d";
        @st = stat( defined $biggest_file ? $biggest_file : $entry );
    }
    else
    {
        $type[$i] = "f";
        @st = stat($entry);
    }

    # Fall back to the numeric uid when there is no passwd entry for it.
    my $uid  = $st[4];
    my $user = defined $uid ? getpwuid($uid) : undef;
    $owner[$i] = defined $user ? $user : defined $uid ? $uid : '?';

    print "$file[$i]:$owner[$i]:$type[$i]:$size[$i]\n";
    $i++;
}
At this point in code
if( -d $file[$i] )
{
$type[$i] = "d";
$owner[$i] = getpwuid((stat($_))[4]);
}
I have to find the biggest file in this directory. I figured that I should use the find function, but I am not sure how to do it.

Please investigate the following code piece for compliance with your task.
The code uses recursion for directories, core component is glob function.
The result of the directory lookup is returned as a hash reference. Feel free to use this reference however you like.
use strict;
use warnings;
use feature 'say';
use Data::Dumper;
# The directory to inspect is the first command-line argument.
my $dir = shift;
die "Provide directory" unless $dir;

# Dump the whole lookup structure and finish successfully.
say Dumper( dir_lookup($dir) );
exit 0;
# dir_lookup($dir)
#
# Recursively describes the contents of $dir (dot files are not matched by
# the glob).  Returns a hash reference:
#   items => array ref with one record per entry in $dir
#   file  => record of the biggest plain file directly inside $dir
#            (absent when the directory holds no plain file)
# Every record has the keys name, size, owner and type ('link', 'dir',
# 'file' or 'other').  A 'dir' record additionally carries
#   file  => biggest file of that sub-directory (may be undef)
#   owner => owner of that biggest file; the directory's own owner is kept
#            when the sub-directory contains no plain file.
sub dir_lookup {
    my $dir = shift;

    my $record = { items => [] };
    my $max    = -1;    # below any real size, so an empty file can still win

    for my $name ( glob("$dir/*") ) {
        my @st = stat($name);
        @st = lstat($name) unless @st;    # dangling symbolic link
        next unless @st;                  # entry vanished in the meantime

        my $owner = getpwuid( $st[4] );
        my $data  = {
            name  => $name,
            size  => ( -s $name ) || 0,
            owner => defined $owner ? $owner : $st[4],
            # -l is checked first: -d and -f follow symbolic links, so a
            # link would otherwise never be reported as one.
            type  => -l $name ? 'link'
                   : -d $name ? 'dir'
                   : -f $name ? 'file'
                   :            'other',
        };

        if ( -f $name and $data->{size} > $max ) {
            $max = $data->{size};
            $record->{file} = $data;
        }

        # Only real directories are descended into; following symlinked
        # directories could recurse through a link cycle.
        if ( $data->{type} eq 'dir' ) {
            my $r = dir_lookup($name);
            $data->{file}  = $r->{file};
            $data->{owner} = $r->{file}{owner} if $r->{file};
        }

        push @{ $record->{items} }, $data;
    }

    return $record;
}

Related

Perl: How to search a file named ".cfg" in a directory and all it's parent directories

How to search a file named ".cfg" in a directory and all it's parent directories
I am fetching the name as below code but i would like to know if there is any better way to do it.
Also i would like to know the recursive way to do the same.
# get_p4_config_updir($client_root, $cfg_file)
#
# Looks for $cfg_file in $client_root and then in each of its parent
# directories, up to and including the filesystem root (for a relative
# $client_root: up to its first path component).
# Returns the first directory that contains $cfg_file, or "" when no
# directory on the way up contains it.
#
# The earlier slash-counting loop stopped two levels below the root and so
# never looked at the top-most ancestors.  The prototype and the tracing
# printf calls were dropped: a prototype does not validate arguments, and a
# path used as a printf format breaks on any '%' it contains.
sub get_p4_config_updir
{
    my ($client_root, $cfg_file) = @_;
    return "" unless defined $client_root && length $client_root;

    my $cur_dir = $client_root;
    $cur_dir =~ s{(?<=.)/+\z}{};    # drop trailing slashes, but keep a bare "/"

    while (1) {
        my $candidate = $cur_dir eq '/' ? "/$cfg_file" : "$cur_dir/$cfg_file";
        return $cur_dir if -e $candidate;

        # Nothing left to climb: at the root, or a relative single component.
        last if $cur_dir eq '/' || $cur_dir !~ m{/};

        $cur_dir =~ s{/[^/]*\z}{};           # strip the last path component
        $cur_dir = '/' if $cur_dir eq '';    # "/top" becomes the root itself
    }
    return "";
}
# Example run: look for '.cfg' starting at a fixed workspace directory.
my $cfg = '.cfg';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $path = get_p4_config_updir($dir, $cfg);
# print rather than printf: $path is data, and a '%' inside it must not be
# interpreted as a format directive.
if ($path ne "") {
    print "**** File exists in dir: $path ****\n";
} else {
    print "**** File not found ****\n";
}
An example using Path::Tiny:
#!/usr/bin/env perl
use warnings;
use strict;
use feature qw/say/;
use Path::Tiny;
# get_p4_config_updir($client_root, $cfg_file)
#
# Starts at the real path of $client_root and walks towards the root
# directory.  Returns a Path::Tiny object for the first directory that
# contains $cfg_file, or undef if no directory on the way up contains it.
sub get_p4_config_updir {
    my ($client_root, $cfg_file) = @_;
    my $dir = path($client_root)->realpath;
    while (1) {
        # say "Looking at $dir";
        if ($dir->child($cfg_file)->exists) {
            return $dir;
        } elsif ($dir->is_rootdir) {
            return undef;
        } else {
            $dir = $dir->parent;
        }
    }
}
my $cfg = '.cfg';
my $dir = '/user/home/wkspace/abc/def/MAIN';
# The defined-or supplies the message when the search returned undef.
say get_p4_config_updir($dir, $cfg) // "File not found";
Or a version that's similar to @rajashekar's idea of walking the directory tree by using chdir to get each directory's parent. This one uses File::chdir, which lets you localize changes to the current working directory (and restores the original when the function/scope exits), and which also provides a handy array view (@CWD) of the current directory and its parents that can be manipulated:
use File::chdir;
...
# get_p4_config_updir($client_root, $cfg_file)
#
# Changes into $client_root through File::chdir's $CWD and climbs one
# directory at a time by popping @CWD.  Returns the directory in which
# $cfg_file exists, or undef once "/" has been checked without a match.
# $CWD is localized, so the previous working directory is back in place
# when the sub returns.
sub get_p4_config_updir {
    my ($client_root, $cfg_file) = @_;
    local $CWD = $client_root; # Magic happens here
    while (1) {
        # say "Looking at $CWD";
        if (-e $cfg_file) {
            return $CWD;
        } elsif ($CWD eq "/") {
            return undef;
        } else {
            pop @CWD; # CDs to the next parent directory
        }
    }
}
You can use core libraries to do this in a platform independent, readable way without having to use cwd and possibly causing action at a distance effects in the rest of your code:
#!/usr/bin/env perl
use strict;
use warnings;
use File::Spec::Functions qw(catfile rel2abs updir);
# get_p4_config_updir($dir, $file)
#
# Looks for $file in $dir (made absolute first) and then in each parent
# directory.  Returns the first directory containing $file, or nothing
# (undef in scalar context) when the top of the tree is reached.
#
# The parent is computed with File::Basename::dirname.  Appending updir and
# comparing strings only works where canonpath collapses "..", which the
# Unix implementation does not do, so that loop never reached its exit test.
sub get_p4_config_updir
{
    my ($dir, $file) = @_;
    require File::Basename;

    $dir = rel2abs($dir);
    while (1) {
        return $dir if -e catfile($dir, $file);

        # dirname of the top-most directory is that directory itself.
        my $parent = File::Basename::dirname($dir);
        return if $parent eq $dir;
        $dir = $parent;
    }
}
# main($cfg, $dir)
#
# Command-line entry point: reports whether the file $cfg exists in $dir or
# in one of its parent directories.  Dies with a usage message when either
# argument is missing.
sub main {
    my ($cfg, $dir) = @_;
    die "Usage: $0 FILE DIR\n" unless defined $cfg && defined $dir;

    my $path = get_p4_config_updir($dir, $cfg);
    if (defined $path) {
        printf("Found '%s' in '%s'\n", $cfg, $path);
    }
    else {
        printf(
            "Did not find '%s' in '%s' or any of its parent directories\n",
            $cfg,
            $dir,
        );
    }
}
main(@ARGV);
Output:
C:\Users\u\AppData\Local\Temp> perl p.pl linux.bin .
Found 'linux.bin' in 'C:\'
Why deal with pathnames, when you can walk the directory structure up with .. ?
if the file exists in the current directory return it.
else go up .. and the repeat the process.
use Cwd qw(cwd);
# search_up($dir, $file)
#
# Changes into $dir and climbs with chdir("..") until $file exists in the
# current directory.  Prints a message and returns that directory when it
# is found ($dir as given for the starting directory, the cwd string for
# any parent).  Returns nothing when $dir cannot be entered or when "/" has
# been checked without a match.
# The working directory in effect at the call is restored before returning,
# so the search does not leak a chdir into the rest of the program.
sub search_up {
    my ($dir, $file) = @_;
    my $start = cwd;
    chdir($dir) or return;    # without this the search would run from $start

    my $found;
    while (1) {
        if (-e $file) {
            print "$file exists in $dir\n";
            $found = $dir;
            last;
        }
        last if $dir eq "/";
        chdir("..") or last;
        $dir = cwd;
    }

    chdir($start) or warn "Cannot return to $start: $!";
    return $found if defined $found;
    return;
}
Please see if following code snippet complies with your requirements.
The script looks for configuration files in every directory from the root of the filesystem down to $dir; the file names it finds are stored in the array @found.
use strict;
use warnings;
use feature 'say';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $ext = 'cfg';
my $cwd = '';
my @found;
# Rebuild the path one component at a time ("/", "/user/", ...) and collect
# every "*.$ext" file found at each level.  glob's "*" does not match names
# that start with a dot.
for my $part ( split('/',$dir) ) {
    $cwd .= "$part/";
    push @found, glob( $cwd . "*.$ext" );
}
if( @found ) {
    say for @found;
} else {
    say 'No file(s) was found';
}
exit 0;
Following code snippet is looking for configuration files away from root filesystem starting from $dir.
If any files found then they will be stored under array reference $found and then printed out on the terminal.
If no files get found then you will be informed with a message.
use strict;
use warnings;
use feature 'say';
my $dir = '/user/home/wkspace/abc/def/MAIN';
my $ext = 'cfg';
# find() returns an array reference, or undef when nothing matched.
my $found = find($dir,$ext);
if( $found ) {
    say for @$found;
} else {
    say 'No file(s) was found';
}
exit 0;
# find($dir, $ext)
#
# Recursively collects every path below $dir whose name ends in ".$ext".
# Returns an array reference, in glob order with a directory's own matches
# before those of its sub-directories, or undef when nothing matched.
# $ext is taken literally, so regex metacharacters in it have no special
# meaning.  Symlinked directories are not descended into, which keeps a
# link cycle from causing endless recursion.
sub find {
    my $dir = shift;
    my $ext = shift;
    my $ret;
    for my $path ( glob("$dir/*") ) {
        push @$ret, $path if $path =~ /\.\Q$ext\E\z/;
        if( -d $path && !-l $path ) {
            my $found = find($path,$ext);
            push @$ret, @$found if $found;
        }
    }
    return $ret;
}

How to find the class-file on case insensitive filesystem?

Simple test case (for the demonstration of the problem):
mkdir -p ./lib1/Class ./lib2/Class
touch ./lib1/Class/Name.pm ./lib2/Class/NAME.pm
So, have:
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
Need search for the right file in the case-insensitive filesystem (OS X's HFS+).
The following works on case-sensitive filesystem,
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
# Library roots searched in order.
my @DIRS = qw(./lib1 ./lib2);
for my $class ( qw(Class::Name Class::NAME) ) {
    my $file = findClassFile($class);
    say $file;
}

# findClassFile($class)
#
# Turns a class name such as "Class::Name" into "Class/Name.pm" and returns
# "$dir/Class/Name.pm" for the first directory in @DIRS where -e reports the
# file, or undef when none has it.  -e follows the filesystem's own case
# rules, so on a case-insensitive filesystem "Name.pm" and "NAME.pm" cannot
# be told apart here.
sub findClassFile {
    my($file) = @_;
    $file =~ s|::|/|g;
    $file .= ".pm";
    for my $dir (@DIRS) {
        return "$dir/$file" if( -e "$dir/$file" );
    }
    return undef;
}
and prints
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
on the OS X, it prints incorrectly:
./lib1/Class/Name.pm
./lib1/Class/NAME.pm
How to find on the OSX's insensitive filesystem the correct filename?
PS: The only thing that comes to mind right now is writing a recursive routine with opendir/readdir/chdir and checking the file names returned by readdir. Not too shabby... but is there an easier way?
My current solution is:
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
# Library roots searched in order.
my @DIRS = qw(./lib1 ./lib2 /Users/me/tmp/lib3);
for my $class ( qw(Class::Name Class::NAME CLASS::name Class::Namex) ) {
    my $file = findClassFile($class);
    say $file // "Not found $class";
}

# findClassFile($classname)
#
# Maps "A::B" to "A/B.pm" and returns "$dir/A/B.pm" for the first directory
# in @DIRS for which FileExists() reports the path, or undef when no
# directory has it.
sub findClassFile {
    my($classname) = @_;
    my $file = ($classname =~ s|::|/|gr) . ".pm";
    for my $dir (@DIRS) {
        return "$dir/$file" if( FileExists("$dir/$file") );
    }
    return undef;
}
# FileExists($path)
#
# Case-exact existence check: walks $path one component at a time and
# requires each component to appear, spelled exactly as given, in the
# readdir listing of the directory reached so far.
# Returns the walked path (a true value) on success and undef as soon as a
# component is missing or a directory cannot be opened.
sub FileExists {
    my($path) = @_;
    my $curr = $path =~ m|^/| ? "/" : ".";
    for my $part (split '/', $path) {
        # Only empty components (leading or doubled slashes) are skipped;
        # a directory really named "0" must still be checked.
        next unless length $part;
        opendir(my $dfd, $curr) || return undef;
        # Plain string comparison: "." in a name such as "Name.pm" must not
        # act as a regex wildcard.
        my $matches = grep { $_ eq $part } readdir($dfd);
        closedir($dfd);
        return undef unless( $matches );
        $curr .= "/$part";
    }
    return $curr;
}
what prints:
./lib1/Class/Name.pm
./lib2/Class/NAME.pm
/Users/me/tmp/lib3/CLASS/name.pm
Not found Class::Namex
so - it's working, only don't like it.. ;)

Unable to find it out duplicate - perl

I am traversing a directory tree recursively to find certain files. As soon as I find such a file I perform an operation on it, but before doing so I need to check whether I have already performed the operation for it: if yes, skip it; otherwise continue.
The problem is that I cannot find a way to check that condition :(
Here is my code :
use strict;
use warnings;
use autodie;
use File::Find 'find';
use File::Spec;
use Data::Printer;
my ( $root_path, $id ) = @ARGV;
our $anr_name;

# Collect the sub-directories found directly inside $root_path.
opendir my ($dh), $root_path;
my @dir_list = grep -d, map File::Spec->catfile( $root_path, $_ ), grep { not /\A\.\.?\z/ } readdir $dh;
closedir $dh;
my $count;

# Walk every sub-directory and inspect the first ten lines of each plain
# file whose name contains "traces" for a "Cmd line: com.android.*" entry.
for my $dir (@dir_list) {
    find(
        sub {
            return unless /traces[_d]*/;
            my $file = $_;
            return unless -f $file;
            my @all_anr;

            #print "$file\n\n";
            my $file_name = $File::Find::name;
            open( my $fh, "<", $file ) or die "cannot open file:$!\n";

            # Read at most ten lines instead of slurping the whole file.
            for my $line_no ( 1 .. 10 ) {
                my $check = <$fh>;
                last unless defined $check;
                if ( $check =~ /Cmd line\:\s+com\.android\..*/ ) {
                    $anr_name = $check;
                    my @temp = split( ':', $anr_name );
                    $anr_name = $temp[1];
                    push( @all_anr, $anr_name );

                    #print "ANR :$anr_name\n";
                    my $chk = check_for_dublicate_anr(@all_anr);
                    if ( $chk eq "1" ) {
                        # performed some action
                    }
                }
            }
            close($fh);    # closed on every path, not only after line ten
        },
        $dir
    );
}
# check_for_dublicate_anr(@names)
#
# Returns 1 when @names contains at least one name that has not been passed
# to this sub before, otherwise 0.  Every name passed in is remembered for
# the lifetime of the program, across calls.
#
# The earlier version rebuilt its lookup hash on each call, so the first
# name always looked new and the answer was always 1.
sub check_for_dublicate_anr {
    use feature 'state';
    my @anrname = @_;
    state %seen_anr;    # persists between calls

    my $found_new = 0;
    foreach my $item (@anrname) {
        next if $seen_anr{$item}++;
        # if we get here, we have not seen it before
        $found_new = 1;
    }
    return $found_new;
}
You can simplify things with Path::Class and Path::Class::Rule:
use 5.010;
use warnings;
use Path::Class;
use Path::Class::Rule;
my $root = ".";
# Sub-directories directly inside $root.
my @dirs = grep { -d $_ } dir($root)->children();
# Iterator over every file below @dirs whose name matches the pattern.
my $iter = Path::Class::Rule->new->file->name(qr{traces[_d]*})->iter(@dirs);
# Names already handled; the post-increment below makes each one act once.
my $seen;
while ( my $file = $iter->() ) {
    for ( $file->slurp( chomp => 1 ) ) {
        next unless /Cmd line:\s+(com\.android\.\S*)/;
        do_things( $file, $1 ) unless $seen->{$1}++;
    }
}

# do_things($file, $str): reports the first sighting of $str, found in $file.
sub do_things {
    my ( $file, $str ) = @_;
    say "new $str in the $file";
}

How to create the next file or folder in a series of progressively numbered files?

Sorry for the bad title but this is the best I could do! :D
I have a script which creates a new project every time the specified function is called.
Each project must be stored in its own folder, with the name of the project. But, if you don't specify a name, the script will just name it "new projectX", where X is a progressive number.
With time the user could rename the folders or delete some, so every time the script runs, it checks for the smallest number available (not used by another folder) and creates the relevant folder.
Now I managed to make a program which I think works as wanted, but I would like to hear from you if it's OK or there's something wrong which I'm unable to spot, given my inexperience with the language.
# Collect every entry of the already opened directory handle $projects_dir
# whose name starts with "new project" (case-insensitive).
while ( defined( $file = readdir $projects_dir ) )
{
    # check for files whose name start with "new project"
    if ( $file =~ m/^new project/i )
    {
        push( @files, $file );
    }
}
# remove letters from filenames, only the number is left
# (foreach aliases $file to each element, so @files is changed in place)
foreach $file ( @files )
{
    $file =~ s/[a-z]//ig;
}
@files = sort { $a <=> $b } @files;
# find the smallest number available: count upwards from 0 until the sorted
# list stops matching the counter
my $smallest_number = 0;
foreach $file ( @files )
{
    if ( $smallest_number != $file )
    {
        last;
    }
    $smallest_number += 1;
}
print "Smallest number is $smallest_number";
Here's a basic approach for this sort of problem:
# next_available_dir()
#
# Returns the first name in the series "new project1", "new project2", ...
# that does not yet exist in the current working directory.
sub next_available_dir {
    my $index = 1;
    while (1) {
        my $candidate = "new project$index";
        return $candidate unless -e $candidate;
        $index++;
    }
}
my $project_dir = next_available_dir();
# A failed mkdir (permissions, or the name was taken in the meantime) would
# otherwise go unnoticed.
mkdir $project_dir or die "Cannot create '$project_dir': $!";
If you're willing to use a naming pattern that plays nicely with Perl's string auto-increment feature, you can simplify the code further, eliminating the need for $n. For example, newproject000.
I think I would use something like:
use strict;
use warnings;
# new_project_dir($base)
#
# Creates the lowest-numbered free "new projectN" directory inside $base
# and returns its name (without $base).  Numbering starts at 0.  Gaps below
# the highest existing number are filled first; after that up to 100
# further numbers are tried.  Dies when $base cannot be read or when no
# directory could be created.
sub new_project_dir
{
    my($base) = @_;
    opendir(my $dh, $base) || die "Failed to open directory $base for reading";

    # A hash rather than an array indexed by number: one directory called
    # "new project3000000000" must not allocate billions of array slots.
    my %in_use;
    my $max = -1;
    # defined(): a directory entry named "0" is false and would otherwise
    # end the loop early.
    while (defined(my $file = readdir $dh))
    {
        next unless $file =~ m/^new project(\d+)\z/;
        my $number = $1 + 0;
        $in_use{$number} = 1;
        $max = $number if $number > $max;
    }
    closedir($dh) || die "Failed to close directory $base";

    my $i;
    for ($i = 0; $i < $max; $i++)
    {
        next if $in_use{$i};
        # Directory did not exist when we scanned the directory
        # But maybe it was created since then!
        my $dir = "new project$i";
        next unless mkdir "$base/$dir";
        return $dir;
    }
    # All numbers from 0..$max were in use...so try adding new numbers...
    while ($i < $max + 100)
    {
        my $dir = "new project$i";
        $i++;
        next unless mkdir "$base/$dir";
        return $dir;
    }
    # Still failed - give in...
    die "Something is amiss - all directories 0..$i in use?";
}
Test code:
# Demo: create ten project directories under "base", removing one of the
# earlier ones after every third creation so that gaps get reused.
my $basedir = "base";
mkdir $basedir unless -d $basedir;
for my $j ( 0 .. 9 )
{
    my $dir = new_project_dir($basedir);
    print "Create: $dir\n";
    next unless $j % 3 == 2;

    my $k = int($j / 2);
    my $o = "new project$k";
    rmdir "$basedir/$o";
    print "Remove: $o\n";
}
Try this:
#!/usr/bin/env perl
use strict;
use warnings;
# get the current list of files
# see `perldoc -f glob` for details.
my @files = glob( 'some/dir/new\\ project*' );
# set to first name, in case there are none others
my $next_file = 'new project1';
# check for others
if( @files ){
    # a Schwartzian transform: sort the names by their trailing number
    @files = map { $_->[0] }                       # get original
             sort { $a->[1] <=> $b->[1] }          # sort by the numeric key
             map { [ $_, /(\d+)\z/ ? $1 : 0 ] }    # pair each name with its trailing number (0 if none)
             @files;
    # last file name is the biggest
    $next_file = $files[-1];
    # add one to it; a greedy (.*) in front of (\d+) would leave only the
    # last digit to increment ("new project19" -> "new project110")
    $next_file =~ s/(\d+)\z/$1 + 1/e
        or $next_file .= '1';    # name without a number yet
}
print "next file: $next_file\n";
Nothing wrong per se, but that's an awful lot of code to achieve a single objective (getting the minimum index of the directories).
A core module, couple of subs and few Schwartzian transforms will make the code more flexible:
use strict;
use warnings;
use List::Util 'min';
# num($name)
#
# Returns the digits of $name joined together:
# 'new project4' -> '4', 'new1_project4' -> '14' (!), no digits -> ''.
# Works on a copy, so the caller's value is left untouched.
sub num {
    ( my $digits = $_[0] ) =~ s|\D+||g;
    return $digits;
}

# min_index($dir, $filter)
#
# Returns the first free index for a new numbered entry in $dir.
#   $filter - regex that an entry name must match to be considered
#             (defaults to matching everything)
# The search starts at the smallest index carried by a matching directory
# and moves up while the index is carried by any matching entry.  Returns 0
# when no matching directory carries a number.  Dies when $dir cannot be
# opened.
sub min_index {
    my ( $dir, $filter ) = @_;
    $filter = qr/./ unless defined $filter; # match all if no filter specified
    opendir my $dirHandle, $dir or die $!;
    # Read the listing once; a second readdir on the same handle would
    # start at the end and return nothing.
    my @entries = grep { /$filter/ } readdir $dirHandle;
    closedir $dirHandle;

    my %in_use;
    my @dir_indices;
    for my $entry (@entries) {
        my $digits = num($entry);
        next unless length $digits;    # '.', '..' and unnumbered names
        $in_use{ $digits + 0 } = 1;
        # -d needs the full path; a bare name would be tested against the cwd.
        push @dir_indices, $digits + 0 if -d "$dir/$entry";
    }

    my $lowest_index = @dir_indices ? min(@dir_indices) : 0;
    $lowest_index++ while $in_use{$lowest_index};
    return $lowest_index;
}
# Usage: ask for the next free index among the "new project" entries of
# some/dir and build the directory name from it.
my $index = min_index( 'some/dir', qr/^new project/ );
my $new_project_name = join ' ', 'new project', $index;

DBD::CSV: How can I generate different behavior with the two f_ext-options ".csv" and ".csv/r"?

This is from the DBD::File-documentation:
f_ext
This attribute is used for setting the file extension where (CSV) files are opened. There are several possibilities.
DBI:CSV:f_dir=data;f_ext=.csv
In this case, DBD::File will open only table.csv if both table.csv and table exist in the datadir. The table will still be named table. If your datadir has files with extensions, and you do not pass this attribute, your table is named table.csv, which is probably not what you wanted. The extension is always case-insensitive. The table names are not.
DBI:CSV:f_dir=data;f_ext=.csv/r
In this case the extension is required, and all filenames that do not match are ignored.
It was not possible for me to generate different behavior with the two options ".csv/r" and ".csv". Could someone show me an example, where I can see the difference between ".csv/r" and ".csv"?
I can't seem to get it to do anything different either. The relevant section of code is
# file2table($data, $dir, $file, $file_is_tab, $quoted)
#
# Maps the file $file in directory $dir to a table name.
#   $data->{f_ext} - ".ext" (extension optional) or ".ext/r" (required)
#   $file_is_tab   - $file is a table name; the extension is appended
#   $quoted        - when false, the on-disk spelling of $file is looked up
#                    case-insensitively in $dir
# On success stores the fully qualified file name in $data->{f_map}{$tbl}
# and returns the table name $tbl.  Returns nothing when the file is "." or
# "..", when a required extension is missing, or (optional extension) when
# a file with the extension exists next to the extension-less one.
# The extension is matched literally; its "." is not a regex wildcard.
sub file2table
{
    my ($data, $dir, $file, $file_is_tab, $quoted) = @_;
    $file eq "." || $file eq ".." and return;
    my ($ext, $req) = ("", 0);
    if ($data->{f_ext}) {
        ($ext, my $opt) = split m/\//, $data->{f_ext};
        defined $ext or $ext = "";    # f_ext was just "/"
        if ($ext && $opt) {
            $opt =~ m/r/i and $req = 1;
        }
    }
    (my $tbl = $file) =~ s/\Q$ext\E$//i;
    $file_is_tab and $file = "$tbl$ext";
    # Fully Qualified File Name
    my $fqfn;
    unless ($quoted) { # table names are case insensitive in SQL
        opendir my $dh, $dir or croak "Can't open '$dir': $!";
        my @f = grep { lc $_ eq lc $file } readdir $dh;
        @f == 1 and $file = $f[0];
        closedir $dh or croak "Can't close '$dir': $!";
    }
    $fqfn = File::Spec->catfile ($dir, $file);
    $file = $fqfn;
    if ($ext) {
        if ($req) {
            # File extension required
            $file =~ s/\Q$ext\E$//i or return;
        }
        else {
            # File extension optional, skip if file with extension exists
            grep m/\Q$ext\E$/i, glob "$fqfn.*" and return;
            $file =~ s/\Q$ext\E$//i;
        }
    }
    $data->{f_map}{$tbl} = $fqfn;
    return $tbl;
} # file2table
Does this demonstrate the difference?:
sandbox % echo "a,b,c" > foo
sandbox % echo "a,b,c" > foo.csv
sandbox % echo "a,b,c" > bar
sandbox % echo "a,b,c" > baz.csv
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv")->tables'
"merijn".baz
"merijn".bar
"merijn".foo
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv/r")->tables'
"merijn".baz
"merijn".foo
sandbox %
f_ext=.csv only makes the .csv extension a preference, not a requirement: in the first case, the file "bar" with no .csv extension is still used, but "foo.csv" is chosen over "foo". With f_ext=.csv/r, "bar" is ignored, as it has no ".csv" extension.
Now in version 0.39 of DBD::File this part looks like this:
# file2table($self, $meta, $file, $file_is_table, $respect_case)
#
# Resolves $file to a table name and records the result in $meta.
#   $meta->{f_ext}  - ".ext" (extension optional) or ".ext/r" (required)
#   $meta->{f_dir}  - data directory the file is looked up in
#   $meta->{f_file} - explicit file name, used when $file_is_table is set
#   $meta->{sql_identifier_case} - 1 upper-cases and 2 lower-cases the table
#       name; 4 takes the spelling from the file found on disk
#       (all three only when $respect_case is false)
# Sets $meta->{f_fqfn}, $meta->{f_fqbn}, $meta->{table_name} and, when
# $meta->{f_lockfile} holds a true value, $meta->{f_fqln}.
# Returns the table name, or nothing when the file is "." or ".." or when a
# required extension is missing.  Croaks when the directory does not exist
# or cannot be read.
# The extension is matched literally; its "." is not a regex wildcard.
sub file2table
{
    my ($self, $meta, $file, $file_is_table, $respect_case) = @_;
    $file eq "." || $file eq ".." and return; # XXX would break a possible DBD::Dir
    my ($ext, $req) = ("", 0);
    if ($meta->{f_ext}) {
        ($ext, my $opt) = split m/\//, $meta->{f_ext};
        defined $ext or $ext = "";    # f_ext was just "/"
        if ($ext && $opt) {
            $opt =~ m/r/i and $req = 1;
        }
    }
    # (my $tbl = $file) =~ s/$ext$//i;
    my ($tbl, $dir, $user_spec_file);
    if ($file_is_table and defined $meta->{f_file}) {
        $tbl = $file;
        ($file, $dir, undef) = File::Basename::fileparse ($meta->{f_file});
        $user_spec_file = 1;
    }
    else {
        # fileparse treats its suffix arguments as patterns, hence quotemeta
        ($tbl, $dir, undef) = File::Basename::fileparse ($file, quotemeta $ext);
        $user_spec_file = 0;
    }
    -d File::Spec->catdir ($meta->{f_dir}, $dir) or
        croak (File::Spec->catdir ($meta->{f_dir}, $dir) . ": $!");
    !$respect_case and $meta->{sql_identifier_case} == 1 and # XXX SQL_IC_UPPER
        $tbl = uc $tbl;
    !$respect_case and $meta->{sql_identifier_case} == 2 and # XXX SQL_IC_LOWER
        $tbl = lc $tbl;
    my $searchdir = File::Spec->file_name_is_absolute ($dir)
        ? $dir
        : Cwd::abs_path (File::Spec->catdir ($meta->{f_dir}, $dir));
    $searchdir eq $meta->{f_dir} and
        $dir = "";
    unless ($user_spec_file) {
        $file_is_table and $file = "$tbl$ext";
        # Fully Qualified File Name
        # $cmpsub decides whether the directory entry in $_ is a candidate:
        # same base name as $tbl and either the configured extension or,
        # when the extension is optional, no extension at all.
        my $cmpsub;
        if ($respect_case) {
            $cmpsub = sub {
                my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
                $fn eq $tbl and
                    return (lc $sfx eq lc $ext or !$req && !$sfx);
                return 0;
            }
        }
        else {
            $cmpsub = sub {
                my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
                lc $fn eq lc $tbl and
                    return (lc $sfx eq lc $ext or !$req && !$sfx);
                return 0;
            }
        }
        opendir my $dh, $searchdir or croak "Can't open '$searchdir': $!";
        # Longest name first, so "table.ext" is preferred over "table".
        my @f = sort { length $b <=> length $a } grep { $cmpsub->($_) } readdir $dh;
        @f > 0 && @f <= 2 and $file = $f[0];
        !$respect_case && $meta->{sql_identifier_case} == 4 and # XXX SQL_IC_MIXED
            ($tbl = $file) =~ s/\Q$ext\E$//i;
        closedir $dh or croak "Can't close '$searchdir': $!";
        #(my $tdir = $dir) =~ s{^\./}{}; # XXX We do not want all tables to start with ./
        #$tdir and $tbl = File::Spec->catfile ($tdir, $tbl);
        $dir and $tbl = File::Spec->catfile ($dir, $tbl);
        my $tmpfn = $file;
        if ($ext) {
            if ($req) {
                # File extension required
                $tmpfn =~ s/\Q$ext\E$//i or return;
            }
            # else {
            # # File extension optional, skip if file with extension exists
            # grep m/$ext$/i, glob "$fqfn.*" and return;
            # $tmpfn =~ s/$ext$//i;
            # }
        }
    }
    my $fqfn = File::Spec->catfile ($searchdir, $file);
    my $fqbn = File::Spec->catfile ($searchdir, $tbl);
    $meta->{f_fqfn} = $fqfn;
    $meta->{f_fqbn} = $fqbn;
    # "!defined X && X" can never be true, so the lock file name was never
    # recorded; the test has to be "defined and true".
    defined $meta->{f_lockfile} && $meta->{f_lockfile} and
        $meta->{f_fqln} = $meta->{f_fqbn} . $meta->{f_lockfile};
    $meta->{table_name} = $tbl;
    return $tbl;
} # file2table
As far as I can see, the two f_ext-options are working as expected.