Perl Script to remove files older than a year - perl

I am trying to make a script which will delete files older than a year.
This is my code:
#!/usr/bin/perl
# List files older than a per-extension number of days under a fixed directory.
use strict;
use warnings;

# Pipe-separated list of file extensions to process.
my $files    = ".dat|.exe";
my @file_del = split /\|/, $files;    # '@' sigils were mangled to '#' in the paste
print "@file_del\n";

# One age threshold (in days) per extension, pipe-separated.
my $dates    = "365|365|365|365";
my @date_del = split /\|/, $dates;

if ( $#date_del == 0 ) {
    # Single threshold: apply it to every extension.
    for my $ext (@file_del) {
        # Quote the -name pattern so the shell does not expand '*' itself;
        # '+N' selects files modified more than N days ago.
        my $f = `find "/Users/ABC/Desktop/mydata" -name "*$ext" -mtime +$date_del[0]`;
        print $f;
    }
}
else {
    # One threshold per extension, matched by position.
    for my $i ( 0 .. $#file_del ) {
        my $f = `find "/Users/ABC/Desktop/mydata" -name "*$file_del[$i]" -mtime +$date_del[$i]`;
        print $f;
    }
}
Issues I am facing:
It is not detecting .txt files, though it does detect .data, .exe, .dat, etc.
Also, -mtime is 365, but in a leap year (366 days) I have to change my script.

# Delete files in $myDir older than $cleanupDays days, adding one day when
# the look-back window crosses a Feb 29.
my $myDir       = "/Users/ABC/Desktop/mydata/";
my $cleanupDays = 365;    # the original was missing this semicolon

# If we are in Jan/Feb, the relevant leap day belongs to the previous year.
my $currentMonth = (localtime)[4] + 1;      # localtime months are 0-based
my $currentyear  = (localtime)[5] + 1900;
if ( $currentMonth < 3 ) {
    $currentyear -= 1;
}
# Gregorian leap-year rule: divisible by 4, except centuries unless by 400.
if ( 0 == $currentyear % 4 and 0 != $currentyear % 100 or 0 == $currentyear % 400 ) {
    $cleanupDays += 1;
}

my $nbFiles = 0;
my $runDay  = ( time - $^T ) / 86400;       # days the script has been running
opendir my $dh, $myDir
    or die "$0 - ERROR directory '$myDir' doesn't exist\n";   # stray ')' removed
foreach my $fileName ( grep !/^\./, readdir $dh ) {
    # -M is the age in days relative to script start ($^T); add the
    # elapsed runtime so long-running invocations stay accurate.
    if ( ( ( -M "$myDir$fileName" ) + $runDay ) > $cleanupDays ) {
        # Only count files actually removed (the original counted failures too).
        if ( unlink "$myDir$fileName" ) {
            $nbFiles++;
        }
        else {
            print "ERROR:NOT deleted:$fileName ";
        }
    }
}
closedir $dh;
print "$nbFiles files deleted\n";

Use the brilliant Path::Class to make life easy:
use Modern::Perl;
use Path::Class;

# Walk the tree under mydata and report regular files whose ctime is more
# than ~365 days old (no leap-year handling).
my $dir = dir( '/Users', 'ABC', 'Desktop', 'mydata' );
$dir->traverse( sub {
    my ( $child, $cont ) = @_;    # '@_' was mangled to '#_' in the paste
    if ( not $child->is_dir and $child->stat ) {
        # NOTE: ctime is the inode-change time; use ->stat->mtime if the
        # modification time is what is actually wanted.
        if ( $child->stat->ctime < ( time - 365 * 86400 ) ) {
            say "$child: " . localtime( $child->stat->ctime );
            # to delete:
            # unlink $child;
        }
    }
    return $cont->();    # keep traversing into children
} );

You can also use the command find2perl. Like:
find2perl . -mtime +365 -exec rm {} \;
What will produce a perl script to use the File::Find - e.g.:
use strict;
use File::Find ();

# Glob aliases into File::Find, as emitted by find2perl.
use vars qw/*name *dir *prune/;
*name  = *File::Find::name;
*dir   = *File::Find::dir;
*prune = *File::Find::prune;

sub wanted;

# Traverse the current directory, deleting matching files.
File::Find::find({wanted => \&wanted}, '.');
exit;

# Delete plain entries OLDER than 365 days.  The original compared
# 'int(-M _) < 365', which matches files NEWER than a year — the opposite
# of the stated goal.
sub wanted {
    my ($dev,$ino,$mode,$nlink,$uid,$gid);
    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    (int(-M _) > 365) &&
    (unlink($_) || warn "$name: $!\n");
}

Related

Find the biggest file in a directory. Perl

The program lists all files in a directory, their size, type and owner. In case file is a directory, owner is the owner of the biggest file in that directory (that's the problem).
use warnings;
use strict;
use diagnostics;
use feature 'say';
use File::Find;

# List each entry of the given directory as "name:owner:type:size",
# where size is summed recursively for directories.
my $dir = shift || die "Provide a directory name";

my @file;     # entry paths            ('@' sigils were mangled to '#')
my @owner;    # owner user names
my @size;     # sizes (recursive sum of plain files)
my @type;     # d = directory, l = symlink, f = plain file

my $i = 0;
# BUG FIX: glob() in a plain while condition does NOT set $_ (that magic
# is for readline/<>), so bind the result to a named variable.
while ( my $entry = glob("$dir/*") )
{
    $file[$i] = $entry;
    # Sum the sizes of all plain files at or below the entry.
    find( sub { $size[$i] += -s if -f }, $entry );
    if ( -d $file[$i] )
    {
        $type[$i] = "d";
    }
    elsif ( -l $file[$i] )
    {
        $type[$i] = "l";
    }
    else
    {
        $type[$i] = "f";
    }
    # stat field 4 is the numeric uid; translate it to a user name.
    $owner[$i] = getpwuid( ( stat($entry) )[4] );
    print "$file[$i]:$owner[$i]:$type[$i]:$size[$i]\n";
    $i++;
}
At this point in code
if( -d $file[$i] )
{
$type[$i] = "d";
$owner[$i] = getpwuid((stat($_))[4]);
}
i have to find the biggest file in this directory. I figured, that i should use find function, but not sure on how to do it.
Please investigate the following code piece for compliance with your task.
The code uses recursion for directories, core component is glob function.
The result of the directory lookup is returned as a hash reference. Feel free to use this reference however you like.
use strict;
use warnings;
use feature 'say';
use Data::Dumper;

my $dir = shift || die "Provide directory";
my $result = dir_lookup($dir);
say Dumper($result);
exit 0;

# Recursively scan a directory.  Returns a hash ref:
#   file  => record of the biggest plain file found (name/size/owner/type)
#   items => array ref of records for every entry in the directory
sub dir_lookup {
    my $dir = shift;
    my ( $record, $max );
    my @items = glob("$dir/*");    # '@' sigils were mangled to '#'
    $max = 0;
    for my $name (@items) {
        my $data;
        $data->{name}  = $name;
        $data->{size}  = -s $name;
        $data->{owner} = getpwuid( ( stat($name) )[4] );
        $data->{type}  = 'link' if -l $name;
        $data->{type}  = 'file' if -f $name;
        $data->{type}  = 'dir'  if -d $name;
        # Track the biggest plain file; test -f first so directories never
        # enter the size comparison.
        if ( -f $name and $data->{size} > $max ) {
            $max = $data->{size};
            $record->{file} = $data;
        }
        if ( $data->{type} eq 'dir' ) {
            # A directory inherits the record (and the owner) of the
            # biggest file somewhere below it.
            my $r = dir_lookup( $data->{name} );
            $data->{file}  = $r->{file};
            $data->{owner} = $r->{file}{owner};
        }
        push @{ $record->{items} }, $data;
    }
    return $record;    # was 'return $records' — an undeclared typo under strict
}

"No such file" when opening multiple files in a directory, but no error when opening only one file

I can open one file in a directory and run the following code. However, when I try to use the same code on multiple files within a directory, I get an error regarding there not being a file.
I have tried to make sure that I am naming the files correctly, that they are in the right format, that they are located in my current working directory, and that things are referenced correctly.
I know a lot of people have had this error before and have posted similar questions, but any help would be appreciated.
Working code:
#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;
use List::Util qw( min max );

# Slide a fixed-size window over the raw sequence, score each window
# against the primer with a Levenshtein distance, and if the best match
# is close enough (< 8 edits) replace it in the sequence and print.

my $RawSequence = loadSequence("LDTest.fasta");
my $windowSize  = 38;
my $stepSize    = 1;
my %hash;    # distance => window sequence that achieved it

# The 28 nt forward primer after the barcode plus the first 10 nt of the
# mtDNA sequence.
my $primer = 'CGGAGCTTTACGAGCCGTAGCCCAAACAGTTAATGTAG';

for ( my $windowStart = 0; $windowStart <= 140; $windowStart += $stepSize ) {
    my $window = substr( $$RawSequence, $windowStart, $windowSize );
    my $dist   = levdist( $window, $primer );
    $hash{$dist} = $window;
    #print "Distance between '$window' and '$primer' is $dist\n";
}

my @keys      = sort { $a <=> $b } keys %hash;
my $BestMatch = $hash{ $keys[0] };    # was '$hash{ keys [0] }' — wrong syntax
if ( $keys[0] < 8 ) {
    $$RawSequence =~ s/\Q$BestMatch\E/$primer/g;
    print ">|Forward|Distance_of_Best_Match: $keys[0] |Sequence_of_Best_Match: $BestMatch", "\n",
        "$$RawSequence", "\n";
}

# Levenshtein edit distance between two strings, with free end gaps: the
# minimum over the last row and last column of the DP table is returned.
# Defined at top level — the original nested it inside the for loop, and
# worse, measured length($s1)/length($s2) against outer lexicals that were
# never assigned, so both lengths were always 0.
sub levdist {
    my ( $seq1, $seq2 ) = @_;    # '@_' was mangled to '#_'
    my $l1 = length($seq1);
    my $l2 = length($seq2);
    my @s1 = split '', $seq1;
    my @s2 = split '', $seq2;
    my $distances;
    # Base cases: distance from the empty prefix is the prefix length.
    for ( my $i = 0; $i <= $l1; $i++ ) {
        $distances->[$i]->[0] = $i;
    }
    for ( my $j = 0; $j <= $l2; $j++ ) {
        $distances->[0]->[$j] = $j;
    }
    for ( my $i = 1; $i <= $l1; $i++ ) {
        for ( my $j = 1; $j <= $l2; $j++ ) {
            my $cost = ( $s1[ $i - 1 ] eq $s2[ $j - 1 ] ) ? 0 : 1;
            $distances->[$i]->[$j] = minimum(
                $distances->[ $i - 1 ]->[ $j - 1 ] + $cost,    # substitute
                $distances->[$i]->[ $j - 1 ] + 1,              # insert
                $distances->[ $i - 1 ]->[$j] + 1,              # delete
            );
        }
    }
    # Free end gaps: take the best value anywhere in the last row/column.
    my $min_distance = $distances->[$l1]->[$l2];
    for ( my $i = 0; $i <= $l1; $i++ ) {
        $min_distance = minimum( $min_distance, $distances->[$i]->[$l2] );
    }
    for ( my $j = 0; $j <= $l2; $j++ ) {
        $min_distance = minimum( $min_distance, $distances->[$l1]->[$j] );
    }
    return $min_distance;
}

# Smallest of a list of numbers.
sub minimum {
    my $min = shift @_;
    foreach (@_) {
        $min = $_ if $_ < $min;
    }
    return $min;
}

# Read a FASTA file and return a REFERENCE to the concatenated sequence;
# header lines (starting with '>') are skipped.
sub loadSequence {
    my ($sequenceFile) = @_;
    my $sequence = "";
    open( my $fasta_fh, "<", $sequenceFile )
        or die "Cannot open $sequenceFile: $!";
    while ( my $line = <$fasta_fh> ) {
        chomp($line);
        if ( $line !~ /^>/ ) {
            $sequence .= $line;
        }
    }
    close($fasta_fh);
    return \$sequence;
}
Here is an abbreviated version of my non-working code. Things that haven't changed I didn't included:
Headers and Globals:
# NOTE(review): in this paste the '@' sigils were mangled to '#', which makes
# the rest of those lines comments; read '#ArrayofFiles' as '@ArrayofFiles'.
my $dir = ("/Users/roblogan/Documents/FakeFastaFiles");
my #ArrayofFiles = glob "$dir/*.fasta";
# BUG (the subject of this question): $OpenedFile below is a LINE read from
# an already-opened file, not a filename, yet it is passed to loadSequence(),
# which tries to open() it — hence "No such file or directory".
foreach my $file ( #ArrayofFiles ) {
open( my $Opened, $file ) or die "can't open file: $!";
while ( my $OpenedFile = <$Opened> ) {
my $RawSequence = loadSequence($OpenedFile);
for ( ... ) {
...;
print
">|Forward|Distance_of_Best_Match: $keys[0] |Sequence_of_Best_Match: $BestMatch",
"\n", "$$RawSequence", "\n";
}
}
}
The exact error is:
Uncaught exception from user code:
No such file or directory at ./levenshtein_for_directory.pl line 93, <$Opened> line 1.
main::loadSequence('{\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf470\x{a}') called at ./levenshtein_for_directory.pl line 22
line 93:
89 sub loadSequence{
90 my ($sequenceFile) = #_;
91 my $sequence = "";
92 unless (open(FASTA, "<", $sequenceFile)){
93 die $!;
94 }
Line 22:
18 foreach my $file ( #ArrayofFiles ) {
19 open (my $Opened, $file) or die "can't open file: $!";
20 while (my $OpenedFile = <$Opened>) {
21
22 my $RawSequence = loadSequence($OpenedFile);
23
I just learned that "FASTA file" is a settled term. I wasn't aware of that and previously thought they were just files containing filenames or something. As @zdim already said, you're opening these files twice.
The following code gets a list of FASTA files (only the filenames) and then calls loadSequence with each such filename. That subroutine then opens the given file, concatenates the non-'>' lines into one big string, and returns it.
# input:  the NAME of a FASTA file
# return: all sequences in that file as one very long string
#         (header lines starting with '>' are skipped)
sub loadSequence
{
    my ($fasta_filename) = @_;    # '@_' was mangled to '#_' in the paste
    my $sequence = "";
    open( my $fasta_fh, '<', $fasta_filename )
        or die "Cannot open $fasta_filename: $!\n";
    while ( my $line = <$fasta_fh> ) {
        chomp($line);
        if ( $line !~ /^>/ ) {
            $sequence .= $line;    # non-header lines are sequence data
        }
    }
    close($fasta_fh);
    return $sequence;
}
# ...
my $dir = '/Users/roblogan/Documents/FakeFastaFiles';
my @ArrayofFiles = glob "$dir/*.fasta";    # '@' sigil was mangled to '#'

foreach my $filename (@ArrayofFiles) {
    # Pass the file NAME; loadSequence opens the file itself.
    my $RawSequence = loadSequence($filename);
    # ...
}
You seem to be trying to open files twice. The line
my #ArrayofFiles = glob "$dir/*.fasta";
Gives you the list of files. Then
foreach my $file (#ArrayofFiles){
open (my $Opened, $file) or die "can't open file: $!";
while (my $OpenedFile = <$Opened>) {
my $RawSequence = loadSequence($OpenedFile);
# ...
does the following, line by line. It iterates through files, opens each, reads a line from it, and then submits that line to the function loadSequence().
However, in that function you attempt to open a file again
sub loadSequence{
my ($sequenceFile) = #_;
my $sequence = "";
unless (open(FASTA, "<", $sequenceFile)){
# ...
The $sequenceFile variable in the function is passed to the function as $OpenedFile -- which is a line in the file that is already opened and being read from, not the file name. While I am not certain about details of your code, the error you show seems to be consistent with this.
It may be that you are confusing the glob, which gives you the list of files, with the opendir which would indeed need a following readdir to access the files.
Try renaming $OpenedFile to, say, $line (which it is) and see how it looks then.

perl Parallel::ForkManager stuck in this script?

I am trying to run a Perl script in parallel and got stuck at a point here. See the example script:
If I run it without the -fork 4 option, it runs fine:
perl perl_parallel_forkmanager_ls.pl -limit 10
799c89a4c78eafbfb9e7962b8e9705f7 /etc/apt/trusted.gpg
ff163e8e9e38670705a9f2cea8b530c9 /etc/apt/trusted.gpg~
075e92fd5c6f0dcdad857603f03dd3a5 /etc/bash_completion.d/R
b269c1383a87a7da2cc309c929ba35ca /etc/bash_completion.d/grub
7cbefff45508d2ed69576bebc80e66bb /etc/bash_completion.d/docker
facb1fdc0fcf7f6b150442d1a9036795 /etc/bash_completion.d/pulseaudio-bash-completion.sh
69dfca7a7b55181cef06b9ed28debb20 /etc/gnome/defaults.list
a65e81e55558941ce0f3080b9333e18f /etc/sensors3.conf
9e87bc86a77261acfb2bae618073a787 /etc/grub.d/20_linux_xen
8039709ee9648dabda0cdca713f2ed49 /etc/grub.d/30_os-prober
1bc18861cc2438517ce6b6c22fd4fa49 /etc/grub.d/10_linux
But if I run it with a value of -fork 4 smaller than the value of -limit 10, it ignores the value of limit:
perl perl_parallel_forkmanager_ls.pl -fork 4 -limit 10 2>/dev/null | wc -l
80
Any ideas?
#!/usr/bin/perl
use strict;
use warnings;
use Parallel::ForkManager;
use Getopt::Long;

# md5sum every file under --dir larger than --size bytes, optionally in
# parallel (--fork N) and stopping after --limit files.
my $dir  = '/etc';
my $fork = 1;
my $size = '9876';
my $limit;
my $verbose;
GetOptions(
    'dir:s'   => \$dir,
    'fork:s'  => \$fork,
    'size:s'  => \$size,
    'limit:s' => \$limit,
    'verbose' => \$verbose,
);

my $cmd = "find $dir -size +" . $size . "c -type f 2>/dev/null";
open( my $find_fh, "-|", $cmd ) or die "$cmd -- $!";

my $pm;
$pm = Parallel::ForkManager->new($fork) if $fork > 1;    # ->new, not indirect 'new'

my $count = 0;
while (<$find_fh>) {
    # BUG FIX: count/limit must run in the PARENT, before start().
    # Statements after $pm->finish are only ever reached in the child
    # (which exits there), so the original bookkeeping never ran when
    # forking and --limit was ignored.
    $count++;
    last if defined $limit && $count > $limit;
    if ( $fork > 1 ) {
        $pm->start and next;    # parent: next file; child: falls through
    }
    my $file = $_;
    chomp $file;
    my $md5 = `md5sum "$file"`;    # quote: filenames may contain spaces
    print $md5;
    $pm->finish if $fork > 1;      # child terminates here
}
$pm->wait_all_children if $fork > 1;
close $find_fh;
The statements after $pm->finish are never reached when -fork > 1 is given.. You should change the order of the statements in the while loop:
# Corrected loop order: the counter and the limit check now run in the
# PARENT, before $pm->start, so they execute even when forking.  Anything
# placed after $pm->finish only runs in the child, which exits there.
while (<P>) {
$count++;
last if (defined $limit && $count > $limit);
if ($fork > 1) {
$pm->start and next;   # parent: continue with next file; child: falls through
}
my $file = $_; chomp $file;
my $md5 = `md5sum $file`;
print "$md5";
$pm->finish if ($fork > 1);   # child terminates here
};

Recursive unique search in particular sub directories - Perl

Here's my directory structure..
Current
/ | \
a d g
/ \ / \ |
b c e morning evenin
/ \ / \ |
hello hi bad good f
/ \
good night
Where current, a,b,c,d,e, f,g are directories and other are files.
Now I want to recursively search in current folder such that the search shouldn't be done only in g folder of current directory. Plus, as 'good' file is same in current-a-c-good and current-d-e-f-good, the contents of it should be listed only once.
Can you please help me how to do it?
The suggestion of Paulchenkiller in the comments is fine. The File::Find module searches recursively and lets you easily decide what to do with files and directories during its traversal. Here you have something similar to what you are looking for. It uses the preprocess option to prune the directory and the wanted option to collect all file names.
#!/usr/bin/env perl
use strict;
use warnings;
use File::Find;

# Collect unique file names under '.', skipping the top-level 'g'
# directory, and print each name once.
my %processed_files;

find( { wanted     => \&wanted,
        preprocess => \&dir_preprocess,
      }, '.',
);

for ( keys %processed_files ) {
    printf qq|%s\n|, $_;
}

# Remove the subdirectory 'g' from the scan, but only at the top level.
sub dir_preprocess {
    my (@entries) = @_;    # '@' sigils were mangled to '#' in the paste
    if ( $File::Find::dir eq '.' ) {
        @entries = grep { !( -d && $_ eq 'g' ) } @entries;
    }
    return @entries;
}

# Record each plain (non-symlink) file name exactly once.
sub wanted {
    if ( -f && !-l && !defined $processed_files{$_} ) {
        $processed_files{$_} = 1;
    }
}
my $path = "/some/path";
my $filenames = {};
recursive($path);
print join( "\n", keys %$filenames );

# Depth-first walk: record the name of every non-directory entry (names are
# hash keys, so duplicates across directories are listed only once), and
# recurse into subdirectories.
sub recursive
{
    my $p = shift;

    opendir my $d, $p or die "Can't open '$p': $!";    # was unchecked
    # readdir in a while condition implies defined($_ = readdir $d) (5.12+).
    while ( readdir $d )
    {
        next if /^\./;    # skips '.' and '..' (but also any '.dotfile')

        if ( -d "$p/$_" )
        {
            recursive("$p/$_");
        }
        else
        {
            $filenames->{$_} = 1;
        }
    }
    closedir $d;
}

DBD::CSV: How can I generate different behavior with the two f_ext-options ".csv" and ".csv/r"?

This is from the DBD::File-documentation:
f_ext
This attribute is used for setting the file extension where (CSV) files are opened. There are several possibilities.
DBI:CSV:f_dir=data;f_ext=.csv
In this case, DBD::File will open only table.csv if both table.csv and table exist in the datadir. The table will still be named table. If your datadir has files with extensions, and you do not pass this attribute, your table is named table.csv, which is probably not what you wanted. The extension is always case-insensitive. The table names are not.
DBI:CSV:f_dir=data;f_ext=.csv/r
In this case the extension is required, and all filenames that do not match are ignored.
It was not possible for me to generate different behavior with the two options ".csv/r" and ".csv". Could someone show me an example, where I can see the difference between ".csv/r" and ".csv"?
I can't seem to get it to do anything different either. The relevant section of code is
# DBD::File (older version): map a file name in $dir to a table name,
# honouring the f_ext attribute — "<ext>" means the extension is preferred,
# "<ext>/r" means it is required and non-matching files are ignored.
sub file2table
{
    my ($data, $dir, $file, $file_is_tab, $quoted) = @_;   # '@_' was mangled to '#_'

    $file eq "." || $file eq ".." and return;

    # Split f_ext into the extension itself and the optional "/r" flag.
    my ($ext, $req) = ("", 0);
    if ($data->{f_ext}) {
        ($ext, my $opt) = split m/\//, $data->{f_ext};
        if ($ext && $opt) {
            $opt =~ m/r/i and $req = 1;
        }
    }

    # Table name = file name with the (case-insensitive) extension stripped.
    (my $tbl = $file) =~ s/$ext$//i;
    $file_is_tab and $file = "$tbl$ext";

    # Fully Qualified File Name
    my $fqfn;
    unless ($quoted) { # table names are case insensitive in SQL
        opendir my $dh, $dir or croak "Can't open '$dir': $!";
        my @f = grep { lc $_ eq lc $file } readdir $dh;    # '@f' was mangled to '#f'
        @f == 1 and $file = $f[0];
        closedir $dh or croak "Can't close '$dir': $!";
    }
    $fqfn = File::Spec->catfile ($dir, $file);

    $file = $fqfn;
    if ($ext) {
        if ($req) {
            # File extension required
            $file =~ s/$ext$//i or return;
        }
        else {
            # File extension optional, skip if file with extension exists
            grep m/$ext$/i, glob "$fqfn.*" and return;
            $file =~ s/$ext$//i;
        }
    }
    $data->{f_map}{$tbl} = $fqfn;
    return $tbl;
} # file2table
Does this demonstrate the difference?:
sandbox % echo "a,b,c" > foo
sandbox % echo "a,b,c" > foo.csv
sandbox % echo "a,b,c" > bar
sandbox % echo "a,b,c" > baz.csv
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv")->tables'
"merijn".baz
"merijn".bar
"merijn".foo
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv/r")->tables'
"merijn".baz
"merijn".foo
sandbox %
f_ext=.csv only makes the .csv a preference, but not a requirement: in the first case, the file "bar" with no .csv extension is still used, but "foo.csv" is chosen over "foo". With f_ext=.csv/r, "bar" is ignored, as it has no ".csv" extension.
Now in version 0.39 of DBD::File this part looks like this:
# DBD::File 0.39: map a file name to a table name, honouring f_ext
# ("<ext>" preferred, "<ext>/r" required) and sql_identifier_case.
# Sets f_fqfn / f_fqbn / f_fqln / table_name in $meta as side effects.
sub file2table
{
    my ($self, $meta, $file, $file_is_table, $respect_case) = @_;   # '@_' was mangled to '#_'

    $file eq "." || $file eq ".." and return; # XXX would break a possible DBD::Dir

    # Split f_ext into the extension and the optional "/r" (required) flag.
    my ($ext, $req) = ("", 0);
    if ($meta->{f_ext}) {
        ($ext, my $opt) = split m/\//, $meta->{f_ext};
        if ($ext && $opt) {
            $opt =~ m/r/i and $req = 1;
        }
    }

    # (my $tbl = $file) =~ s/$ext$//i;
    my ($tbl, $dir, $user_spec_file);
    if ($file_is_table and defined $meta->{f_file}) {
        $tbl = $file;
        ($file, $dir, undef) = File::Basename::fileparse ($meta->{f_file});
        $user_spec_file = 1;
    }
    else {
        ($tbl, $dir, undef) = File::Basename::fileparse ($file, $ext);
        $user_spec_file = 0;
    }

    -d File::Spec->catdir ($meta->{f_dir}, $dir) or
        croak (File::Spec->catdir ($meta->{f_dir}, $dir) . ": $!");

    # Fold the table name unless the caller asked to respect case.
    !$respect_case and $meta->{sql_identifier_case} == 1 and # XXX SQL_IC_UPPER
        $tbl = uc $tbl;
    !$respect_case and $meta->{sql_identifier_case} == 2 and # XXX SQL_IC_LOWER
        $tbl = lc $tbl;

    my $searchdir = File::Spec->file_name_is_absolute ($dir)
        ? $dir
        : Cwd::abs_path (File::Spec->catdir ($meta->{f_dir}, $dir));
    $searchdir eq $meta->{f_dir} and
        $dir = "";

    unless ($user_spec_file) {
        $file_is_table and $file = "$tbl$ext";

        # Fully Qualified File Name: pick the matching directory entry,
        # comparing name and suffix case-sensitively or not as requested.
        my $cmpsub;
        if ($respect_case) {
            $cmpsub = sub {
                my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
                $fn eq $tbl and
                    return (lc $sfx eq lc $ext or !$req && !$sfx);
                return 0;
                }
            }
        else {
            $cmpsub = sub {
                my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
                lc $fn eq lc $tbl and
                    return (lc $sfx eq lc $ext or !$req && !$sfx);
                return 0;
                }
            }

        opendir my $dh, $searchdir or croak "Can't open '$searchdir': $!";
        my @f = sort { length $b <=> length $a } grep { &$cmpsub ($_) } readdir $dh;   # '@f' was '#f'
        @f > 0 && @f <= 2 and $file = $f[0];
        !$respect_case && $meta->{sql_identifier_case} == 4 and # XXX SQL_IC_MIXED
            ($tbl = $file) =~ s/$ext$//i;
        closedir $dh or croak "Can't close '$searchdir': $!";

        #(my $tdir = $dir) =~ s{^\./}{}; # XXX We do not want all tables to start with ./
        #$tdir and $tbl = File::Spec->catfile ($tdir, $tbl);
        $dir and $tbl = File::Spec->catfile ($dir, $tbl);

        my $tmpfn = $file;
        if ($ext) {
            if ($req) {
                # File extension required
                $tmpfn =~ s/$ext$//i or return;
            }
            # else {
            #     # File extension optional, skip if file with extension exists
            #     grep m/$ext$/i, glob "$fqfn.*" and return;
            #     $tmpfn =~ s/$ext$//i;
            #     }
        }
    }

    my $fqfn = File::Spec->catfile ($searchdir, $file);
    my $fqbn = File::Spec->catfile ($searchdir, $tbl);
    $meta->{f_fqfn} = $fqfn;
    $meta->{f_fqbn} = $fqbn;
    !defined $meta->{f_lockfile} && $meta->{f_lockfile} and
        $meta->{f_fqln} = $meta->{f_fqbn} . $meta->{f_lockfile};
    $meta->{table_name} = $tbl;
    return $tbl;
} # file2table
As far as I can see, the two f_ext-options are working as expected.