Read multiple files from folder in perl - perl

I'm pretty new on perl and in need for some help, basically what I want is a program that reads all .txt files from a folder, doing the script and throw the output in a new folder with a new name. Everything works when I'm working with one file at the time, specifying the name of the file.. But I can't get it to work with all of the files in the folder. This is how far I've gotten.
#!/usr/bin/perl
use warnings;
use strict;
use Path::Class;
use autodie;
use File::Find;
my #now = localtime();
my $timeStamp = sprintf(
"%04d%02d%02d-%02d:%02d:%02d",
$now[5] + 1900,
$now[4] + 1,
$now[3], $now[2], $now[1], $now[0]); #A function that translates time
my %wordcount;
my $dir = "/home/smenk/.filfolder";
opendir(DIR, $dir) || die "Kan inte öppna $dir: $!";
my #files = grep { /txt/ } readdir(DIR);
closedir DIR;
my $new_dir = dir("/home/smenk/.result"); # Reads in the folder for save
my $new_file = $new_dir->file("$timeStamp.log"); # Reads in the new file timestamp variable
open my $fh, '<', $dir or die "Kunde inte öppna '$dir' $!";
open my $fhn, '>', $new_file or die "test '$new_file'";
foreach my $file (#files) {
open(FH, "/home/smenk/.filfolder/$file") || die "Unable to open $file - $!\n";
while (<FH>) {
}
close(FH);
}
while (my $line = <$fh>) {
foreach my $str (split /\s+/, $line) {
$wordcount{$str}++;
}
}
my #listing = (sort { $wordcount{$b} <=> $wordcount{$a} } keys %wordcount)[0 .. 9];
foreach my $str (#listing) {
my $output = $wordcount{$str} . " $str\n";
print $fhn $output;
}

Here is the simplest skeleton for the reading part using Path::Class (see also dir and file:
#!/usr/bin/perl
use warnings;
use strict;
use Path::Class;
my $src = dir("/home/smenk/.filfolder");
my #txt_files = grep /[.] txt\z/x, $src->children;
for my $txt_file ( #txt_files ) {
my $in = $txt_file->openr;
while (my $line = <$in>) {
print "OUT: $line";
}
}

You can also use another great module Path::Tiny, for dir/file operations and the Time::Piece for the date/time functions - like:
#!/usr/bin/env perl
use strict;
use warnings;
use Path::Tiny;
use Time::Piece;
my #txtfiles = path("/home/smenk/.filfolder")->children(qr/\.txt\z/);
my $outdir = path("home/smenk/.result");
$outdir->mkpath; #create the dir...
my $t = localtime;
my $outfile = $outdir->child($t->strftime("%Y%m%d-%H%M%S.txt"));
$outfile->touch;
my #outdata;
for my $infile (#txtfiles) {
my #lines = $infile->lines({chomp => 1});
#do something with lines and create the output #data
push #outdata, scalar #lines;
}
$outfile->append({truncate => 1}, map { "$_\n" } #outdata); #or spew;

Related

Perl File::Find is not working

I want to search string in directory using Perl File::Find, but it's not working. It gave me an error:
C:/Perl64/bin/perl.exe D:/DUAN/MailScanner/GetMailForwarder.pl
Error openning file: D:\DUAN\MailScanner\valiases Permission denied
Process finished with exit code 13
Here is my code:
#!/usr/bin/perl
use strict;
use warnings;
use File::Find;
use Data::Dump qw(dump);
my #dirs = 'D:\DUAN\MailScanner\valiases';
## main processing done here
my #found_files = ();
my $pattern = qr/World/;
find( \&wanted, #dirs );
sub wanted
{
next if ($File::Find::name =~ m/^\./);
open my $file, '<', $File::Find::name or die "Error openning file: $File::Find::name $!\n";
while( defined(my $line = <$file>) )
{
if ($line =~ /$pattern/)
{
push #found_files, $_;
last;
}
}
close ($file);
}
foreach my $file(#found_files)
{
print $file, "\n";
}
Very happy to see use strict; - good job!
The following minor code mod should help you get to the bottom of whatever problem you have.
use strict;
use warnings;
use File::Find;
use Data::Dump qw(dump);
my #dirs = ('D:\DUAN\MailScanner\valiases', 'D:\DUAN\additionalPath');
foreach my $dir (#dirs)
{
print "WARNING: $dir is not a directory/folder.\n" unless ( -d $dir );
}
## main processing done here
my #found_files = ();
my $pattern = qr/World/;
find( \&wanted, #dirs );
sub wanted
{
next if ($File::Find::name =~ m/^\./);
if (-r $File::Find::name)
{
open my $file, '<', $File::Find::name or die "Error reading file: $File::Find::name $!\n";
while ( my $line = <$file> )
{
if ($line =~ m/$pattern/)
{
push #found_files, $_;
last;
}
}
close ($file);
}
else
{
print "WARNING: $File::Find::name is not readable. Skipping...\n";
}
}
foreach my $file (#found_files)
{
print "$file\n";
}

perl + read multiple csv files + manipulate files + provide output_files

Apologies if this is a bit long winded, bu i really appreciate an answer here as i am having difficulty getting this to work.
Building on from this question here, i have this script that works on a csv file(orig.csv) and provides a csv file that i want(format.csv). What I want is to make this more generic and accept any number of '.csv' files and provide a 'output_csv' for each inputed file. Can anyone help?
#!/usr/bin/perl
use strict;
use warnings;
open my $orig_fh, '<', 'orig.csv' or die $!;
open my $format_fh, '>', 'format.csv' or die $!;
print $format_fh scalar <$orig_fh>; # Copy header line
my %data;
my #labels;
while (<$orig_fh>) {
chomp;
my #fields = split /,/, $_, -1;
my ($label, $max_val) = #fields[1,12];
if ( exists $data{$label} ) {
my $prev_max_val = $data{$label}[12] || 0;
$data{$label} = \#fields if $max_val and $max_val > $prev_max_val;
}
else {
$data{$label} = \#fields;
push #labels, $label;
}
}
for my $label (#labels) {
print $format_fh join(',', #{ $data{$label} }), "\n";
}
i was hoping to use this script from here but am having great difficulty putting the 2 together:
#!/usr/bin/perl
use strict;
use warnings;
#If you want to open a new output file for every input file
#Do it in your loop, not here.
#my $outfile = "KAC.pdb";
#open( my $fh, '>>', $outfile );
opendir( DIR, "/data/tmp" ) or die "$!";
my #files = readdir(DIR);
closedir DIR;
foreach my $file (#files) {
open( FH, "/data/tmp/$file" ) or die "$!";
my $outfile = "output_$file"; #Add a prefix (anything, doesn't have to say 'output')
open(my $fh, '>', $outfile);
while (<FH>) {
my ($line) = $_;
chomp($line);
if ( $line =~ m/KAC 50/ ) {
print $fh $_;
}
}
close($fh);
}
the script reads all the files in the directory and finds the line with this string 'KAC 50' and then appends that line to an output_$file for that inputfile. so there will be 1 output_$file for every inputfile that is read
issues with this script that I have noted and was looking to fix:
- it reads the '.' and '..' files in the directory and produces a
'output_.' and 'output_..' file
- it will also do the same with this script file.
I was also trying to make it dynamic by getting this script to work in any directory it is run in by adding this code:
use Cwd qw();
my $path = Cwd::cwd();
print "$path\n";
and
opendir( DIR, $path ) or die "$!"; # open the current directory
open( FH, "$path/$file" ) or die "$!"; #open the file
**EDIT::I have tried combining the versions but am getting errors.Advise greatly appreciated*
UserName#wabcl13 ~/Perl
$ perl formatfile_QforStackOverflow.pl
Parentheses missing around "my" list at formatfile_QforStackOverflow.pl line 13.
source dir -> /home/UserName/Perl
Can't use string ("/home/UserName/Perl/format_or"...) as a symbol ref while "strict refs" in use at formatfile_QforStackOverflow.pl line 28.
combined code::
use strict;
use warnings;
use autodie; # this is used for the multiple files part...
#START::Getting current working directory
use Cwd qw();
my $source_dir = Cwd::cwd();
#END::Getting current working directory
print "source dir -> $source_dir\n";
my $output_prefix = 'format_';
opendir my $dh, $source_dir; #Changing this to work on current directory; changing back
for my $file (readdir($dh)) {
next if $file !~ /\.csv$/;
next if $file =~ /^\Q$output_prefix\E/;
my $orig_file = "$source_dir/$file";
my $format_file = "$source_dir/$output_prefix$file";
# .... old processing code here ...
## Start:: This part works on one file edited for this script ##
#open my $orig_fh, '<', 'orig.csv' or die $!; #line 14 and 15 above already do this!!
#open my $format_fh, '>', 'format.csv' or die $!;
#print $format_fh scalar <$orig_fh>; # Copy header line #orig needs changeing
print $format_file scalar <$orig_file>; # Copy header line
my %data;
my #labels;
#while (<$orig_fh>) { #orig needs changing
while (<$orig_file>) {
chomp;
my #fields = split /,/, $_, -1;
my ($label, $max_val) = #fields[1,12];
if ( exists $data{$label} ) {
my $prev_max_val = $data{$label}[12] || 0;
$data{$label} = \#fields if $max_val and $max_val > $prev_max_val;
}
else {
$data{$label} = \#fields;
push #labels, $label;
}
}
for my $label (#labels) {
#print $format_fh join(',', #{ $data{$label} }), "\n"; #orig needs changing
print $format_file join(',', #{ $data{$label} }), "\n";
}
## END:: This part works on one file edited for this script ##
}
How do you plan on inputting the list of files to process and their preferred output destination? Maybe just have a fixed directory that you want to process all the cvs files, and prefix the result.
#!/usr/bin/perl
use strict;
use warnings;
use autodie;
my $source_dir = '/some/dir/with/cvs/files';
my $output_prefix = 'format_';
opendir my $dh, $source_dir;
for my $file (readdir($dh)) {
next if $file !~ /\.csv$/;
next if $file =~ /^\Q$output_prefix\E/;
my $orig_file = "$source_dir/$file";
my $format_file = "$source_dir/$output_prefix$file";
.... old processing code here ...
}
Alternatively, you could just have an output directory instead of prefixing the files. Either way, this should get you on your way.

Can i collect the output of find(\&wanted, #directories) in an array

I am writing a script which will traverse the directory(including subdir also) and push the desired file in an array so that i can work on each file.
Here is my code:
use strict;
use warnings;
use File::Find;
my $path = $ARGV[0];
find({ wanted => \&GetappropriateFile }, $path);
sub GetappropriateFile
{
my $file = $_;
my #all_file;
# print "$file\n";
if ( -f and /traces[_d+]/)
{
#print "$file\n";
open(my $fh, "<", $file) or die "cannot open file:$!\n";
while( my $line = <$fh>){
$line =~ /Cmd\sline:\s+com.android*/;
push(#all_file,$file);
#print "$file\n";
}
close($fh);
#print"#all_file\n";
}
}
Problem Area : my $file = $_;
Instead of using " $file" if i could get a way to use an array here then i can easily read those files one by one and filter it.
Here what i am tring to do is : I have to open each file and check for the string "Cmd line: com.android" as soon as i get this string in the file i have to push this current file in an array and start reading the another file.
It would be better to avoid global vars.
use strict;
use warnings;
use File::Find qw( find );
sub IsAppropriateFile {
my ($file) = #_;
if (-f $file && $file =~ /traces[_d+]/) {
open(my $fh, "<", $file) or die "cannot open file:$!\n";
while ( my $line = <$fh> ) {
if ($line =~ /Cmd\sline:\s+com.android*/) {
return 1;
}
}
}
return 0;
}
{
my $path = $ARGV[0];
my #matching_files;
find({
wanted => sub {
push #matching_files, $_ if IsAppropriateFile($_);
},
}, $path);
print("$_\n") for #matching_files; # Or whatever.
}
Put declaration of #all_file outside of function, and use it after find() finishes,
my #all_file;
sub GetappropriateFile
{
..
}
You could also stop with file reading after successful match,
if ($line =~ /Cmd\sline:\s+com.android*/) {
push(#all_file, $file);
last;
}

Perl text::csv to check then add field

I am reading an csv file and checking for text within a certain field.
If text exists I would like to keep the line and also add a field.
The following while loop in a program works fine, to save lines with text in col[17]:
while (my $row = $csv->getline($infh)) {
if ($row->[16] ne ""){
my $string = $csv->string;
print {$outfh} $string;
}
}
In another test program the following while loop also works fine, to add a field:
while (my $row = $csv->getline($infh)) {
splice #$row, 1, 0, "1GM";
$csv->print($outfh, $row);
}
I would like to combine these to work in the same program but I am having syntax nitemares.
Can anyone show some pointers to a perl novice?
Here is the full "test program":
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV;
use Cwd;
use File::Basename;
my $dirname = basename(getcwd);
my $input_file = $dirname . ".txt";
my $output_file = $dirname . '_extract.txt';
my $csv = Text::CSV->new({binary => 1, eol=> "\015\012"})
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $infh, $input_file or die "Cannot open '$input_file': $!";
open my $outfh, '>>', $output_file or die "Cannot open '$output_file': $!";
while (my $row = $csv->getline($infh)) {
splice #$row, 1, 0, "1GM";
$csv->print($outfh, $row);
}
close $infh;
close $outfh;
$csv->eof or die "Processing of '$input_file' terminated prematurely";
Added:
I would like the test program to also include the test for the text, somehow using;
if ($row->[16] ne "")
From the other program - So that the extra column is only added to the rows I want to keep.
Cannot figure out how to do this...
Added 2/18: This Worked!, but does not seem very elegant...
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV;
use Cwd;
use File::Basename;
my $dirname = basename(getcwd);
my $input_file = $dirname . ".txt";
my $output_file = $dirname . '_extract.txt';
my $column = '1GM'; # Text for new column to insert if a match
my $csv = Text::CSV->new({binary => 1, eol=> "\015\012"})
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $infh, $input_file or die "Cannot open '$input_file': $!";
open my $outfh, '>>', $output_file or die "Cannot open '$output_file': $!";
while (my $row = $csv->getline($infh)) {
if ($row->[16] ne "") # add/insert column if col[17] has text
{
splice #$row, 1, 0, "$column";
if ($row->[1] eq "$column") # print to file only rows with new column
{
$csv->print($outfh, $row);
}
}
}
close $infh;
close $outfh;
$csv->eof or die "Processing of '$input_file' terminated prematurely";
Added 2/18 5:40pm - Got it..This works!!
while (my $row = $csv->getline($infh)) {
splice #$row, 1, 0, "$column"; # add column
if ($row->[17] ne "") # print if test is true
{
$csv->print($outfh, $row);
}
}

<DATA> prevents foreach loop from being executed, why? :)

I have two nested foreach loops. If I use this code:
foreach (#directories) {
my $actual_directory = $_;
print "\nactual directory: ".$actual_directory."\n";
foreach (#files) {
my $file_name = $_;
my $actual_file = $actual_directory.$file_name;
print $actual_file."\n";
open(DATA, $actual_file) or die "Nelze otevřít zdrojový soubor: $!\n";
my $line_number = 0;
# while (<DATA>){
# my #znaky = split(' ',$_);
# my $poradi = $znaky[0]; #poradi nukleotidu
# my $hodnota = $znaky[1]; #hodnota
# my #temp = $files_to_sum_of_lines{$actual_file};
# $temp[$line_number] += $hodnota;
# $files_to_sum_of_lines{$actual_file} = #temp;
# $line_number+=1;
# }
# close(DATA);
}
}
I got this output:
actual directory: /home/n/Plocha/counting_files/1/
/home/n/Plocha/counting_files/1/a.txt
/home/n/Plocha/counting_files/1/b.txt
actual directory: /home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/a.txt
/home/n/Plocha/counting_files/2/b.txt
However, if I uncomment "while (<DATA>){ }", I loose a.txt and b.txt, so the output looks like this:
actual directory: /home/n/Plocha/counting_files/1/
/home/n/Plocha/counting_files/1/a.txt
/home/n/Plocha/counting_files/1/b.txt
actual directory: /home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/
/home/n/Plocha/counting_files/2/
How can this while (<DATA>) prevent my foreach from being executed?
Any help will be appreciated. Thanks a lot.
In addition to not using DATA, try using lexical loop variables, and lexical filehandles. Also, Perl's built-in $. keeps track of line numbers for you.
for my $actual_directory (#directories) {
print "\nactual directory: ".$actual_directory."\n";
foreach my $file_name (#files) {
my $actual_file = $actual_directory.$file_name;
print $actual_file."\n";
open my $INPUT, '<', $actual_file
or die "Nelze otevřít zdrojový soubor: $!\n";
while (my $line = <$INPUT>) {
my #znaky = split(' ', $line);
my $poradi = $znaky[0]; #poradi nukleotidu
my $hodnota = $znaky[1]; #hodnota
#temp = $files_to_sum_of_lines{$actual_file};
$temp[ $. ] += $hodnota;
$files_to_sum_of_lines{$actual_file} = #temp;
}
close $INPUT;
}
}
On the other hand, I can't quite tell if there is a logic error in there. Something like the following might be useful:
#!/usr/bin/perl
use warnings; use strict;
use Carp;
use File::Find;
use File::Spec::Functions qw( catfile canonpath );
my %counts;
find(\&count_lines_in_files, #ARGV);
for my $dir (sort keys %counts) {
print "$dir\n";
my $dircounts = $counts{ $dir };
for my $file (sort keys %{ $dircounts }) {
printf "\t%s: %d\n", $file, $dircounts->{ $file };
}
}
sub count_lines_in_files {
my $file = canonpath $_;
my $dir = canonpath $File::Find::dir;
my $path = canonpath $File::Find::name;
return unless -f $path;
$counts{ $dir }{ $file } = count_lines_in_file($path);
}
sub count_lines_in_file {
my ($path) = #_;
my $ret = open my $fh, '<', $path;
unless ($ret) {
carp "Cannot open '$path': $!";
return;
}
1 while <$fh>;
my $n_lines = $.;
close $fh
or croak "Cannot close '$path': $!";
return $n_lines;
}
Perl uses __DATA__ to make a pseudo-data file at the end of the package. You can access that using the filehandle DATA, e.g. <DATA>. Is it possible that your filehandle is conflicting? Try changing the filehandle to something else and see if it works better.