My code loops through multiple files in a directory, parses each file and appends the parsed content of each file to FinalVariantfile.txt.
The code works, but duplicates the content of each file.
When I ran the code with two files, the output contained the content of four files (each file's parsed content appeared twice). Could someone please explain why this is happening and how to fix it?
#!/usr/bin/perl -w
use strict;
#directory structure
my $home = "/data/";
my $tsvdirectory = $home . "test_all_runs/" . $ARGV[0];
my $tsvfiles = $home . "test_all_runs/" . $ARGV[0] . "/tsv_files.txt";
my $FinalVariants = $home . "test_all_runs/" . $ARGV[0] . "/FinalVariantfile.txt";
my @tsvfiles = ();
my @currentlines = ();
my $currentline = '';
my $currentCNVline = '';
my @currentCNVlines = ();
my @HotSpotLines = ();
my @CNVLines = ();
# command to produce the vcf_files.txt file stored in each individual run
# directory; the file list includes solely vcf files which have not been
# previously prepared and/or annotated
my $cmd = `ls $tsvdirectory/FOCUS*\.tsv > $tsvfiles`;
# print "$cmd";
my $cmda = "ls $tsvdirectory/FOCUS*\.tsv > $tsvfiles";
# print "$cmda";
# this code opens the vcf_files.txt file and passes each line into an array for
# indidivudal manipulation
open( TXT2, "$tsvfiles" );
while ( <TXT2> ) {
push( @tsvfiles, $_ );
}
close(TXT2);
foreach ( @tsvfiles ) {
chop($_);
}
# this code then parses each of the files listed by name in the tsvfiles array
foreach ( @tsvfiles ) {
my $currenttsvfile = "$_"; # establishes the current file being manipulated
my $MDLfinaltsvfile = $currenttsvfile;
$MDLfinaltsvfile =~ s/\.tsv/_prepared\.txt/g;
# this series of variable calls names the various intermediate or
# final output files
my $MDLlinestsvfile = $currenttsvfile;
$MDLlinestsvfile =~ s/\.tsv/_withCNV\.txt/g;
my $Variantlinestsvfile = $currenttsvfile;
$Variantlinestsvfile =~ s/\.tsv/_HotSpot\.txt/g;
my $MDLtsvfile = $currenttsvfile;
$MDLtsvfile =~ s/\.tsv/_FilteredAllcolumns\.txt/g;
my $MDLsampleid = $currenttsvfile;
$MDLsampleid =~ s/\-oncogene.tsv//g;
print "The currentVCFis############# " . $currenttsvfile . "\n";
my @SampleID = ();
@SampleID = split /\//, $MDLsampleid;
print "The sampleIDis##############" . $SampleID[4] . "\n";
my $CNVdata = $currenttsvfile;
$CNVdata =~ s/\.tsv/_cnv\.txt/g;
my $FinalCNVdata = $currenttsvfile;
$FinalCNVdata =~ s/\.tsv/_finalcnv\.txt/g;
my $cmd2 = `fgrep -v "#" $currenttsvfile > $MDLlinestsvfile`;
print "$cmd2"; # this code extracts from the current vcf file all of the
# lines of data and outputs them into a separate file
# removes lines that contain CNV/intronic/synonymous/utr_3/utr_5
my $cmd5 = `grep -vwE "(CNV|intronic|synonymous|utr_3|utr_5)" $MDLlinestsvfile > $Variantlinestsvfile`;
print "$cmd5";
open( my $fh_in, '<', $Variantlinestsvfile )
or die "cannot open $Variantlinestsvfile: $!\n";
#removes lines that contain 0/0 and ./. genotypes from field 70.
open( my $fh_out, '>', $MDLtsvfile )
or die "cannot open $MDLtsvfile: $!\n";
while ( my $line = <$fh_in> ) {
# tab/field-based:
my @fields = split( /\s+/, $line );
print $fh_out $line unless ( $fields[70] =~ m|([0.])/\1| );
}
close($fh_in);
close($fh_out);
#open each filtered file with all columns and pushes it into array.
open( TXT2, "$MDLtsvfile" );
while (<TXT2>) {
push( @HotSpotLines, $_ );
}
close(TXT2);
foreach (@HotSpotLines) {
chop($_);
my @HotSpotEntries = ();
my $currentMDLline = $_;
@HotSpotEntries = split( /\t/, $currentMDLline );
my $chr = $HotSpotEntries[9];
my $position = $HotSpotEntries[10];
my $cosmicids = $HotSpotEntries[21];
my $refforward = $HotSpotEntries[67];
my $genotype = $HotSpotEntries[70];
my $altforward = $HotSpotEntries[77];
my $altreverse = $HotSpotEntries[78];
my $cDNA = $HotSpotEntries[81];
my $exon = $HotSpotEntries[83];
my $conseq = $HotSpotEntries[84];
my $location = $HotSpotEntries[88];
my $geneclass = $HotSpotEntries[92];
my $aachange = $HotSpotEntries[98];
my $transcript = $HotSpotEntries[100];
$currentline
= $SampleID[4] . "\t"
. $chr . "\t"
. $position . "\t"
. $cosmicids . "\t"
. $refforward . "\t"
. $refreverse . "\t"
. $genotype . "\t"
. $altforward . "\t"
. $altreverse . "\t"
. $cDNA . "\t"
. $exon . "\t"
. $conseq . "\t"
. $location . "\t"
. $geneclass . "\t"
. $aachange . "\t"
. $transcript;
# print "The currentVCFlineis ".$currentline."\n";
push( @currentlines, $currentline );
}
my $i;
for ( $i = 0; $i < @currentlines; $i += 1 ) {
my $currentguiline = $currentlines[$i];
my $cmd5 = `echo "$currentguiline" >> $FinalVariants`;
print "$cmd5";
#my $cmd9 = `sed -i '1i$SampleID[4]' $FinalVariants`; print $cmd9;
}
}
There is no need to start so many new shell subprocesses for such basic operations. ls, fgrep, grep and echo all have equivalents in Perl, and calling echo once for every line of text is a particularly poor way of copying one file to another.
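For instance, the three kinds of shell call can be replaced along these lines. This is only a sketch; the variable names mirror the question and the paths are made-up examples:
use strict;
use warnings;

my $tsvdirectory   = '/data/test_all_runs/run1';           # example path
my $currenttsvfile = "$tsvdirectory/FOCUS-sample.tsv";      # example file
my $FinalVariants  = "$tsvdirectory/FinalVariantfile.txt";

# "ls $tsvdirectory/FOCUS*.tsv > $tsvfiles" becomes a glob:
my @tsvfiles = glob "$tsvdirectory/FOCUS*.tsv";

# "fgrep -v ..." and "grep -vwE ..." become a filter over the lines:
open my $in, '<', $currenttsvfile or die "Cannot open $currenttsvfile: $!";
my @currentlines = grep { !/^#/ && !/\b(?:CNV|intronic|synonymous|utr_3|utr_5)\b/ } <$in>;
close $in;

# the per-line "echo ... >> $FinalVariants" becomes one open and one print:
open my $out, '>>', $FinalVariants or die "Cannot open $FinalVariants: $!";
print {$out} $_ for @currentlines;
close $out;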
I suspect that your problem is because of the line
my $cmd5 = `echo "$currentguiline" >> $FinalVariants`;
which will append each element of @currentlines to the end of the file. So the first time you run your program the file will contain a single copy of the result, but every subsequent run will just add more data to the end of it, and it will keep growing.
I hate to offer a hack to get things working, but it would take me ages to understand what your program is doing behind all the confusion and write a proper concise version. You can fix it temporarily by adding the line
unlink $FinalVariants or die $!;
before the foreach ( @tsvfiles ) { ... } loop. This will delete the file and ensure that a new version is created for each execution of your program.
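In context, that placement looks like this (a sketch, names as in the question):
unlink $FinalVariants or die $!;   # throw away output left over from a previous run
foreach ( @tsvfiles ) {
    # ... existing per-file processing ...
}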
Okay, I've studied your code carefully and I think this will do what you intend. Without any data or even file name samples I've been unable to test it beyond making sure that it compiles, so it will be a miracle if it works first time, but I believe it's the best chance you have of getting a coherent solution
Note that there's a problem with $refreverse: you use it in your own code but never declare or define it, so there's no way that the code you show can produce the behaviour you describe, because it dies during compilation with the error message
Global symbol "$refreverse" requires explicit package name
I've guessed that it belongs right after $refforward, at index 68.
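If that guess is right, the missing declaration inside the question's loop would be a single extra line, something like:
my $refreverse = $HotSpotEntries[68];   # assumed: the reverse-read column right after $refforward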
Please report back about how well this functions
#!/usr/bin/perl
use strict;
use warnings 'all';
my $home = "/data";
my $tsv_directory = "$home/test_all_runs/$ARGV[0]";
my $final_variants = "$tsv_directory/final_variant_file.txt";
open my $out_fh, '>', $final_variants
or die qq{Unable to open "$final_variants" for output: $!};
my @tsv_files = glob "$tsv_directory/FOCUS*.tsv";
for my $tsv_file ( @tsv_files ) {
print "The current VCF is ############# $tsv_file\n";
$tsv_file =~ m|([^/]+)-oncogene.tsv$| or die "Cant extract Sample ID";
my $sample_id = $1;
print "The sample ID is ############## $sample_id\n";
open my $in_fh, '<', $tsv_file
or die qq{Unable to open "$tsv_file" for input: $!};
while ( <$in_fh> ) {
next if /^#/;
next if /\b(?:CNV|intronic|synonymous|utr_3|utr_5)\b/;
my @fields = split;
next if $fields[70] eq '0/0' or $fields[70] eq './.';
my @wanted = ( 9, 10, 21, 67, 68, 70, 77, 78, 81, 83, 84, 88, 92, 98, 100 );
my $current_line = join "\t", @fields[@wanted];
print $out_fh $current_line, "\n";
}
}
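Note one deliberate difference from the original: the output file is opened once with '>' before the loop, so every run starts with a fresh final_variant_file.txt and the duplication problem cannot come back.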
I would like to check whether one of the first file's values appears in another file. If one of the values is present, it should report that there is an existing bin for that specific value; if not, it should report that there is no existing bin limit. The problem is that I am not sure how to check all the values at once.
The first text file, DID1.txt, contains:
L84A:D:O:M:
L84C:B:E:D:
The second text file, DID.txt, contains:
L84A:B:E:Q:X:F:i:M:Y:
L84C:B:E:Q:X:F:i:M:Y:
L83A:B:E:Q:X:F:i:M:Y:
If the first four characters (e.g. L84A) match, I need to check all the values on that line.
For example, L84A in the first text file and in the second text file both have M, so it should print out that there is an existing M bin.
Below is my code:
use strict;
use warnings;
my $filename = 'DID.txt';
my $filename1 = 'DID1.txt';
my $count = 0;
open( FILE2, "<$filename1" )
or die("Could not open log file. $!\n");
while (<FILE2>) {
my ($number) = $_;
chomp($number);
my @values1 = split( ':', $number );
open( FILE, "<$filename" )
or die("Could not open log file. $!\n");
while (<FILE>) {
my ($line) = $_;
chomp($line);
my @values = split( ':', $line );
foreach my $val (@values) {
if ( $val =~ /$values1[0]/ ) {
$count++;
if ( $values[$count] =~ /$values1[$count]/ ) {
print
"Yes ,There is an existing bin & DID\n #values1\n";
}
else {
print "No, There is an existing bin & DID\n";
}
}
}
}
}
I cannot check all the values. Please give me some advice on this, since this is my first time learning the Perl language. Thanks a lot :)
Based on my understanding, I wrote this code:
use strict;
use warnings;
#use ReadWrite;
use Array::Utils qw(:all);
use vars qw($my1file $myfile1cnt $my2file $myfile2cnt @output);
$my1file = "did1.txt"; $my2file = "did2.txt";
We are going to read both the first and the second file (DID1 and DID2).
readFileinString($my1file, \$myfile1cnt); readFileinString($my2file, \$myfile2cnt);
In the first file, as per the OP's request, the first four characters should be matched against the second file; if they match, we then need to check the rest of the characters on that line of the first file against the second one.
while($myfile1cnt=~m/^((\w){4})\:([^\n]+)$/mig)
{
print "<LineStart>";
my $lineChk = $1; my $full_Line = $3; #print ": $full_Line\n";
my @First_values = split /\:/, $full_Line; #print join "\n", @First_values;
If the first four characters match, then:
if($myfile2cnt=~m/^$lineChk\:([^\n]+)$/m)
{
Store the rest of the line's content, split it on colons, and collect the characters to be matched against the first file's contents.
my $FullLine = $1; my @second_values = split /:/, $FullLine;
Then search for each letter of the first file's content in the matching line of the second file...
foreach my $sngletter(@First_values)
{
If a letter is present in both the first and the second file, it gets printed.
if( grep {$_ eq "$sngletter"} @second_values)
{
print "Matched: $sngletter\t";
}
}
}
else { print "Not Matched..."; }
This just marks the end of the line.
print "<LineEnd>\n"
}
#------------------>Reading a file
sub readFileinString
#------------------>
{
my $File = shift;
my $string = shift;
use File::Basename;
my $filenames = basename($File);
open(FILE1, "<$File") or die "\nFailed Reading File: [$File]\n\tReason: $!";
read(FILE1, $$string, -s $File, 0);
close(FILE1);
}
Read the search pattern and the data into hashes (the first field is the key), then go through the data and select only the fields included in the pattern for that key.
use strict;
use warnings;
use feature 'say';
my $input1 = 'DID1.txt'; # look for key,pattern(array)
my $input2 = 'DID.txt'; # data - key,elements(array)
my $pattern;
my $data;
my %result;
$pattern = file2hash($input1); # read pattern into hash
$data = file2hash($input2); # read data into hash
while( my($k,$v) = each %{$data} ) { # walk through data
next unless defined $pattern->{$k}; # skip those which is not in pattern hash
my $find = join '|', @{ $pattern->{$k} }; # form search pattern for grep
my @found = grep {/$find/} @{ $v }; # extract only those of interest
$result{$k} = \@found; # store in result hash
}
while( my($k,$v) = each %result ) { # walk through result hash
say "$k has " . join ':', #{ $v }; # output final result
}
sub file2hash {
my $filename = shift;
my %hash;
my $fh;
open $fh, '<', $filename
or die "Couldn't open $filename";
while(<$fh>) {
chomp;
next if /^\s*$/; # skip empty lines
my($key,@data) = split ':';
$hash{$key} = \@data;
}
close $fh;
return \%hash;
}
Output
L84C has B:E
L84A has M
I have an input file and a list file like this:
input.txt file:
>gi|NP_415931.4
MTEQQKLTFTALQQRLDSLMLRDRLRFSRRLHGVKKVKNPDAQQAIFQEMAKEIDQAAGKVLLREAARPEITYPD
>gi|NP_418770.2
MMNKSNFEFLKGVNDFTYAIACAAENNYPDDPNTTLIKMRMFGEATAKHLGLL
>gi|YP_026226.4
MRKFTLNIFTLSLGLAVMPMVEAAPTAQQQLLEQVRLGEATHREDLVQQSLYRLELIDPNNPDVVAARFRSLLRQGDIDGAQKQ
list.txt file:
NP_415931.4: 1-5, 6-8
YP_026226.4: 3-7, 9-9, 10, 12-15
Now I want a CSV-formatted output.csv (with a certain header added), which for the above inputs would be:
ID,Regions,Length,Sequences
NP_415931.4,1-5,5,MTEQQ
,6-8,3,KLT
YP_026226.4,3-7,5,KFTLN
,9-9,1,F
,10,1,T
,12-15,4,SLGL
That is, it should first match the list file's headers against those of the input file, take the sequences of the matched entries, and then arrange the output in the above format.
How can I generate the above output.csv file from those inputs?
Thanks
Here is an approach. To summarize: We have a master database file input.txt with all defined sequences. Our job is to extract certain information from this database and write it to a CSV file. The information about what to extract is given in file list.txt.
use feature qw(say);
use strict;
use warnings;
my $input_fn = 'input.txt';
open ( my $fh1, '<', $input_fn ) or die "Could not open file '$input_fn': $!";
my %seqs;
while( my $line = <$fh1> ) {
my ($id ) = $line =~ /gi\|(.*)$/;
chomp( my $seq = <$fh1> );
$seqs{$id} = $seq;
}
close $fh1;
say join ',', qw(ID Regions Length Sequences);
my $list_fn = 'list.txt';
open ( my $fh2, '<', $list_fn ) or die "Could not open file '$list_fn': $!";
while( my $line = <$fh2> ) {
chomp $line;
my ( $id, @regions ) = split /[:,]\s?/, $line;
for my $i (0..$#regions) {
my $region = $regions[$i];
my $start = my $end = $region;
if ( $region =~ /(\d+)-(\d+)/ ) {
$start = $1;
$end = $2;
}
my $name = ($i == 0) ? $id : "";
my $seq = substr( $seqs{$id}, $start - 1, $end - $start + 1);
say join ',', $name, $region, length( $seq ), $seq;
}
}
close $fh2;
Output:
ID,Regions,Length,Sequences
NP_415931.4,1-5,5,MTEQQ
,6-8,3,KLT
YP_026226.4,3-7,5,KFTLN
,9-9,1,F
,10,1,T
,12-15,4,SLGL
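Since the script prints to STDOUT, you can produce the requested file by redirecting its output, for example perl extract_regions.pl > output.csv (the script name here is only an example).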
I am trying to write a file-renaming Perl script to reduce manual effort. Manually, I open the PDF file, copy the title, and rename the file according to that title.
I am writing the code below to rename the PDF according to the file title, e.g. SPE-180024-MS is the title and the PDF should be renamed to that.
According to my logic it should rename the file, but the output is not right.
#!/usr/bin/perl
use strict;
#use warnings;
use Cwd;
use File::Basename;
#use File::Copy;
use File::Find;
use PDF::API2;
use CAM::PDF;
my $path1 = getcwd;
open( F6, ">Ref.txt" );
opendir( DIR, $path1 ) or die $!;
my @dots = grep /(.*?)\-(MS)$/, readdir(DIR);
closedir(DIR);
my @file;
my @files;
my $check;
my $err_1;
my $err_2;
my $err_3;
foreach my $file (@dots) {
#print F6 $file."\n";
opendir DIR1, $file or die "Can't open $file: $!";
my @files = sort grep { -f "$file/$_" } readdir DIR1;
my $data1 = join( ",", <@files> );
closedir DIR1;
#print F6 @files."\n";
my $a = @files;
if ($data1 =~ m#(((\w+)\-(\d+)\-MS)\.(pdf))#
#&& $data1=~m#((\w+)\-(\d+)\-MS\.(xml))#) #((.*?)\.xml)#
) {
my $check = $2;
#print F6 $1."\n";
if ( $data1 =~ m#(((\w+)\-(\d+)\-MS)\.(xml))# ) {
my $check1 = $2;
my $first = $1;
if ( $check eq $file || $check1 eq $file ) {
}
else {
#print F6 $file."\tDIFFERENT FILE PRESENT\n";
}
}
}
foreach my $f1 ( glob("$file/*.xml") ) {
#print F6 $f1."\n";
open( FH, '<', $f1 ) or die "Cannot open file: $f1";
my $data2 = join( "", <FH> );
#print F6 $data2."\n";
close FH;
if ( $data2 =~ m#(<page-count count="(\d+)"/>)# ) {
my $page = $2;
#print F6 $f1."\t".$1."\n";
if ( $f1 =~ m#(.*?)-MS/((.*?)-MS)#s
#SPE-173391-MS/SPE-173393-MS #(.*?)\.(.*?)$/s)
) {
my $f11 = $2;
#print F6 $f11."\n";
if ( $file eq $f11 ) {
}
else {
$err_1
= $err_1
. $file . "\t"
. $f11
. "\tDIFFERENT XML FILE PRESENT\n";
#print F6 $file."\t".$f11."\tDIFFERENT XML FILE PRESENT\n";
#print F6 $file."\tDIFFERENT XML FILE PRESENT\n";
}
foreach my $f2 ( glob("$file/*.pdf") ) {
open( F2, "<$f2" ) or die "Cannot open file: $f2";
my $data = join( "", <F2> );
close F2;
my $xml_list = $data;
my $pdf = PDF::API2->open($f2);
my $pages = $pdf->pages;
#print F6 $f2."\t".$pages."\n";
if ($f2 =~ m#(.*?)-MS/((.*?)-MS)#
#/(.*?)\.(.*?)$/s
) {
my $f21 = $2;
if ( $file eq $f21 ) {
}
else {
$err_2
= $err_2
. $file . "\t"
. $f21
. "\tDIFFERENT PDF FILE PRESENT\n";
#print F6 $file."\t".$f21."\tDIFFERENT PDF FILE PRESENT\n";
}
while ( $f11 =~ m/$f21/gs ) {
if ( $page !~ m#$pages#s ) {
$err_3
= $err_3
. $f1 . "\t"
. $page . "\t"
. $f2 . "\t"
. $pages . "\n";
#print F6 $f1."\t".$page."\t".$f2."\t".$pages."\n";
$data2 =~ s#<page-count count="$page"\/>#<page-count count="$pages"\/>#gs;
open( FH, '>', $f1 ) or die "Cannot open file: $f1";
print FH $data2 . "\n";
close FH;
}
}
}
}
}
}
}
}
close F6;
This is the document. The marked heading is what I want.
You cannot just open a PDF file and operate on it. It's different from a text file so it has to be parsed.
You can use CAM::PDF. It can convert your PDF to text, which can then be analysed to get the title.
The links provided above cover enough to get your job done. I am reproducing some relevant code here:
use CAM::PDF;
my $pdf = CAM::PDF->new('test1.pdf');
my $pageNum = 1;
my $page1 = $pdf->getPageContent($pageNum);
The variable $page1 will hold the contents of the page specified by the $pageNum variable. The rest is a matter of extracting the required information.
If you want to convert the entire PDF to text, you can use getpdftext.pl, which is part of CAM::PDF; however, that's inefficient compared to reading a single page.
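As a rough illustration of that last step (a sketch only; the SPE-...-MS pattern is borrowed from the question, and how the title shows up in the extracted text will depend on your PDFs):
use strict;
use warnings;
use CAM::PDF;

my $file = 'test1.pdf';                  # example file name
my $pdf  = CAM::PDF->new($file) or die "Cannot parse $file\n";
my $text = $pdf->getPageText(1);         # plain text of page 1

if ( $text =~ /\b(SPE-\d+-MS)\b/ ) {     # guess the title from the page text
    my $title = $1;
    rename $file, "$title.pdf" or die "Cannot rename $file: $!";
}
else {
    warn "No title-like string found in $file\n";
}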
PDFs usually carry a bunch of metadata, and among it is the document title. If you're lucky, you will find the desired PDF title in there. A Perl example using PDF::API2 and its info method:
use autodie;
use Modern::Perl;
use PDF::API2;
my $file = '/your/sample/file.pdf';
my $pdf = PDF::API2->open( $file );
my %pdf_info = $pdf->info;
my $title = $pdf_info{Title};
my $renamed_dir = '/some/where/else/';
if ( $title ) {
my $new_name = $renamed_dir . $title;
if ( -f $new_name ) {
warn "File $new_name already exists, move it out of the way!";
} else {
$pdf->saveas( $new_name );
}
} else {
warn "No title found in document info.";
}
If you need to use some part of the text, then you should convert it to text first. Since you failed to mention any OS restrictions you get a Debian/Ubuntu solution for that. First, install the package poppler-utils. Then use the freshly installed tool pdftotext to extract all the text from the PDF. It might be a good idea to use pdftotext -layout. From the resulting text you will have to grep/parse the line with your "title", and then use that to rename (or much safer: copy) the PDF.
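A minimal sketch of that route, assuming pdftotext is on your PATH and that the title looks like the SPE-...-MS pattern from the question (adjust the regex to match your real title line):
use strict;
use warnings;
use File::Copy qw(copy);

my $pdf_file = 'input.pdf';                          # example name
my $text     = `pdftotext -layout "$pdf_file" -`;    # "-" sends the extracted text to STDOUT
die "pdftotext failed on $pdf_file\n" if $?;

my ($title) = $text =~ /\b(SPE-\d+-MS)\b/;           # example pattern only
die "No title found in $pdf_file\n" unless defined $title;

copy( $pdf_file, "$title.pdf" ) or die "Copy failed: $!";   # copy rather than rename, as suggested above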
I have a problem with this code. It matches the directories . and .. but not the others. Can you explain to me why?
my $Checked_directory = $Tested_directory . '\Sources';
opendir(Checked_directory, $Checked_directory)
or print STDOUT "\n 101 - Cant open $Checked_directory: $!\n";
@files = readdir(Checked_directory); # established the list of files in the checked folder
foreach my $fileToTest (@files) {
print $fileToTest . "\n"; #debug
if ($fileToTest =~ m/.jpg$/) {
print 'file... ' . $fileToTest . "\n";
$Localisation_file = $Tested_directory . '\Sources\\' . $fileToTest; #file to test
}
elsif (-d $fileToTest) {
print ">>>>" . $fileToTest . "\n";
}
}
closedir(Checked_directory);
The names in @files, as you seem to be aware, don't have any path information, which means the -d looks for them in the current working directory, fails to find them, and reports false. The same would happen with an -e (exists) test.
You should alter your code like this. Note that identifiers that start with capital letters are reserved for global identifiers. It is also a bad idea to use bareword file and directory handles -- lexical handles are current best practice.
my $checked_directory = "$tested_directory\\Sources";
opendir my $checked_dh, $checked_directory
or print STDOUT "\n 101 - Cant open $checked_directory: $!\n";
while (my $file_to_test = readdir $checked_dh) {
print "$file_to_test\n";
my $localisation_file = "$tested_directory\\Sources\\$file_to_test";
if (-d $localisation_file) {
print ">>>> $file_to_test\n";
}
elsif ($file_to_test =~ /\.jpg$/) {
print "file... $file_to_test\n";
}
}
closedir $checked_dh;
OK, so a quick test on your code - I've mocked up a directory structure. Looks like you're 'doing it windows style?'
I'm using this:
use strict;
use warnings;
my $Tested_directory = 'C:\\temp';
my $Checked_directory = $Tested_directory . '\Sources';
opendir( Checked_directory, $Checked_directory )
or print STDOUT "\n 101 - Cant open $Checked_directory: $!\n";
my @files = readdir(Checked_directory); # established the list of files in the checked folder
foreach my $fileToTest (@files) {
print "File: $fileToTest \n"; #debug
if ( $fileToTest =~ m/.jpg$/ ) {
print 'file... ' . $fileToTest . "\n";
my $Localisation_file =
$Tested_directory . '\Sources\\' . $fileToTest; #file to test
}
else {
print "\n$fileToTest is not a jpg\n";
}
if ( -d $fileToTest ) {
print ">>>>" . $fileToTest . "\n";
}
}
closedir(Checked_directory);
And I've created a directory structure looking like this:
Directory of C:\temp\Sources
22/01/2015 13:07 <DIR> .
22/01/2015 13:07 <DIR> ..
22/01/2015 13:07 0 notajpg.jpg
22/01/2015 13:04 <DIR> SubDir
22/01/2015 13:07 6 wibble.txt
2 File(s) 6 bytes
And indeed - my 'SubDir' doesn't get >>>> prefixed in the output.
File: .
>>>>.
File: ..
>>>>..
File: notajpg.jpg
file... notajpg.jpg
File: SubDir
File: wibble.txt
So your .jpg bit is working fine. But where '.' and '..' pick up as dirs, 'SubDir' does not.
The reason is related to path: . and .. exist in every directory, so that test will work regardless of the script's working directory.
You need to test against ( -d "$Checked_directory\\$fileToTest" )
Giving:
use strict;
use warnings;
my $Tested_directory = 'C:\\temp';
my $Checked_directory = $Tested_directory . '\Sources';
opendir( Checked_directory, $Checked_directory )
or print STDOUT "\n 101 - Cant open $Checked_directory: $!\n";
my @files = readdir(Checked_directory); # established the list of files in the checked folder
foreach my $fileToTest (@files) {
print "File: $fileToTest \n"; #debug
if ( $fileToTest =~ m/.jpg$/ ) {
print 'file... ' . $fileToTest . "\n";
my $Localisation_file =
$Tested_directory . '\Sources\\' . $fileToTest; #file to test
}
elsif ( -d "$Checked_directory\\$fileToTest" ) {
print ">>>>" . $fileToTest . "\n";
}
}
closedir(Checked_directory);
Building on from this question, I am still having syntax trouble with this script:
use strict;
use warnings;
use autodie; # this is used for the multiple files part...
#START::Getting current working directory
use Cwd qw();
my $source_dir = Cwd::cwd();
#END::Getting current working directory
print "source dir -> $source_dir\n";
my $output_prefix = 'format_';
#print "dh -> $dh\n";
opendir my $dh, $source_dir; #Changing this to work on current directory; changing back
# added the "()" here ($dh) as otherwise an error
for my $file (readdir($dh)) {
next if $file !~ /\.csv$/;
next if $file =~ /^\Q$output_prefix\E/;
my $orig_file = "$source_dir/$file";
my $format_file = "$source_dir/$output_prefix$file";
# .... old processing code here ...
## Start:: This part works on one file edited for this script ##
#open my $orig_fh, '<', 'orig.csv' or die $!; #line 14 and 15 above already do this!!
#open my $format_fh, '>', 'format.csv' or die $!;
print "format_file-> $format_file\n";
#print $format_fh scalar <$orig_fh>; # Copy header line #orig needs changeing
print {$format_file} scalar <$orig_file>; # Copy header line
my %data;
my @labels;
#while (<$orig_fh>) { #orig needs changing
while (<$orig_file>) {
chomp;
my @fields = split /,/, $_, -1;
my ($label, $max_val) = @fields[1,12];
if ( exists $data{$label} ) {
my $prev_max_val = $data{$label}[12] || 0;
$data{$label} = \@fields if $max_val and $max_val > $prev_max_val;
}
else {
$data{$label} = \@fields;
push @labels, $label;
}
}
for my $label (@labels) {
#print $format_fh join(',', @{ $data{$label} }), "\n"; #orig needs changing
print $format_file join(',', @{ $data{$label} }), "\n";
}
## END:: This part works on one file edited for this script ##
}
I can fix this line, opendir my $dh, $source_dir;, by adding the brackets ($dh),
but I am still having trouble with this line: print {$format_file} scalar <$orig_file>; # Copy header line
I get the following error:
Can't use string ("/home/Kevin Smith/Perl/format_or"...) as a symbol ref while "strict refs" in use at formatfile_QforStackOverflow.pl line 29.
Can anyone advise?
I have tried using the advice here but there is not much joy.
Use print $format_file ... or print ${format_file} ...
However $format_file is just a string containing the name of the file, not a filehandle. You have to open the file:
open my $format_fh, '>', $format_file or die $!;
...
print $format_fh ... ;
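Putting that together with the loop from the question (a sketch that keeps the question's variable names):
open my $orig_fh,   '<', $orig_file   or die "Cannot open $orig_file: $!";
open my $format_fh, '>', $format_file or die "Cannot open $format_file: $!";

print {$format_fh} scalar <$orig_fh>;   # copy the header line

while (<$orig_fh>) {
    # ... existing per-line processing, printing to $format_fh ...
}

close $orig_fh;
close $format_fh;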