perl match lines from one file to another file then output the current line and the next line to a new file [closed] - perl

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 9 years ago.
Improve this question
Could any of you modify the code so that the sequence names in file 1 are searched for within file 2 and, if there is a match, the matched line and the line that follows it (the sequence) are copied to an outfile? Right now the code only copies the matched titles to the outfile, but not the next line, which is the sequence. Thanks.
for example:
FILE 1 :
SEQUENCE 1 NAME
SEQUENCE 2 NAME
SEQUENCE 3 NAME
FILE 2:
SEQUENCE 1 NAME
AGTCAGTCAGTCAGTCAGTC
SEQUENCE 2 NAME
AAGGGTTTTCCCCCCAAAAA
SEQUENCE 3 NAME
GGGGTTTTTTTTTTAAAAAC
SEQUENCE 4 NAME
AAGTCCCCCCCCCCAAGGTT
etc.
OUTFILE:
SEQUENCE 1 NAME
AGTCAGTCAGTCAGTCAGTC
SEQUENCE 2 NAME
AAGGGTTTTCCCCCCAAAAA
SEQUENCE 3 NAME
GGGGTTTTTTTTTTAAAAAC
code:
use strict;
use warnings;

# Collects every line of FILE1.fasta that also occurs verbatim in FILE2.fasta
# and writes the collected lines to sequences.fasta.
# NOTE: only the matching name line is stored; the sequence line that follows
# it in FILE2.fasta is never read into @outlines.
my $f1 = 'FILE1.fasta';
open FILE1, '<', $f1 or die "Could not open file \n";
my $f2 = 'FILE2.fasta';
open FILE2, '<', $f2 or die "Could not open file \n";
my $outfile = 'sequences.fasta';
my @outlines;
my $n = 0;    # number of matching lines found
foreach (<FILE1>) {
    my $outer_text = $_;
    seek(FILE2, 0, 0);    # rescan FILE2 from the start for each FILE1 line
    foreach (<FILE2>) {
        my $inner_text = $_;
        if ($outer_text eq $inner_text) {
            print "$outer_text\n";
            push(@outlines, $outer_text);
            $n++;
        }
    }
}
# The handle must be opened for writing; a read-mode handle cannot be printed to.
open(OUTFILE, '>', $outfile) or die "Cannot open $outfile \n";
print OUTFILE @outlines;
close OUTFILE;

For a very large FILE1, the %seen hash could be tied to some form of DBM storage:
use strict;
use warnings;

# Remembers every name listed in FILE1.fasta, then walks FILE2.fasta two lines
# at a time (name line, sequence line) and copies each record whose name was
# listed to sequences.fasta.
my $f1 = 'FILE1.fasta';
open my $names_fh, '<', $f1 or die "Cannot open $f1: $!";
my $f2 = 'FILE2.fasta';
open my $seq_fh, '<', $f2 or die "Cannot open $f2: $!";
open my $out_fh, '>', 'sequences.fasta' or die "Cannot open sequences.fasta: $!";

# Names are compared without their line terminator so that a final line
# lacking a trailing newline still matches.
my %seen;
while (my $name = <$names_fh>) {
    chomp $name;
    $seen{$name} = 1;
}
close $names_fh;

while (my $name = <$seq_fh>) {
    my $sequence = <$seq_fh>;
    last unless defined $sequence;    # name line with no sequence line after it
    chomp $name;
    chomp $sequence;
    print {$out_fh} "$name\n$sequence\n" if $seen{$name};
}
close $seq_fh;
close $out_fh or die "Cannot close sequences.fasta: $!";

I would put the contents of file 2 into a hash, then check if each record from file 1 was in the hash:
#!perl
use strict;
use warnings;

# Index FILE2.fasta as name => sequence, reading two lines per record, then
# emit the record for every name listed in FILE1.fasta.
my $f2 = 'FILE2.fasta';
open my $seq_fh, '<', $f2 or die "Could not open $f2: $!\n";
my %hash;
while (defined(my $k = <$seq_fh>)) {
    chomp $k;
    my $v = <$seq_fh>;
    last unless defined $v;    # name line with no sequence line after it
    chomp $v;                  # stored bare; the print below adds the newline
    $hash{$k} = $v;
}
close $seq_fh;

my $f1 = 'FILE1.fasta';
open my $names_fh, '<', $f1 or die "Could not open $f1: $!\n";
open my $out_fh, '>', 'sequences.fasta' or die "Cannot open sequences.fasta: $!\n";
while (my $name = <$names_fh>) {
    chomp $name;
    next unless exists $hash{$name};
    print {$out_fh} "$name\n$hash{$name}\n";
}
close $names_fh;
close $out_fh or die "Cannot close sequences.fasta: $!\n";

Related

I want to write multiple files from one file without using array to remove complexity

I want to write multiple files from one file (getting the latest data every time) without using an array, to reduce complexity. I already tried it using an array, but when the amount of data is large it slows down the process.
Kindly give some hint to me how I will remove the complexity of the program.
Input: read a text file from a directory.
Output:
File1.pl - 1 2 3 4 5 6
File2.pl - 6 7 8 9 10
File3.pl -11 12 13 14 15
File4.pl -16 17 18 19 20
I do this using array:
use feature 'state';
# Reads e:/today.txt into an array and writes it back out in chunks, one
# output file per chunk, named after the 1-based number of the chunk's first
# line (E:/e1.txt, E:/e6.txt, ...).
open (DATA,"<","e:/today.txt") or die "Cannot open e:/today.txt: $!";
@array=<DATA>;
$sizeofarray=scalar @array;
print "Total no. of lines in file is :$sizeofarray";
$count=1;
while($count<=$sizeofarray)
{
    open($fh,'>',"E:/e$count.txt") or die "Cannot open E:/e$count.txt: $!";
    # Array slice of the five elements at indexes $count-1 .. $count+3.
    print $fh "@array[$count-1..($count+3)]\n";
    $count+=5;
}
Store lines in a small buffer, and open a file every fifth line and write the buffer to it
use warnings;
use strict;
use feature 'say';

# Splits the input file into files of five lines each (e1.txt, e2.txt, ...),
# holding at most one pending batch of lines in memory.
my $infile = shift || 'e:/today.txt';
open my $fh_in, '<', $infile or die "Can't open $infile: $!";
my ($fh_out, @buf);
while (<$fh_in>) {
    push @buf, $_;
    if ($. % 5 == 0) {
        my $file = 'e' . int($. / 5) . '.txt';
        open $fh_out, '>', $file or do {
            warn "Can't open $file: $!";
            next;    # the buffer is not cleared here, so these lines stay queued
        };
        print $fh_out $_ for @buf;
        @buf = ();
    }
}
# Write what's left over, if any, after the last batch of five
if (@buf) {
    my $file = 'e' . ( int($. / 5) + 1 ) . '.txt';
    open $fh_out, '>', $file or die "Can't open $file: $!";
    print $fh_out $_ for @buf;
}
Based on what I observed in your code, you can try this:
use warnings;
use strict;

# Copies today.txt into e1.txt, e2.txt, ... with up to five lines per file,
# reading one line at a time.
open(my $fh, "<", "today.txt") or die "Error opening $!";
my $count = 1;
while (my $line = <$fh>) {
    open my $wh, '>', "e$count.txt" or die "Error creating $!";
    print $wh $line;
    for (1 .. 4) {
        my $v = <$fh>;
        # Test definedness, not truth: a final line "0" without a newline is
        # false but is still data.
        last unless defined $v;
        print $wh $v;
    }
    close $wh or die "Error closing e$count.txt: $!";
    $count++;
}
close $fh;

Getting an error as "No such file or directory" in perl

Here is my code. I am passing the files to a subroutine, but inside the subroutine I am not able to open the file, and it throws this error:
"Couldn't open
inputFiles/Fundamental.FinancialLineItem.FinancialLineItem.SelfSourcedPublic.SHE.1.2017-01-11-2259.Full.txt
: No such file or directory at Practice_Dubugg.pl line 40."
use strict;
use warnings;
use Getopt::Std;
use FileHandle;

# Pairs files from the -i directory with files from the -o directory whose
# names agree once a fixed-length suffix is cut off, then calls compare() on
# each pair.
my %opts;
my $optstr = "i:o:";
getopts("$optstr", \%opts);
if($opts{i} eq '' || $opts{o} eq '' )
{
    print "usage: perl TextCompare_Fund.pl <-i INPUTFILE> <-o MAPREDUCE OUTPUTFILE>\n";
    die 1;
}
my $inputFilesPath=$opts{i};
my $outputFilesPath=$opts{o};
# Backticks in list context return one element per output line; each element
# still ends with the newline that ls printed.
my @ifiles=`ls $inputFilesPath`;
my @ofiles=`ls $outputFilesPath`;
foreach my $ifile (@ifiles)
{
    # Drop the last 25 characters of the input name.
    my $ifile_substr=substr("$ifile",0,-25);
    foreach my $ofile (@ofiles)
    {
        # Drop the last 21 characters of the output name.
        my $ofile_substr=substr("$ofile",0,-21);
        my $result=$ifile_substr cmp $ofile_substr;
        if($result eq 0)
        {
            print "$result\n";
            #print "$ifile\n";
            compare($ifile,$ofile)
        }
    }
}

# compare($afile, $bfile)
# Loads every line of $inputFilesPath/$afile into a hash, then reads
# $outputFilesPath/$bfile and writes each line not found in that hash to a
# ".comparision_result" report. Prints both record counts and the number of
# unmatched lines. Dies when either file or the report cannot be opened.
sub compare
{
    my $afile="$_[0]";
    my $bfile="$_[1]";
    # The names are used exactly as passed in when the paths are built.
    my $path1="$inputFilesPath/$afile";
    my $path2="$outputFilesPath/$bfile";
    #open FILE, "<", $path1 or die "$!:$path1";
    open my $infile, "<", $path1 or die "Couldn't open $path1: $!";
    my %a_lines;
    my %b_lines;
    my $count1=0;
    while (my $line = <$infile>)
    {
        chomp $line;
        $a_lines{$line} = undef;    # only key existence is used
        $count1=$count1+1;
    }
    print"File1 records count : $count1\n";
    close $infile;
    my $file=substr("$afile",0,-25);
    my $OUTPUT = "/hadoop/user/m6034690/Kishore/$file.comparision_result";
    open my $outfile, "<", $path2 or die "Couldn't open $path2: $!";
    open (OUTPUT, ">$OUTPUT") or die "Cannot open $OUTPUT \n";
    my $count=0;     # lines of file 2 that are absent from file 1
    my $count2=0;    # all lines of file 2
    while (my $line = <$outfile>)
    {
        chomp $line;
        $b_lines{$line} = undef;
        $count2=$count2+1;
        next if exists $a_lines{$line};
        $count=$count+1;
        print OUTPUT "$line \t===> The Line which is selected from file2/arg2 is mismatching/not available in file1\n";
    }
    print "File2 records count : $count2\n";
    print "Total mismatching/unavailable records in file1 : $count\n";
    close $outfile;
    close OUTPUT;
}
Try adding the chomp lines shown below, right after the marked comment.
my $afile="$_[0]";
my $bfile="$_[1]";
my $path1="$inputFilesPath/$afile";
my $path2="$outputFilesPath/$bfile";
#comment, add the below chomps right under the above portion.
chomp $path1;
chomp $path2;
My test works as the path is now properly formatted.
my result:
File1 records count : 1
File2 records count : 1
Total mismatching/unavailable records in file1 : 1
It took a while to figure this issue out, but it was a bit confusing as the script says usage is -i INPUTFILE and -o OUTPUTFILE
This tells me I should add a file path and not path to folders, but regardless, issue should be resolved.
Edit:
an even better option, We should add the chomp where the ls occurs.
# chomp on a list removes the trailing newline from every element.
chomp( my @ifiles=`ls $inputFilesPath` );
chomp( my @ofiles=`ls $outputFilesPath` );

How can I merge some columns of two files using perl?

I want to merge the first column of input1.txt and the third column of input2.txt. How can I do it? My code doesn't do what I want.
Input1:
1 6
2 7
3 8
4 9
Input2:
a 4 8
b 6 7
c 3 4
d 2 6
Requested output:
1 8
2 7
3 4
4 6
My code:
#!/usr/bin/perl
use strict;
use warnings;
# Writes the first column of every input1.txt line, then the third column of
# every input2.txt line, to outfile.txt.
# NOTE: the two loops run one after the other, so the values from input2.txt
# land below those from input1.txt rather than beside them.
open my $input1, '<', "input1.txt" or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2, '<', "input2.txt" or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};
while(<$input1>)
{
    my @columns1 = split;
    print $outfile join("\t", $columns1[0], "\n");
}
while(<$input2>)
{
    my @columns2 = split;
    print $outfile join("\t", $columns2[2], "\n");
}
close($input1);
close($input2);
close($outfile);
Another way to get the requested output is to use one while loop instead of two:
mod.pl
#!/usr/bin/perl
use strict;
use warnings;

# Pairs line N of input1.txt with line N of input2.txt and writes
# "<column 1 of input1>\t<column 3 of input2>" to outfile.txt.
open my $input1, '<', "input1.txt" or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2, '<', "input2.txt" or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};
while (my $l1 = <$input1>) {
    my $l2 = <$input2>;
    last unless defined $l2;    # input2.txt ran out of lines first
    # split ' ' skips leading whitespace, treats any run of whitespace as one
    # separator and discards the trailing newline, so no chomp is needed.
    my @columns1 = split ' ', $l1;
    my @columns2 = split ' ', $l2;
    print $outfile join("\t", $columns1[0], $columns2[2]), "\n";
}
close($input1);
close($input2);
close($outfile) or die qq{Failed to close "outfile.txt": $!};
#$ perl mod.pl
#$ cat outfile.txt
1 8
2 7
3 4
4 6
Do this:
use strict;
use warnings;

# Usage: yourprogram.pl input1.txt input2.txt [outfile.txt]
# Collects column 1 of the first file and column 3 of the second file, then
# writes them side by side, one pair per line.
my $filename1 = $ARGV[0];    # input1.txt
my $filename2 = $ARGV[1];    # input2.txt
# The output name is optional and falls back to the name used before.
my $outname = defined $ARGV[2] ? $ARGV[2] : 'outfile.txt';

my @column1;
open(my $in1, '<', $filename1)
    or die "Couldn't open $filename1!";
while (my $currentLine = <$in1>) {
    my @data1 = split " ", $currentLine;    # split the line on whitespace
    push @column1, $data1[0];               # keep the first column
}
close($in1);

my @column3;
open(my $in2, '<', $filename2)
    or die "Couldn't open $filename2!";
while (my $currentLine = <$in2>) {
    my @data2 = split " ", $currentLine;
    push @column3, $data2[2];               # keep the third column
}
close($in2);

# Truncate rather than append, so that running the program twice does not
# duplicate the result.
open(my $out, '>', $outname)
    or die "Couldn't open $outname!";
for my $i (0 .. $#column1) {
    print $out "$column1[$i] $column3[$i]\n";
}
close($out) or die "Couldn't close $outname!";
And when you run your perl program in command line, do:
yourprogram.pl input1.txt input2.txt outfile.txt
And it should work.
I tried the program and opened the outfile.txt and your requested output is in there.
Your code prints the columns serially, but what you need is to print them in parallel:
#!/usr/bin/perl
use strict;
use warnings;

# Reads input1.txt and input2.txt in lockstep and writes column 1 of the
# former next to column 3 of the latter. Runs until both files are exhausted;
# a file that ends early contributes empty fields.
open my $input1, '<', "input1.txt" or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2, '<', "input2.txt" or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};
while (1)
{
    my $line1 = <$input1>;
    my $line2 = <$input2>;
    # Check for end of input BEFORE printing, otherwise an extra empty record
    # is written after the last real one.
    last if !defined $line1 && !defined $line2;
    my @columns1 = split ' ', $line1 // '';
    my @columns2 = split ' ', $line2 // '';
    print $outfile join("\t", $columns1[0] // '', $columns2[2] // ''), "\n";
}
close($input1);
close($input2);
close($outfile) or die qq{Failed to close "outfile.txt": $!};
It doesn't have to be this complicated. Read the first file's first column in an array and print it along with the third field of second file. Unless you have files with different number of rows, this should work just fine.
perl -lane'
# BEGIN: set the last file name aside, read column 1 of the remaining
# file(s) into @col1, then leave only the last file for the -n loop.
BEGIN { $x = pop; @col1 = map { (split)[0] } <>; @ARGV = $x }
print join " ", $col1[$.-1], $F[-1]
' input1 input2
1 8
2 7
3 4
4 6

How to replace string dynamically using perl script

I am trying to solve below issues.
I have 2 files. Address.txt and File.txt. I want to replace all A/B/C/D (File.txt) with corresponding string value (Read from Address.txt file) using perl script. It's not replacing in my output file. I am getting same content of File.txt.
I tried below codes.
Here is Address.txt file
A,APPLE
B,BAL
C,CAT
D,DOG
E,ELEPHANT
F,FROG
G,GOD
H,HORCE
Here is File.txt
A B C
X Y X
M N O
D E F
F G H
Here is my code :
use strict;
use warnings;
# For every "key,value" line of Address.txt, copies the file named by the
# first argument to ariout.txt with that one key replaced by its value.
# NOTE: ariout.txt is reopened with '>' on every pass, which truncates it, and
# every pass reads the unmodified input again, so only the output of the last
# pass remains.
open (MYFILE, 'Address.txt') or die $!;
foreach (<MYFILE>){
    chomp;
    my @data_new = split/,/sm;
    open INPUTFILE, "<", $ARGV[0] or die $!;
    open OUT, '>ariout.txt' or die $!;
    my $src = $data_new[0];
    my $des = $data_new[1];
    while (<INPUTFILE>) {
        # print "In while :$src \t$des\n";
        $_ =~ s/$src/$des/g;
        print OUT $_;
    }
    close INPUTFILE;
    close OUT;
    # /usr/bin/perl -p -i -e "s/A/APPLE/g" ARGV[0];
}
close (MYFILE);
If i Write $_ =~ s/A/Apple/g;
Then output file is fine and A is replacing with "Apple". But when dynamically coming it's not getting replaced.
Thanks in advance. I am new in perl scripting language . Correct me if I am wrong any where.
Update 1: I updated below code . It's working fine now. My questions Big O of this algo.
Code :
#!/usr/bin/perl
use warnings;
use strict;

# Replaces every whitespace-separated token of File1.txt that is a key in
# Address.txt ("key,value" per line) with its value and writes the result to
# output.txt. Each token costs one hash lookup.
open( my $out_fh, ">", "output.txt" ) || die "Can't open the output file for writing: $!";
open( my $address_fh, "<", "Address.txt" ) || die "Can't open the address file: $!";
# Limit 2 keeps any further commas inside the value.
my %lookup = map { chomp; split( /,/, $_, 2 ) } <$address_fh>;
close($address_fh);
open( my $file_fh, "<", "File1.txt" ) || die "Can't open the File1.txt file: $!";
while ( my $row = <$file_fh> ) {
    for my $token ( split ' ', $row ) {
        my $text = exists $lookup{$token} ? $lookup{$token} : $token;
        print $out_fh " $text ";
    }
    print $out_fh "\n";
}
close($file_fh);
close($out_fh) || die "Can't close the output file: $!";
Not entirely sure how you want your output formatted. Do you want to keep the rows and columns as is?
I took a similar approach as above but kept the formatting the same as in your 'file.txt' file:
#!/usr/bin/perl
use warnings;
use strict;

# Rewrites file.txt into output.txt row by row: a token that is a key in
# address.txt ("key,value" per line) is replaced by its value, any other token
# is copied unchanged. Every token is written with a blank on either side.
open( my $out_fh, ">", "output.txt" ) || die "Can't open the output file for writing: $!";
open( my $address_fh, "<", "address.txt" ) || die "Can't open the address file: $!";
my %lookup = map { chomp; split( /,/, $_, 2 ) } <$address_fh>;
close($address_fh);
open( my $file_fh, "<", "file.txt" ) || die "Can't open the file.txt file: $!";
while ( my $row = <$file_fh> ) {
    my @line = split ' ', $row;
    for my $char (@line) {
        if ( exists $lookup{$char} ) {
            print $out_fh " $lookup{$char} ";
        }
        else {
            print $out_fh " $char ";
        }
    }
    print $out_fh "\n";
}
close($file_fh);
close($out_fh) || die "Can't close the output file: $!";
That will give you the output:
APPLE BAL CAT
X Y X
M N O
DOG ELEPHANT FROG
FROG GOD HORCE
Here's another option that lets Perl handle the opening and closing of files:
use strict;
use warnings;

# The last command-line argument is set aside; everything still in @ARGV is
# read by the first <> to build the lookup hash, after which the argument set
# aside is read by the second <> and printed with its tokens replaced.
my $file_txt = pop;
# Lines of the form "key,value" become hash entries. A line without a comma
# must contribute an empty list: "EXPR if COND" would instead leave a false
# value in the list and shift every later key/value pair.
my %hash = map { /(.+?),(.+)/ ? ( $1 => $2 ) : () } <>;
push @ARGV, $file_txt;
while (<>) {
    my @array;
    push @array, $hash{$_} // $_ for split;
    print "@array\n";
}
Usage: perl script.pl Addresses.txt File.txt [>outFile.txt]
The last, optional parameter directs output to a file.
Output on your dataset:
APPLE BAL CAT
X Y X
M N O
DOG ELEPHANT FROG
FROG GOD HORCE
The name of the last file on the command line (File.txt) is popped off of @ARGV for use later. Then, a hash is built, using the key/value pairs in the addresses' file.
File.txt is then read, splitting each line into its single elements, and the defined-or (//) operator is used to return the defined hash item or the single element, which is then pushed onto @array. Finally, the array is interpolated in a print statement.
Hope this helps!
First, here is your existing program, rewritten slightly
open the address file
convert the address file to a hash so that the letters are the keys and the strings the values
open the other file
read in the single line in it
split the line into single letters
use the letters to lookup in the hash
use strict;
use warnings;

# Prints, one per line, the mapped string for every letter on the first line
# of File.txt that has an entry in Address.txt.
# The handles are not called $a or $b, which are reserved for sort.
open(my $address_fh, '<', "Address.txt") || die $!;
# split(' ') strips the newline from each line; split(/,/) then yields the
# key followed by the value.
my %address = map { split(/,/) } map { split(' ') } <$address_fh>;
close($address_fh);
open(my $file_fh, '<', "File.txt") || die $!;
my $list = <$file_fh>;    # only the first line of File.txt is read
close($file_fh);
for my $letter (split(' ', $list)) {
    print $address{$letter}."\n" if (exists $address{$letter});
}
to make another file with the substitutions in place alter the loop that processes $list
# Build the substituted token list: the mapped string when the letter is a key
# in %address, the letter itself otherwise.
my @output;
for my $letter (split(' ',$list)) {
    if (exists $address{$letter}) {
        push @output, $address{$letter};
    }
    else {
        push @output, $letter;
    }
}
open(my $o, '>', "newFile.txt")||die $!;
print $o "@output";    # elements are joined with single spaces
close($o)||die $!;
Your problem is that in every iteration of your foreach loop you overwrite any changes made earlier to output file.
My solution:
use strict;
use warnings;

# Reads "key,value" pairs from Address.txt and prints the file named by the
# first argument with every occurrence of a key replaced by its value, all in
# a single pass over the input.
open my $replacements, '<', 'Address.txt' or die $!;
my %r;
while (my $pair = <$replacements>) {
    chomp $pair;
    my ($k, $v) = split /,/, $pair, 2;
    next unless defined $v;    # skip blank lines and lines without a comma
    $r{$k} = $v;
}
close $replacements;

# Keys are escaped so that they match literally, and longer keys come first so
# that a key which is a prefix of another cannot pre-empt it.
my $alternation = join '|',
    map  { quotemeta($_) }
    sort { length($b) <=> length($a) } keys %r;

open my $input, "<", $ARGV[0] or die $!;
while (my $line = <$input>) {
    # With no keys the pattern would be empty and match everywhere.
    $line =~ s/($alternation)/$r{$1}/g if length $alternation;
    print $line;
}
close $input;
#!/usr/bin/perl -w
# to replace multiple text strings in a file with text from another file
# select text from 1st file, replace in 2nd file
use strict;

my $file1 = 'Address.txt';
my $file2 = 'File.txt';

# save the strings by which to replace
my %replacement;
open my $in, '<', $file1 or die "cant open $file1\n";
while (my $line = <$in>) {
    chomp $line;
    my @pair = split ',', $line;
    next unless @pair >= 2;    # blank line or line without a comma
    $replacement{$pair[0]} = $pair[1];
}
close $in;

open my $out, '>', 'replaced_file' or die "cant open replaced_file\n";
open my $repl, '<', $file2 or die "cant open $file2\n";
while (my $line = <$repl>) {
    chomp $line;
    # replace strings wherever possible
    my @replaced_data =
        map { exists $replacement{$_} ? $replacement{$_} : $_ } split ' ', $line;
    print {$out} trim(join " ", @replaced_data), "\n";
}
close $repl;
close $out or die "cant close replaced_file\n";
########################################
# trim($string)
# Returns a copy of the first argument with the whitespace at its start and
# at its end removed; the argument itself is not modified.
sub trim
{
    my ($text) = @_;
    $text =~ s/^\s+//;    # leading whitespace
    $text =~ s/\s+$//;    # trailing whitespace
    return $text;
}

merging two files using perl keeping the copy of original file in other file

I have two files, A.ini and B.ini, and I want to merge both files into A.ini.
examples of files:
A.ini::
a=123
b=xyx
c=434
B.ini contains:
a=abc
m=shank
n=paul
my output in files A.ini should be like
a=123abc
b=xyx
c=434
m=shank
n=paul
I want this merging to be done in Perl, and I want to keep a copy of the old A.ini file somewhere else so that the old version can still be used.
A command line variant:
perl -lne '
# Split into at most two fields so that a value may itself contain "=".
($k, $val) = split /=/, $_, 2;
# First sighting of a key stores the whole line; later ones append the value.
$v{$k} = exists $v{$k} ? $v{$k} . $val : $_;
END {
print $v{$_} for sort keys %v
}' A.ini B.ini >NEW.ini
How about:
#!/usr/bin/perl
use strict;
use warnings;
use File::Copy qw(copy);

# Merges B.ini into A.ini: values of keys present in both files are
# concatenated (A's value first), keys found only in B are added. The previous
# A.ini is kept as A.ini.bak.
my $file_a = 'path/to/A.ini';
my $file_b = 'path/to/B.ini';

my @order;    # keys in order of first appearance, so output order is stable
my %out;
for my $file ($file_a, $file_b) {
    open my $fh, '<', $file or die "unable to open '$file' for reading: $!";
    while (my $line = <$fh>) {
        chomp $line;
        # Limit 2 keeps any further "=" inside the value.
        my ($key, $val) = split /=/, $line, 2;
        next unless defined $val;    # blank line or line without "="
        push @order, $key unless exists $out{$key};
        $out{$key} .= $val;
    }
    close $fh;
}

# Take the backup before A.ini is truncated by the open below.
copy($file_a, "$file_a.bak") or die "unable to back up '$file_a': $!";

open my $fh, '>', $file_a or die "unable to open '$file_a' for writing: $!";
print {$fh} "$_=$out{$_}\n" for @order;
close $fh or die "unable to close '$file_a': $!";
The two files to be merged can be read in a single pass and don't need to be treated as separate source files. That allows the use of <> to read all files passed as parameters on the command line.
Keeping a backup copy of A.ini is simply a matter of renaming it before writing the merged data to a new file of the same name.
This program appears to do what you need.
use strict;
use warnings;

# Usage: perl merge.pl A.ini B.ini
# Reads every file named on the command line in one pass, concatenating the
# values of repeated keys, then renames the first file to "<name>.bak" and
# writes the merged data under the original name.
my $file_a = $ARGV[0];
die "Usage: $0 A.ini B.ini\n" unless defined $file_a;

my (@keys, %values);    # @keys records the order of first appearance
while (<>) {
    if (/\A\s*(.+?)\s*=\s*(.+?)\s*\z/) {
        push @keys, $1 unless exists $values{$1};
        $values{$1} .= $2;
    }
}

rename $file_a, "$file_a.bak" or die qq(Unable to rename "$file_a": $!);
open my $fh, '>', $file_a or die qq(Unable to open "$file_a" for output: $!);
printf $fh "%s=%s\n", $_, $values{$_} for @keys;
close $fh or die qq(Unable to close "$file_a": $!);
output (in A.ini)
a=123abc
b=xyx
c=434
m=shank
n=paul