Vertical index Perl

File 1 has ranges, e.g. 3-9, 2-6, etc.:
3 9
2 6
12 20
File 2 has values: column 1 is the position and column 2 is the value.
1 4
2 4
3 5
4 4
5 4
6 1
7 1
8 1
9 4
I would like to calculate the sum of values (file 2, column 2) for the ranges in file 1. E.g. if the range is 3-9, then the sum of values will be 5+4+4+1+1+1+4 = 20.
What I have tried is:
open (FILE1, "file1.txt");
open (FILE2, "file2.txt");
@file1 = <FILE1>;
@file2 = <FILE2>;
foreach (@file1)
{
    @split_file1 = split("\\s", $_);              # split the line on whitespace
    foreach (@file2)
    {
        @split_file2 = split("\\s", $_);          # split the line on whitespace
        if ($split_file1[0] == $split_file2[0])   # if column 0 of file1 matches column 0 of file2
        {
            $x += $split_file2[1];                # sum column 1 of file2
            if ($split_file2[0] == $split_file1[0])   # until column 1 of file1 equals column 0 of file2
            {
                last;
            }
        }
    }
}

Always use use strict; use warnings;.
split /\s/ is easier to read than split("\\s", ...), but split ' ' is what you actually want: it skips leading whitespace and splits on runs of whitespace.
Don't use global variables (e.g. for file handles).
It's useful to check if open succeeds, if only by adding or die $!.
Use meaningful names, not file1 and file2.
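To see the split difference concretely (a small aside, not part of the original answer): split("\\s", $_) passes the two-character string \s, which Perl treats as the pattern /\s/, so it splits on every single whitespace character and keeps leading empty fields, while split ' ' is the special case that skips leading whitespace and splits on whitespace runs.

my @with_regex = split /\s/, "  foo  bar";   # ("", "", "foo", "", "bar")
my @with_space = split ' ',  "  foo  bar";   # ("foo", "bar")

With those points applied, here is a cleaned-up version: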
use strict;
use warnings;

use feature qw( say );

use List::Util qw( sum );

my $file1 = 'file1.txt';
my $file2 = 'file2.txt';

my @file2;
{
    open(my $fh, '<', $file2)
        or die "Can't open $file2: $!\n";

    while (<$fh>) {
        my ($k, $v) = split;
        $file2[$k] = $v;
    }
}

{
    open(my $fh, '<', $file1)
        or die "Can't open $file1: $!\n";

    while (<$fh>) {
        my ($start, $end) = split;
        say sum grep defined, @file2[$start .. $end];
    }
}
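One caveat worth noting (my observation, not part of the original answer): sum from List::Util returns undef for an empty list, so a range with no stored positions (like 12-20 in the sample data) would print an empty line and warn. Seeding the sum with 0 sidesteps that:

say sum(0, grep defined, @file2[$start .. $end]);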

Another solution:
#!/usr/bin/perl
use strict;
use warnings;

my $f1 = shift;
my $f2 = shift;

open FH1, "<", $f1 or die "$!\n";
open FH2, "<", $f2 or die "$!\n";

my %data;
while (<FH1>) {
    $data{$1} = $2 if ($_ =~ m/^(\d+)\s+(\d+)$/);
}

while (<FH2>) {
    if ($_ =~ m/^(\d+)\s+(\d+)$/) {
        my $sum;
        for ($1..$2) {
            $sum += $data{$_} if defined($data{$_});
        }
        print "sum for $1-$2: $sum\n" if defined($sum);
    }
}

close FH1;
close FH2;
Call: script.pl values.txt ranges.txt
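With the question's data (values.txt holding file 2 and ranges.txt holding file 1), that call prints:

sum for 3-9: 20
sum for 2-6: 18

Nothing is printed for the 12-20 range, because no positions in that range appear in the values file, so $sum stays undefined.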


Duplicate values in column

I have an original file which has the following columns:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,C,Sell,0.25,2000
02-May-2018,JPM,Sell,0.25,3000
02-May-2018,WFC,Sell,0.25,5000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,GOOG,Sell,0.25,8000
02-May-2018,GOOG,Sell,0.25,9000
02-May-2018,C,Sell,0.25,2000
02-May-2018,AAPL,Sell,0.25,3000
I am trying to print the original line if I see a value in the second column more than 2 times. For example, if I see AAPL more than 2 times, the desired result should be:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
So far, I have written the following, which prints results multiple times, which is wrong. Can you please help me with what I am doing wrong?
open (FILE, "<$TMPFILE") or die "Could not open $TMPFILE";
open (OUT, ">$TMPFILE1") or die "Could not open $TMPFILE1";
%count = ();
@symbol = ();
while ($line = <FILE>)
{
    chomp $line;
    (@data) = split(/,/,$line);
    $count{$data[1]}++;
    @keys = sort {$count{$a} cmp $count{$b}} keys %count;
    for my $key (@keys)
    {
        if ( $count{$key} > 2 )
        {
            print "$line\n";
        }
    }
}
I'd do it something like this: store lines you've seen in a 'buffer' and print them out again if the condition is hit (before continuing to print as you go):
#!/usr/bin/env perl
use strict;
use warnings;

my %buffer;
my %count_of;

while ( my $line = <> ) {
    my ( $date, $ticker, @values ) = split /,/, $line;
    # increment the count
    $count_of{$ticker}++;
    if ( $count_of{$ticker} < 3 ) {
        # count limit not hit, so stash the current line in the buffer.
        $buffer{$ticker} .= $line;
        next;
    }
    # print the buffer if the count has been hit
    if ( $count_of{$ticker} == 3 ) {
        print $buffer{$ticker};
    }
    # only gets to here once the limit is hit, so just print normally.
    print $line;
}
With your input data, this outputs:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
Simple answer:
push @{ $lines{(split",")[1]} }, $_ while <>;
print @{ $lines{$_} } for grep @{ $lines{$_} } > 2, sort keys %lines;
perl program.pl inputfile > outputfile
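Unpacked into an ordinary script, the one-liner reads roughly like this (a sketch for readability, not part of the original answer):

use strict;
use warnings;

my %lines;
while (<>) {
    my $ticker = (split /,/)[1];       # second CSV field
    push @{ $lines{$ticker} }, $_;     # group the lines by ticker
}
for my $ticker (sort keys %lines) {
    print @{ $lines{$ticker} } if @{ $lines{$ticker} } > 2;
}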
You need to read the input file twice, because you don't know the final counts until you get to the end of the file.
use strict;
use warnings 'all';

my ($TMPFILE, $TMPFILE1) = qw/ infile outfile /;

my %counts;
{
    open my $fh, '<', $TMPFILE or die "Could not open $TMPFILE: $!";
    while ( <$fh> ) {
        my @fields = split /,/;
        ++$counts{$fields[1]};
    }
}

open my $fh, '<', $TMPFILE or die "Could not open $TMPFILE: $!";
open my $out_fh, '>', $TMPFILE1 or die "Could not open $TMPFILE1: $!";

while ( <$fh> ) {
    my @fields = split /,/;
    print $out_fh $_ if $counts{$fields[1]} > 2;
}
Output:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
This should work:
use strict;
use warnings;

open (FILE, "<$TMPFILE") or die "Could not open $TMPFILE";
open (OUT, ">$TMPFILE1") or die "Could not open $TMPFILE1";

my %data;
while ( my $line = <FILE> ) {
    chomp $line;
    my @line = split /,/, $line;
    push(@{$data{$line[1]}}, $line);
}

foreach my $key (keys %data) {
    if (@{$data{$key}} > 2) {
        print "$_\n" foreach @{$data{$key}};
    }
}

How can I merge some columns of two files using perl?

I want to merge the first column of input1.txt and the third column of input2.txt. How can I do it? My code doesn't do what I want.
Input1:
1 6
2 7
3 8
4 9
Input2:
a 4 8
b 6 7
c 3 4
d 2 6
Requested output:
1 8
2 7
3 4
4 6
My code:
#!/usr/bin/perl
use strict;
use warnings;

open my $input1,  '<', "input1.txt"  or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2,  '<', "input2.txt"  or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};

while (<$input1>)
{
    my @columns1 = split;
    print $outfile join("\t", $columns1[0], "\n");
}
while (<$input2>)
{
    my @columns2 = split;
    print $outfile join("\t", $columns2[2], "\n");
}

close($input1);
close($input2);
close($outfile);
Another way to get the requested output is to use one while loop instead of two:
mod.pl
#!/usr/bin/perl
use strict;
use warnings;

open my $input1,  '<', "input1.txt"  or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2,  '<', "input2.txt"  or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};

while (my $l1 = <$input1>) {
    my $l2 = <$input2>;
    chomp $l1;
    chomp $l2;
    my @columns1 = split(/ /, $l1);
    my @columns2 = split(/ /, $l2);
    print $outfile join("\t", $columns1[1-1], $columns2[3-1]), "\n";
}

close($input1);
close($input2);
close($outfile);
$ perl mod.pl
$ cat outfile.txt
1 8
2 7
3 4
4 6
Do this:
$filename1 = $ARGV[0];   # for taking input1.txt as the first argument
$filename2 = $ARGV[1];   # for taking input2.txt as the second argument

@data1;
@column1;

open(INPUT_FILE, $filename1)
    or die "Couldn't open $filename1!";

while (<INPUT_FILE>) {
    my $currentLine = $_;                # read the input file one line at a time, storing it in $currentLine
    @data1 = split " ", $currentLine;    # split the line by space
    $firstcolumn = $data1[0];            # store the first column's data
    push @column1, $firstcolumn;         # push the first column's data into an array
}

@data2;
@column3;

open(INPUT_FILE, $filename2)
    or die "Couldn't open $filename2!";

while (<INPUT_FILE>) {
    my $currentLine = $_;
    @data2 = split " ", $currentLine;
    $thirdcolumn = $data2[2];            # store the third column's data
    push @column3, $thirdcolumn;
}

$size = @column1;

open (OUTPUTFILE, '>>outfile.txt');

for ($i = 0; $i < $size; $i++) {
    print OUTPUTFILE "$column1[$i] $column3[$i]\n";   # write each entry into outfile.txt
}

close(INPUT_FILE);
close (OUTPUTFILE);
And when you run your Perl program on the command line, do:
yourprogram.pl input1.txt input2.txt outfile.txt
And it should work (note that the script appends to a hard-coded outfile.txt, so the third argument is not actually used).
I tried the program, opened outfile.txt, and your requested output is in there.
Your code prints the files serially, but what you need is to process them in parallel:
#!/usr/bin/perl
use strict;
use warnings;

open my $input1,  '<', "input1.txt"  or die qq{Failed to open "input1.txt" for reading: $!};
open my $input2,  '<', "input2.txt"  or die qq{Failed to open "input2.txt" for reading: $!};
open my $outfile, '>', "outfile.txt" or die qq{Failed to open "outfile.txt" for writing: $!};

my ($line1, $line2);
while (1)
{
    $line1 = <$input1> || '';
    $line2 = <$input2> || '';
    my @columns1 = split ' ', $line1;
    my @columns2 = split ' ', $line2;
    print $outfile join("\t", $columns1[0], $columns2[2]), "\n";
    last if !$line1 && !$line2;
}

close($input1);
close($input2);
close($outfile);
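A small caveat (my observation, not part of the original answer): once both files are exhausted, this loop still prints one record of undefs before last fires, which produces warnings and a stray line. Checking for end-of-input before printing avoids that:

while (1)
{
    $line1 = <$input1> || '';
    $line2 = <$input2> || '';
    last if !$line1 && !$line2;    # stop before printing an empty record
    my @columns1 = split ' ', $line1;
    my @columns2 = split ' ', $line2;
    print $outfile join("\t", $columns1[0], $columns2[2]), "\n";
}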
It doesn't have to be this complicated. Read the first file's first column into an array and print it along with the third field of the second file. Unless your files have different numbers of rows, this should work just fine:
perl -lane'
  BEGIN { $x = pop; @col1 = map { (split)[0] } <>; @ARGV = $x }
  print join " ", $col1[$.-1], $F[-1]
' input1 input2
1 8
2 7
3 4
4 6

Perl program to compute the average of each column of numbers in a file

This is what I have so far, but the code only shows the contents of column 1. I'm not sure how to compute the per-column averages. I am really new to programming, so this may be an easy question.
my $filename = "Q5.txt";
open(my $fh, "<", $filename) or die "Could not open '$filename'\n";

while (my $line = <$fh>) {
    $count++;
    @line = $line;
    for (@line) {
        ...
    }
}
print $line[0];
Something like this should suit you
use strict;
use warnings;
use autodie;

my $filename = 'Q5.txt';

my ($n, @totals);

open my $fh, '<', $filename;

while (<$fh>) {
    my @fields = split;
    $totals[$_] += $fields[$_] for 0 .. $#fields;
    ++$n;
}

$_ /= $n for @totals;

print "@totals\n";
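For example, with a hypothetical Q5.txt containing whitespace-separated numeric columns:

1 2 3
4 5 6
7 8 9

the script prints the per-column averages:

4 5 6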

How to count the number of specific characters in each line of a file?

I'm trying to count the number of 'N's in a FASTA file which is:
>Header
AGGTTGGNNNTNNGNNTNGN
>Header2
AGNNNNNNNGNNGNNGNNGN
So in the end I want to get the count of 'N's for each read (each header is one read) and make a histogram, so the output would look something like this:
# of N's # of Reads
0 300
1 240
etc...
i.e. there are 300 sequences (reads) that contain zero 'N's.
use strict;
use warnings;

my $file = shift;
my $output_file = shift;
my $line;
my $sequence;
my $length;
my $char_N_count = 0;
my @array;
my $count = 0;

if (!defined ($output_file)) {
    die "USAGE: Input FASTA file\n";
}

open (IFH, "$file") or die "Cannot open input file $!\n";
open (OFH, ">$output_file") or die "Cannot open output file $!\n";

while ($line = <IFH>) {
    chomp $line;
    next if $line =~ /^>/;
    $sequence = $line;
    @array = split ('', $sequence);
    foreach my $element (@array) {
        if ($element eq 'N') {
            $char_N_count++;
        }
    }
    print "$char_N_count\n";
}
Try this. I changed a few things like using scalar file handles. There are many ways to do this in Perl, so some people will have other ideas. In this case I used an array which may have gaps in it - another option is to store results in a hash and key by the count.
Edit: Just realised I'm not using $output_file, because I have no idea what you want to do with it :) Just change the 'print' at the end to 'print $out_fh' if your intent is to write to it.
use strict;
use warnings;

my $file = shift;
my $output_file = shift;

if (!defined ($output_file)) {
    die "USAGE: $0 <input_file> <output_file>\n";
}

open (my $in_fh,  '<', $file)        or die "Cannot open input file '$file': $!\n";
open (my $out_fh, '>', $output_file) or die "Cannot open output file '$output_file': $!\n";

my @results = ();

while (my $line = <$in_fh>) {
    next if $line =~ /^>/;
    my $num_n = ($line =~ tr/N//);
    $results[$num_n]++;
}

print "# of N's\t# of Reads\n";
for (my $i = 0; $i < scalar(@results); $i++) {
    unless (defined($results[$i])) {
        $results[$i] = 0;
        # another option is to 'next' if you don't want to show the zero totals
    }
    print "$i\t\t$results[$i]\n";
}

close($in_fh);
close($out_fh);
exit;
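As an aside (not from the original answer), the tr/N// counting idiom works on its own like this; in scalar context tr/// returns the number of characters it matched, and with an empty replacement list it leaves the string unchanged:

my $seq = "AGGTTGGNNNTNNGNNTNGN";   # first sequence from the question
my $n   = ($seq =~ tr/N//);
print "$n\n";                       # prints 9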

How can I delete the last 10 lines of a file in perl

I am taking a number of lines as user input and then I am deleting that many lines from the end of the file.
I saw learn.perl.org/faq/perlfaq5.html#How-do-I-count-the-number-of-lines-in-a-file- and then I tried the simple logic below.
Logic:
Get the total number of lines
Subtract the number entered by the user
Print the remaining lines
Here is my code:
#!/usr/bin/perl -w
use strict;

open IN, "<", "Delete_line.txt"
    or die "Can not open the file $!";
open OUT, ">", "Update_delete_line.txt"
    or die "Can not write in the file $!";

my ($total_line, $line, $number, $printed_line);

print "Enter the number of lines to be deleted\n";
$number = <STDIN>;

while ($line = <IN>) {
    $total_line = $.;    # total number of lines in the file
}

$printed_line = $total_line - $number;

while ($line = <IN>) {
    print OUT $line unless $. == $printed_line;
}
Well, I am getting neither an error from the code nor any output, and I just don't know why.
Can anyone give me a suggestion?
A Perl solution that's efficient for large files requires the use of File::ReadBackwards
use File::ReadBackwards qw( );

my $num_lines = 10;
my $qfn = 'file.txt';

my $pos = do {
    my $fh = File::ReadBackwards->new($qfn)
        or die $!;
    $fh->readline() for 1..$num_lines;
    $fh->tell()
};

truncate($qfn, $pos)
    or die $!;
This does not read the whole file twice (unlike the OP's method).
This does not read the whole file (unlike the Tie::File solutions).
This does not read the whole file into memory.
Yet another way is to use Tie::File
#!/usr/bin/env perl
use strict;
use warnings;
use Tie::File;

tie my @lines, 'Tie::File', 'myfile' or die "$!\n";
$#lines -= 10;
untie @lines;
This has the advantage of not loading the file into memory while acting like it does.
Here is a solution that passes through a stream and prints all but the last n lines, where n is a command line argument:
#!/usr/bin/perl
my @cache;
my $n = shift @ARGV;
while (<>) {
    push @cache, $_;
    print shift @cache if @cache > $n;
}
or the one-liner version:
perl -ne'BEGIN{$n=shift@ARGV}push@c,$_;print shift@c if@c>$n' NUMBER
After finishing reading from IN, you have to reopen it or seek IN, 0, 0 to reset its position. You also have to set $. to zero again.
Also, the final condition should be changed to unless $. > $printed_line so you skip all the lines over the threshold.
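Putting that advice together, a minimal corrected version of the script might look like this (a sketch that keeps the original file names and overall structure):

#!/usr/bin/perl
use strict;
use warnings;

open my $in,  '<', 'Delete_line.txt'        or die "Can not open the file $!";
open my $out, '>', 'Update_delete_line.txt' or die "Can not write in the file $!";

print "Enter the number of lines to be deleted\n";
my $number = <STDIN>;

my $total_line;
while (<$in>) {
    $total_line = $.;          # total number of lines in the file
}

seek $in, 0, 0;                # rewind to the beginning
$. = 0;                        # reset the line counter

my $printed_line = $total_line - $number;
while (my $line = <$in>) {
    print $out $line unless $. > $printed_line;
}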
The "more fun" answer: use Tie::File!
use strict;
use warnings;
use Tie::File;

tie my @file, 'Tie::File', 'filename' or die "$!";
$#file -= 10;
Just slurp the whole file into an array and splice off the last 10 lines:
open my $filehandle, "<", "info.txt";
my @file = <$filehandle>;
splice(@file, -10);
print @file;
Note: This loads the entire file into memory.
You could just keep a buffer of the last 10 lines while printing, and at the end simply not print what remains in the buffer.
use English qw<$INPLACE_EDIT>;

{
    local @ARGV = $name_of_file_to_edit;
    local $INPLACE_EDIT = '.bak';

    my @buffer;
    for ( 1..$num_lines_to_trim ) {
        push @buffer, scalar <>;   # read exactly one line (a bare <> here would slurp the whole file)
    }
    while ( <> ) {
        print shift @buffer;
        push @buffer, $_;
    }
}
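For reference (my note, not part of the answer): $INPLACE_EDIT is the English.pm name for $^I, so this rewrites the file in place via the <> loop and keeps the original as a .bak backup. Without English.pm the same setup would be:

local @ARGV = ($name_of_file_to_edit);
local $^I   = '.bak';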
You could also do this with File::Slurp::edit_file_lines:
use File::Slurp qw( edit_file_lines );

my @buffer;
my $limit_reached = 0;
edit_file_lines {
    push @buffer, $_;
    return ( $limit_reached ||= @buffer > $num_lines_to_trim )
        ? shift @buffer
        : '';
} $name_of_file;
my $num_lines = 10;
my $qfn = 'file.txt';
system('head', '-n', -$num_lines, '--', $qfn);
die "Error" if $?;
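Note (my observation, not part of the original answer): head -n with a negative count is a GNU coreutils extension, and as written the trimmed content goes to standard output rather than back into file.txt. If you want to keep the result, redirect it, for example:

system(qq{head -n -$num_lines -- "$qfn" > "$qfn.trimmed"}) == 0
    or die "head failed";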
Easy with a C-style for loop:
#!/usr/bin/perl -w
use strict;

open(my $in, "<", "Delete_line.txt")         or die "Can not open the file $!";
open(my $out, ">", "Update_delete_line.txt") or die "Can not write in the file $!";

print "Enter the number of lines to be deleted\n";
my $number = <STDIN>;

my @file = <$in>;

for (my $i = 0; $i < $#file - $number + 1; $i++) {
    print $out $file[$i];
}

close $in;
close $out;
#
# Reads a file, trims the passed numbers of lines from the top and the bottom,
# and returns the rest as a string.
# stolen from : http://stackoverflow.com/a/9330343/65706
# usage :
#   my $StrCatFile = $objFileHandler->ReadFileReturnTrimmedStrAtTopBottom (
#       $FileToCat, $NumOfRowsToRemoveAtTop, $NumOfRowsToRemoveAtBottom);
sub ReadFileReturnTrimmedStrAtTopBottom {
    my $self = shift;
    my $file = shift;
    my $NumOfLinesToRemoveAtTop = shift;
    my $NumOfLinesToRemoveAtBottom = shift;

    my @cache;
    my $StrTmp = ();
    my $StrReturn = ();
    my $fh = ();

    open($fh, "<", "$file") or cluck("can't open file : $file for reading: $!");

    my $counter = 0;
    while (<$fh>) {
        if ($. >= $NumOfLinesToRemoveAtTop + 1) {
            $StrTmp .= $_;
        }
    }
    close $fh;

    my $sh = ();
    open($sh, "<", \$StrTmp) or cluck("can't open string : $StrTmp for reading: $!");
    while (<$sh>) {
        push(@cache, $_);
        $StrReturn .= shift @cache if @cache > $NumOfLinesToRemoveAtBottom;
    }
    close $sh;

    return $StrReturn;
}
# eof ReadFileReturnTrimmedStrAtTopBottom
#