Hash doesn't print right while reading from a file in Perl - perl

I am trying to build a hash in Perl by reading a file.
The file content is as below:
s1=i1
s2=i2
s3=i3
And my code is as below:
my $FD;
open ($FD, "read") || die "Cant open the file: $!";
while(<$FD>){
chomp $_;
print "\n Line read = $_\n";
$_ =~ /([0-9a-z]*)=([0-9a-zA-Z]*)/;
@temp_arr=($2,$3,$4);
print "Array = @temp_arr\n";
$HASH{$1}=\@temp_arr;
print "Hash now = ";
foreach(keys %HASH){print "$_ = $HASH{$_}->[0]\n";};
}
And my output as below
Line read = s1=i1
Array = i1
Hash now = s1 = i1
Line read = s2=i2
Array = i2
Hash now = s2 = i2
s1 = i2
Line read = s3=i3
Array = i3
Hash now = s2 = i3
s1 = i3
s3 = i3
Why is the value for all the keys printed as i3 at the end?

Because you are putting references to the same array in each value.
Try something like this:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my %result;
open my $fh, '<', 'read' or die $!;
while (my $line=<$fh>) {
chomp $line;
my ($key, $value)=split /=/, $line, 2;
die "$key already exists" if (exists $result{$key});
$result{$key}=$value;
}
print Dumper(\%result);
Output is:
$VAR1 = {
's1' => 'i1',
's3' => 'i3',
's2' => 'i2'
};

\@temp_arr is a reference to the global variable @temp_arr. You are re-initializing it repeatedly, but it's still a reference to the original variable.
You need to lexically scope @temp_arr (my @temp_arr=($2,$3,$4);) or pass a new reference into the hash ($HASH{$1} = [ $2,$3,$4 ];)
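For illustration, here is a minimal sketch of the loop with the second suggested fix applied (variable names kept from the question; only one value is stored per key, since the regex captures just two groups):
my %HASH;
open (my $FD, '<', 'read') || die "Can't open the file: $!";
while (<$FD>) {
    chomp;
    if ( /([0-9a-z]*)=([0-9a-zA-Z]*)/ ) {
        # a fresh anonymous array is created on every iteration
        $HASH{$1} = [ $2 ];
    }
}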

Try this:
use Data::Dumper;
my $FD;
open ($FD, "read") || die "Cant open the file: $!";
for(<$FD>){
chomp $_;
push(@temp_arr,$1,$2) if($_=~/(.*?)=(.*)/);
}
%HASH=@temp_arr;
print Dumper \%HASH;

Try this.
open (my $FD, "read") || die "Cant open the file: $!";
my %HASH = map {chomp $_; my @x = split /=/, $_; $x[0] => $x[1]} <$FD>;
print "Key: $_ Value: $HASH{$_}\n" for (sort keys %HASH);

Besides the errors in your "open" statement, try keeping it simple before making it unreadable.
my ($FD, $a, $b, $k);
$FD = "D:\\Perl\\test.txt";
open (FD, "<$FD") or die "Cant open the file $FD: $!";
while(<FD>){
chomp $_;
print "\n Line read = $_\n";
($a, $b) = split('=', $_);
print "A: $a, B: $b\n";
$HASH{$a}="$b";
print "Hash now ..\n";
foreach $k (sort keys %HASH){
print "Key: $k -- HASH{$k} = $HASH{$k}\n";
}
}

Related

Duplicate values in column

I have an original file which has the following columns:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,C,Sell,0.25,2000
02-May-2018,JPM,Sell,0.25,3000
02-May-2018,WFC,Sell,0.25,5000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,GOOG,Sell,0.25,8000
02-May-2018,GOOG,Sell,0.25,9000
02-May-2018,C,Sell,0.25,2000
02-May-2018,AAPL,Sell,0.25,3000
I am trying to print the original line if I see a value in the second column more than 2 times. For example, if I see AAPL more than 2 times the desired result should print:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
So far, I have written the following, which prints results multiple times and is wrong. Can you please help with what I am doing wrong?
open (FILE, "<$TMPFILE") or die "Could not open $TMPFILE";
open (OUT, ">$TMPFILE1") or die "Could not open $TMPFILE1";
%count = ();
@symbol = ();
while ($line = <FILE>)
{
chomp $line;
(@data) = split(/,/,$line);
$count{$data[1]}++;
@keys = sort {$count{$a} cmp $count{$b}} keys %count;
for my $key (@keys)
{
if ( $count{$key} > 2 )
{
print "$line\n";
}
}
}
I'd do it something like this: store lines you've seen in a 'buffer' and print them out once the condition is hit (then continue printing as you go):
#!/usr/bin/env perl
use strict;
use warnings;
my %buffer;
my %count_of;
while ( my $line = <> ) {
my ( $date, $ticker, @values ) = split /,/, $line;
#increment the count
$count_of{$ticker}++;
if ( $count_of{$ticker} < 3 ) {
#count limit not hit, so stash the current line in the buffer.
$buffer{$ticker} .= $line;
next;
}
#print the buffer if the count has been hit
if ( $count_of{$ticker} == 3 ) {
print $buffer{$ticker};
}
#only gets to here once the limit is hit, so just print normally.
print $line;
}
With your input data, this outputs:
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
Simple answer:
push @{ $lines{(split",")[1]} }, $_ while <>;
print @{ $lines{$_} } for grep @{ $lines{$_} } > 2, sort keys %lines;
perl program.pl inputfile > outputfile
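If the one-liner is hard to follow, the same approach can be written out as a plain script (an untested sketch of the same idea):
#!/usr/bin/env perl
use strict;
use warnings;
my %lines;
while (<>) {
    # group every input line under its ticker (second comma-separated field)
    push @{ $lines{ (split /,/)[1] } }, $_;
}
# print the collected lines for any ticker that appeared more than twice
for my $ticker ( sort keys %lines ) {
    print @{ $lines{$ticker} } if @{ $lines{$ticker} } > 2;
}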
You need to read the input file twice, because you don't know the final counts until you get to the end of the file.
use strict;
use warnings 'all';
my ($TMPFILE, $TMPFILE1) = qw/ infile outfile /;
my %counts;
{
open my $fh, '<', $TMPFILE or die "Could not open $TMPFILE: $!";
while ( <$fh> ) {
my @fields = split /,/;
++$counts{$fields[1]};
}
}
open my $fh, '<', $TMPFILE or die "Could not open $TMPFILE: $!";
open my $out_fh, '>', $TMPFILE1 or die "Could not open $TMPFILE1: $!";
while ( <$fh> ) {
my @fields = split /,/;
print $out_fh $_ if $counts{$fields[1]} > 2;
}
output
02-May-2018,AAPL,Sell,0.25,1000
02-May-2018,AAPL,Sell,0.25,7000
02-May-2018,AAPL,Sell,0.25,3000
This should work:
use strict;
use warnings;
open (FILE, "<$TMPFILE") or die "Could not open $TMPFILE";
open (OUT, ">$TMPFILE1") or die "Could not open $TMPFILE1";
my %data;
while ( my $line = <FILE> ) {
chomp $line;
my @line = split /,/, $line;
push(@{$data{$line[1]}}, $line);
}
foreach my $key (keys %data) {
if(@{$data{$key}} > 2) {
print "$_\n" foreach @{$data{$key}};
}
}

Perl - have a comma-separated output, want to write that to a CSV

Here is the code:
my @col= sort keys %colnames;
print "mRNA,".join(",",@col)."\n";
foreach my $row(keys %rownames){
print "$row";
foreach my $col(@col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
}
print "\n";
}
Output:
mRNA,Benzopyrene12h_replica1,Benzopyrene12h_replica2
E2F1,5.01,4.72
REV1,2.76,2.67
POLK,1.21,1.87
POLH,1.49,1.56
POLI,1.94,2.45
Please help me write this output to a .csv file.
Something like this might work, combining with Miller's answer. I didn't test it, just giving you an idea, and it could definitely be written more cleanly and less redundantly.
use strict;
use warnings;
use autodie;
use Text::CSV;
my $csvFile = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
my @col= sort keys %colnames;
my @csv;
$csv[0][0] = "mRNA,";
my @joinCol = join(",",@col);
my $i =1;
foreach (@joinCol) {
$csv[0][$i] = $_;
$i++;
}
my $k = 1;
foreach my $row(keys %rownames){
my $j = 0;
print "$row";
$csv[$k][$j] = $row;
$j++;
foreach my $col(@col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
$csv[$k][$j] = $num;
$j++;
}
print "\n";
$k++;
}
open my $fh, '>', "new.csv" or die "Couldn't open csv file: $! \n";
for (@csv) {
$csvFile->print($fh, $_);
}
close $fh;
To write to a CSV file, use Text::CSV
use strict;
use warnings;
use autodie;
use Text::CSV;
# Your Data Initialization
my %colnames; # = Something
my %rownames; # = Something else
my %mat; # = a hash of hash
# Prepare CSV
my $csv = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, '>', "new.csv";
my @col = sort keys %colnames;
# Output Header
$csv->print($fh, ['mRNA', @col]);
# Output Rows
for my $row (keys %rownames){
my @data = ($row);
for my $col (@col){
my $num = $mat{$col}{$row};
$num =~ s/(\.\d\d)\d+/$1/;
push @data, $num;
}
$csv->print($fh, \@data);
}
close $fh;

Reading and comparing lines in Perl

I am having trouble getting my Perl script to work. The issue might be related to the reading of the Extract file line by line within the while loop; any help would be appreciated. There are two files:
Bad file that contains a list of bad IDs (100s of IDs)
2
3
Extract file that contains delimited data with the ID in field 1 (millions of rows)
1|data|data|data
2|data|data|data
2|data|data|data
2|data|data|data
3|data|data|data
4|data|data|data
5|data|data|data
I am trying to remove all the rows from the large extract file where the IDs match. There can be multiple rows where the ID matches. The extract is sorted.
#use strict;
#use warnning;
$SourceFile = $ARGV[0];
$ToRemove = $ARGV[1];
$FieldNum = $ARGV[2];
$NewFile = $ARGV[3];
$LargeRecords = $ARGV[4];
open(INFILE, $SourceFile) or die "Can't open source file: $SourceFile \n";
open(REMOVE, $ToRemove) or die "Can't open toRemove file: $ToRemove \n";
open(OutGood, "> $NewFile") or die "Can't open good output file \n";
open(OutLarge, "> $LargeRecords") or die "Can't open Large Records output file \n";
#Read in the list of bad IDs into array
@array = <REMOVE>;
#Loop through each bad record
foreach (@array)
{
$badID = $_;
#read the extract line by line
while(<INFILE>)
{
#take the line and split it into
@fields = split /\|/, $_;
my $extractID = $fields[$FieldNum];
#print "Here's what we got: $badID and $extractID\n";
while($extractID == $badID)
{
#Write out bad large records
print OutLarge join '|', @fields;
#Get the next line in the extract file
@fields = split /\|/, <INFILE>;
my $extractID = $fields[$FieldNum];
$found = 1; #true
#print " We got a match!!";
#remove item after it has been found
my $input_remove = $badID;
@array = grep {!/$input_remove/} @array;
}
print OutGood join '|', @fields;
}
}
Try this:
$ perl -F'|' -nae 'BEGIN {while(<>){chomp; $bad{$_}++;last if eof;}} print unless $bad{$F[0]};' bad good
First, you are lucky: the number of bad IDs is small. That means you can read the list of bad IDs once and stick them in a hash table without running into any difficulty with memory usage. Once you have them in a hash, you just read the big data file line by line, skipping output for bad IDs.
#!/usr/bin/env perl
use strict;
use warnings;
# hardwired for convenience
my $bad_id_file = 'bad.txt';
my $data_file = 'data.txt';
my $bad_ids = read_bad_ids($bad_id_file);
remove_data_with_bad_ids($data_file, $bad_ids);
sub remove_data_with_bad_ids {
my $file = shift;
my $bad = shift;
open my $in, '<', $file
or die "Cannot open '$file': $!";
while (my $line = <$in>) {
if (my ($id) = extract_id(\$line)) {
exists $bad->{ $id } or print $line;
}
}
close $in
or die "Cannot close '$file': $!";
return;
}
sub read_bad_ids {
my $file = shift;
open my $in, '<', $file
or die "Cannot open '$file': $!";
my %bad;
while (my $line = <$in>) {
if (my ($id) = extract_id(\$line)) {
$bad{ $id } = undef;
}
}
close $in
or die "Cannot close '$file': $!";
return \%bad;
}
sub extract_id {
my $string_ref = shift;
if (my ($id) = ($$string_ref =~ m{\A ([0-9]+) }x)) {
return $id;
}
return;
}
I'd use a hash as follows:
use warnings;
use strict;
my @bad = qw(2 3);
my %bad;
$bad{$_} = 1 foreach @bad;
my @file = qw (1|data|data|data 2|data|data|data 2|data|data|data 2|data|data|data 3|data|data|data 4|data|data|data 5|data|data|data);
my %hash;
foreach (@file){
my @split = split(/\|/);
$hash{$split[0]} = $_;
}
foreach (sort keys %hash){
print "$hash{$_}\n" unless exists $bad{$_};
}
Which gives:
   
1|data|data|data
4|data|data|data
5|data|data|data

How to replace string dynamically using perl script

I am trying to solve the issue below.
I have 2 files, Address.txt and File.txt. I want to replace all of A/B/C/D (in File.txt) with the corresponding string value (read from the Address.txt file) using a Perl script. It's not replacing anything in my output file; I am getting the same content as File.txt.
I tried the code below.
Here is Address.txt file
A,APPLE
B,BAL
C,CAT
D,DOG
E,ELEPHANT
F,FROG
G,GOD
H,HORCE
Here is File.txt
A B C
X Y X
M N O
D E F
F G H
Here is my code:
use strict;
use warnings;
open (MYFILE, 'Address.txt');
foreach (<MYFILE>){
chomp;
my @data_new = split/,/sm;
open INPUTFILE, "<", $ARGV[0] or die $!;
open OUT, '>ariout.txt' or die $!;
my $src = $data_new[0];
my $des = $data_new[1];
while (<INPUTFILE>) {
# print "In while :$src \t$des\n";
$_ =~ s/$src/$des/g;
print OUT $_;
}
close INPUTFILE;
close OUT;
# /usr/bin/perl -p -i -e "s/A/APPLE/g" ARGV[0];
}
close (MYFILE);
If I write $_ =~ s/A/Apple/g; then the output file is fine and A is replaced with "Apple". But when the pattern comes in dynamically it's not getting replaced.
Thanks in advance. I am new to the Perl scripting language. Correct me if I am wrong anywhere.
Update 1: I updated the code below. It's working fine now. My question: what is the Big O of this algorithm?
Code:
#!/usr/bin/perl
use warnings;
use strict;
open( my $out_fh, ">", "output.txt" ) || die "Can't open the output file for writing: $!";
open( my $address_fh, "<", "Address.txt" ) || die "Can't open the address file: $!";
my %lookup = map { chomp; split( /,/, $_, 2 ) } <$address_fh>;
open( my $file_fh, "<", "File1.txt" ) || die "Can't open the file.txt file: $!";
while (<$file_fh>) {
my @line = split;
for my $char ( @line ) {
( exists $lookup{$char} ) ? print $out_fh " $lookup{$char} " : print $out_fh " $char ";
}
print $out_fh "\n";
}
Not entirely sure how you want your output formatted. Do you want to keep the rows and columns as is?
I took a similar approach as above but kept the formatting the same as in your 'file.txt' file:
#!/usr/bin/perl
use warnings;
use strict;
open( my $out_fh, ">", "output.txt" ) || die "Can't open the output file for writing: $!";
open( my $address_fh, "<", "address.txt" ) || die "Can't open the address file: $!";
my %lookup = map { chomp; split( /,/, $_, 2 ) } <$address_fh>;
open( my $file_fh, "<", "file.txt" ) || die "Can't open the file.txt file: $!";
while (<$file_fh>) {
my @line = split;
for my $char ( @line ) {
( exists $lookup{$char} ) ? print $out_fh " $lookup{$char} " : print $out_fh " $char ";
}
print $out_fh "\n";
}
That will give you the output:
APPLE BAL CAT
X Y X
M N O
DOG ELEPHANT FROG
FROG GOD HORCE
Here's another option that lets Perl handle the opening and closing of files:
use strict;
use warnings;
my $file_txt = pop;
my %hash = map { $1 => $2 if /(.+?),(.+)/ } <>;
push @ARGV, $file_txt;
while (<>) {
my @array;
push @array, $hash{$_} // $_ for split;
print "@array\n";
}
Usage: perl script.pl Address.txt File.txt [>outFile.txt]
The last, optional parameter directs output to a file.
Output on your dataset:
APPLE BAL CAT
X Y X
M N O
DOG ELEPHANT FROG
FROG GOD HORCE
The name of File.txt is implicitly popped off of @ARGV for use later. Then, a hash is built using the key/value pairs in Address.txt.
File.txt is then read, splitting each line into its single elements, and the defined-or (//) operator is used to return the defined hash item or the single element, which is then pushed onto @array. Finally, the array is interpolated in a print statement.
Hope this helps!
First, here is your existing program, rewritten slightly
open the address file
convert the address file to a hash so that the letters are the keys and the strings the values
open the other file
read in the single line in it
split the line into single letters
use the letters to lookup in the hash
use strict;
use warnings;
open(my $a,"Address.txt")||die $!;
my %address=map {split(/,/) } map {split(' ')} <$a>;
open(my $f,"File.txt")||die $!;
my $list=<$f>;
for my $letter (split(' ',$list)) {
print $address{$letter}."\n" if (exists $address{$letter});
}
To make another file with the substitutions in place, alter the loop that processes $list:
my @output;
for my $letter (split(' ',$list)) {
if (exists $address{$letter}) {
push @output, $address{$letter};
}
else {
push @output, $letter;
}
}
open(my $o,">newFile.txt")||die $!;
print $o "@output";
Your problem is that in every iteration of your foreach loop you overwrite any changes made earlier to the output file.
My solution:
use strict;
use warnings;
open my $replacements, 'Address.txt' or die $!;
my %r;
foreach (<$replacements>) {
chomp;
my ($k, $v) = split/,/sm;
$r{$k} = $v;
}
my $re = '(' . join('|', keys %r) . ')';
open my $input, "<", $ARGV[0] or die $!;
while (<$input>) {
s/$re/$r{$1}/g;
print;
}
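Assuming the script is saved as, say, replace.pl (a placeholder name), it would be run with the input file as the argument and the output redirected:
perl replace.pl File.txt > output.txt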
#!/usr/bin/perl -w
# to replace multiple text strings in a file with text from another file
#select text from 1st file, replace in 2nd file
$file1 = 'Address.txt'; $file2 = 'File.txt';
# save the strings by which to replace
%replacement = ();
open IN,"$file1" or die "cant open $file1\n";
while(<IN>)
{chomp $_;
@a = split ',',$_;
$replacement{$a[0]} = $a[1];}
close IN;
open OUT,">replaced_file";
open REPL,"$file2" or die "cant open $file2\n";
while(<REPL>)
{chomp $_;
@a = split ' ',$_; @replaced_data = ();
# replace strings wherever possible
foreach $i(@a)
{if(exists $replacement{$i}) {push @replaced_data,$replacement{$i};}
else {push @replaced_data,$i;}
}
print OUT trim(join " ",@replaced_data),"\n";
}
close REPL; close OUT;
########################################
sub trim
{
my $str = $_[0];
$str=~s/^\s*(.*)/$1/;
$str=~s/\s*$//;
return $str;
}

Multiple Sorts within a text file

Looking for a case-insensitive sort using Perl. If a "!" is detected at the start of a line, a new sort section begins (the sort applies only within that section).
[test file]
! Sort Section
!
a
g
r
e
! New Sort Section
1
2
d
3
h
becomes,
[test file]
! Sort Section
!
a
e
g
r
! New Sort Section
1
2
3
d
h
Here's one way to do it:
use strict;
use warnings;
my $filename = shift or die 'filename!';
my @sections;
my $current;
# input
open my $fh, '<', $filename or die "open $filename: $!";
while ( <$fh> ) {
if ( m/^!/ ) {
$current = [ $_ ];
push @sections, $current;
}
else {
push @$current, $_;
}
}
close $fh;
# output
for ( @sections ) {
print shift @$_; # print first line
print sort @$_; # print rest
}
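One caveat: the question asks for a non-case-sensitive sort, and the plain sort above is case-sensitive. If that matters, the last print could use an explicit comparator, as the next solution does:
print sort { lc $a cmp lc $b } @$_; # case-insensitive sort of the rest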
Another one, using an output file. More importantly, not loading an entire file into memory:
use strict;
use warnings;
sub output {
my( $lines, $fh ) = @_;
return unless @$lines;
print $fh shift @$lines; # print first line
print $fh sort { lc $a cmp lc $b } @$lines; # print rest
return;
}
# ==== main ============================================================
my $filename = shift or die 'filename!';
my $outfn = "$filename.out";
die "output file $outfn already exists, aborting\n" if -e $outfn;
# prereqs okay, set up input, output and sort buffer
open my $fh, '<', $filename or die "open $filename: $!";
open my $fhout, '>', $outfn or die "open $outfn: $!";
my $current = [];
# process data
while ( <$fh> ) {
if ( m/^!/ ) {
output $current, $fhout;
$current = [ $_ ];
}
else {
push @$current, $_;
}
}
output $current, $fhout;
close $fhout;
close $fh;