I have a hash with few keys and each key has 20 values.
%test={
a=> 10 14 34 56 ....
b=> 56 67 89 66 ...
..
}
#values= {a,b,..}
I want to tie values from this hash to another file as shown below
my input file.txt
ID
ID
ID
...
expected file.txt
ID ,10 ,56
ID ,14, 67
ID ,34, 89
ID ,56, 66
..
My code right now ties the all the values to the first line of my file. please help formatting it.
my $match = "ID";
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
for my $line (#lines) {
while ( $line =~ /^($match.*)/ ) {
$line = $1 . "," . join ',',#test{#values};
}
}
untie #lines;
right now my output is
file.txt
ID ,10 ,14, 34, 56,... 56, 67, 89, 66....
ID
ID
ID
I'm a bit confused by your question...
You have some template file that only contains ID at the beginning of (n) lines?
And you want to iterate over each $key by $test->{$key}[$line_count]?
Something seems fishy(I think you must be leaving something out) here. There's going to be quite a few ways to go wrong with this design...
Anyways, I think this is what you're going for:
my $match = "ID";
my $test = {
a => [ qw(1 3 5) ],
b => [ qw(2 4 6) ],
};
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
my $i = 0;
for my $line (#lines) {
if( $line =~ /^($match.*)/ ) {
my #stuff = ();
for my $key ( keys %$test ) {
push #stuff, $test->{$key}[$i];
}
$line = $1 . ", " . join(', ', #stuff);
$i++;
}
}
untie #lines;
Assuming that this is what you have/want:
$ cat file.txt
ID
ID
ID
$ test.pl
$ !cat
cat file.txt
ID, 1, 2
ID, 3, 4
ID, 5, 6
Do you simply want
my %test = (
a => [ 10, 14, 34, 56, ... ],
b => [ 56, 67, 89, 66, ... ],
);
for (0..$#{ $test{a} }) {
print(join(',', 'ID', $test{a}[$_], $test{b}[$_]), "\n");
}
You could write to a file instead of STDOUT by creating the file using
open(my $fh, '>', 'file.txt')
or die("Can't create file.txt: $!\n");
and then using
print($fh ...);
but it's better to let the user redirect the output to the file using >file.txt.
Here is my take, although the tie seems superfluous to me:
use strict;
use warnings;
use Tie::File;
my %test=(
a=> [qw(10 14 34 56)],
b=> [qw(56 67 89 66)]
);
my #values= qw(a b);
my $match = "ID";
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
my $i = 0;
for my $line (#lines) {
if ( $line =~ /^($match.*)/ ) {
$line = $1 . "," . join(',', map { $test{$_}->[$i]} #values );
$i++;
}
}
untie #lines;
Output (file.txt):
ID,10,56
ID,14,67
ID,34,89
ID,56,66
Related
I have two files, I need to do comparison to find out the matching and non-matching data. I got two problems now:
Question 1: one of my hashes can only capture the 2nd row of the 'num', i tried to use
push #{hash1{name1}},$x1,$y1,$x2,$y2
but it is still returning the 2nd row of the 'num'.
File1 :
name foo
num 111 222 333 444
name jack
num 999 111 222 333
num 333 444 555 777
File2:
name jack
num 999 111 222 333
num 333 444 555 777
name foo
num 666 222 333 444
This is my code:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my $input1=$ARGV[0];
my $input2=$ARGV[1];
my %hash1;
my %hash2;
my $name1;
my $name2;
my $x1;
my $x2;
my $y2;
my $y1;
open my $fh1,'<', $input1 or die "Cannot open file : $!\n";
while (<$fh1>)
{
chomp;
if(/^name\s+(\S+)/)
{
$name1 = $1;
}
if(/^num\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/)
{
$x1 = $1;
$y1 = $2;
$x2 = $3;
$y2 = $4;
}
$hash1{$name1}=[$x1,$y1,$x2,$y2];
}
close $fh1;
print Dumper (\%hash1);
open my $fh2,'<', $input2 or die "Cannot open file : $!\n";
while (<$fh2>)
{
chomp;
if(/^name\s+(\S+)/)
{
$name2 = $1;
}
if(/^num\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/)
{
$x1 = $1;
$y1 = $2;
$x2 = $3;
$y2 = $4;
}
$hash2{$name2}=[$x1,$y1,$x2,$y2];
}
close $fh2;
print Dumper (\%hash2);
My output:
$VAR1 = {
'jack' => [
'333',
'444',
'555',
'777'
],
'foo' => [
'111',
'222',
'333',
'444'
]
};
$VAR1 = {
'jack' => [
'333',
'444',
'555',
'777'
],
'foo' => [
'666',
'222',
'333',
'444'
]
};
My expected Output:
$VAR1 = {
'jack' => [
'999',
'111',
'222',
'333',
'333',
'444',
'555',
'777'
],
'foo' => [
'111',
'222',
'333',
'444'
]
};
$VAR1 = {
'jack' => [
'999',
'111',
'222',
'333',
'333',
'444',
'555',
'777'
],
'foo' => [
'666',
'222',
'333',
'444'
]
};
Question 2: I tried to use this foreach loop to do the matching of keys and values and print out in a table format.
I tried this :
print "Name\tx1\tX1\tY1\tX2\tY2\n"
foreach my $k1(keys %hash1)
{
foreach my $k2 (keys %hash2)
{
if($hash1{$name1} == $hash2{$name2})
{
print "$name1,$x1,$y1,$x2,$y2"
}
}
}
but Im getting :
"my" variable %hash2 masks earlier declaration in same scope at script.pl line 67.
"my" variable %hash1 masks earlier declaration in same scope at script.pl line 69.
"my" variable $name1 masks earlier declaration in same scope at script.pl line 69.
"my" variable %hash2 masks earlier declaration in same statement at script.pl line 69.
"my" variable $name2 masks earlier declaration in same scope at script.pl line 69.
syntax error at script.pl line 65, near "$k1("
Execution of script.pl aborted due to compilation errors.
my desired output for matching :
Name x1 y1 x2 y2
jack 999 111 222 333
333 444 555 777
The one direct error is that you assign to a hash element with $hash2{$name2}=[...], what overwrites whatever was at that key before. Thus your output shows for jake the second set of numbers only. You need to push to that arrayref. Some comments on the code are below.
Here is a rudimentary (but working) code. Please note and implement the omitted checks.
use warnings;
use strict;
use feature 'say';
my ($f1, $f2) = #ARGV;
die "Usage: $0 file1 file2\n" if not $f1 or not $f2;
my $ds1 = read_file($f1);
my $ds2 = read_file($f2);
compare_data($ds1, $ds2);
sub compare_data {
my ($ds1, $ds2) = #_;
# Add: check whether one has more keys; work with the longer one
foreach my $k (sort keys %$ds1) {
if (not exists $ds2->{$k}) {
say "key $k does not exist in dataset 2";
next;
}
# Add tests: do both datasets have the same "ref" type here?
# If those are arrayrefs, as expected, are they the same size?
my #data = #{$ds1->{$k}};
foreach my $i (0..$#data) {
if ($data[$i] ne $ds2->{$k}->[$i]) {
say "differ for $k: $data[$i] vs $ds2->{$k}->[$i]";
}
}
}
}
sub read_file {
my ($file) = #_;
open my $fh, '<', $file or die "Can't open $file: $!";
my (%data, $name);
while (<$fh>) {
my #fields = split;
if ($fields[0] eq 'name') {
$name = $fields[1];
next;
}
elsif ($fields[0] eq 'num') {
push #{$data{$name}}, #fields[1..$#fields];
}
}
return \%data;
}
I'm leaving it as an exercise to code the desired format of the printout. The above prints
differ for foo: 111 vs 666
Note comments in code to add tests. As you descend into data structures to compare them you need to check whether they carry the same type of data at each level (see ref) and whether they are of the same size (so you wouldn't try to read past the end of an array). Once you get this kind of work under your belt search for modules for this.
I use eq in comparison of data (in arrayrefs) since it's not stated firmly that they are numbers. But if they are, as it appears to be the case, change eq to == .
Doing a code review would take us too far, but here are a few remarks
When you catch yourself needing such long list of variables think "collections" and reconsider your choice of data structures for the problem. Note that in the example above I didn't need a single scalar variable for data (I used one for temporary storage of the name)
Picking strings apart with a regex is part and parcel of text analysis -- when suitable. Familiarize yourself with other approaches. At this point see split
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 5 years ago.
Improve this question
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use open ':encoding(UTF-8)', ':std';
use List::Util qw( sum );
my $filename = 'data1.txt';
open(my $fh, '<:encoding(UTF-8)', $filename)
or die "Could not open file '$filename' $!";
while (my $row = <$fh>) {
chomp $row;
print "$row\n";
}
my $filename2 = 'data2.txt';
open(my $fh2, '<:encoding(UTF-8)', $filename2)
or die "Could not open file '$filename2' $!";
while (my $row = <$fh2>) {
chomp $row;
print "$row\n";
}
my #last = ();
my %grades = (
Ahmet => {
quiz1 => 97,
quiz2 => 67,
quiz3 => 93,
},
Su => {
quiz1 => 88,
quiz2 => 82,
quiz3 => 99,
});
my %sum;
for my $name (keys %grades){
$sum{$name} = sum(values %{ $grades{$name} });
}
for my $name (sort { $sum{$a} <=> $sum{$b} } keys %sum){
push #last, "$name: $sum{$name}\n";
}
my %grades2 = (
Bugra => {
quiz1 => 33,
quiz2 => 41,
quiz3 => 59,
},
Lale => {
quiz1 => 79,
quiz2 => 31,
quiz3 => 62,
},
);
my %sum2;
for my $name (keys %grades2){
$sum2{$name} = sum(values %{ $grades2{$name} });
}
for my $name (sort { $sum2{$a} <=> $sum2{$b} } keys %sum2){
push #last, "$name: $sum2{$name}\n";
}
my #last1 = sort { lc($a) cmp lc($b) } #last;
print #last1;
This is my code. I want to take values from a text file something like ( marry 10 65 23) and write to a two dimensional array. I managed array separately end of the read text file it has to be seen like grade1 and grade2 for data1.txt and data2.txt. I can pull the values but I couldn't write to two dimensional array. Also result is correct.
I read your question as How do I populate the hashes %grade1 and %grade2 from the files data1.txt and data2.txt?
I also assume that your files data1.txt and data2.txt have the following structure (whitespace separated):
marry 10 65 23
john 20 30 40
I suggest to write a function that takes the filename as paramater and returns a reference to a populated hash:
sub read_grades_from_file
{
my $filename = shift;
my $result = {};
open( my $fh, '<:encoding(UTF-8)', $filename )
or die "Could not open file '$filename' $!\n";
while ( my $row = <$fh> ) {
next unless $row =~ /\S/; # skip empty lines
my ( $name, $quiz1, $quiz2, $quiz3 ) = split( ' ', $row );
$result->{$name} = {
quiz1 => $quiz1,
quiz2 => $quiz2,
quiz3 => $quiz3,
};
}
close($fh);
return $result;
}
The function is used as follows:
my $result = read_grades_from_file('data1.txt'); # returns hashref
my %grade1 = %{$result}; # dereference $result to make it a hash
$result = read_grades_from_file('data2.txt');
my %grade2 = %{$result};
The result of read_grades_from_file is a reference to a hash so it has to be de-referenced and then assigned to %grade. Thus the two steps.
Perhaps the data structures you are using are overly complex. You probably only need a single %grades hash, for example.
The following will take data from space- or tab-separated records from both two input files - ignoring comments or empty lines.
my %grades;
while (1) {
my $row1 = <$data1>;
my $row2 = <$data2>;
last unless (defined $row1 or defined $row2);
chomp ($row1, $row2);
if (defined $row1 and $row1 !~ /(^#|^$)/) {
my ($name, #quizzes) = split /[ \t]/, $row1, 4;
$grades{$name}{'grades1'} = sum(#quizzes);
}
if (defined $row2 and $row1 !~ /(^#|^$)/) {
my ($name, #quizzes) = split /[ \t]/, $row2, 4;
$grades{$name}{'grades2'} = sum(#quizzes);
}
}
To print to STDOUT, you could try the following.
print "Name\tMarks 1\tMarks 2", $/;
for (keys %grades) {
my $name = $grades{$_};
print $_, "\t", $name->{grades1} || '?', "\t", $name->{grades2} || '?', "\t", $/;
}
With data1.txt as
# Grades
Bugra 33 41 59
Mary 10 65 23
Lale 79 31 62
and data2.txt as
# Grades 2
Bugra 49 32 57
Lale 79 31 62
Peter 21 34 42
the output is shown below.
Name Marks 1 Marks 2
Peter ? 97
Lale 172 172
Bugra 133 138
Mary 98 ?
(A '?' indicates that no record exists for the specified student in one of the two input files.)
Please don't comment to say I already asked this, It's a logic question, I know it's mostly similar code but there are underlying syntax problems that I cannot decipher and have spent hours debugging this with no hope and I just really need this answered. And that other account was deleted so I did post this half an hour ago but can't view it. Please only comment if you want to help.
It should work everything is in data and it should be turning up results, i've had it working before so it must just be so syntax thing I'm not noticing. I can't get this work. I'm almost certain it's the grep statement.
#!/usr/bin/perl
use warnings;
use strict;
open ("data", "<text.txt") or die "Can't open"; #
my #data = <data>; #file looking into
close "data"; #
while(<>){
chomp;
my $temp = $_;
my ($name, $number, $expression) = split("\t", $temp);
my $pattern = "\t";
my #found = grep ( /(^$name$pattern\|$pattern$number$)/, #data );
if(defined($found[0])){
print $_;
my ($what, $start, $stop, $chr, $who) = split("\t", $found[0]);
print "\t", $chr, $start, $stop;
#found = ();
}
}
print "\n";
Input is of the format
A1B 1 68
A1C 299 0
A2B 547 0
A2L 877 30
A2M 2 7944
And this is the format of the data file
CLDN8 30214006 30216073 21 68
A1C 20808776 20811809 Y
UBE2Q2P5Y 25431156 25437315 Y
OR5M9 56462469 56463401 11 390162
I want to search for the instances of items in the first or second column of the input file in the data file which should match up with the first and 5th column(which may not exist) respectively
Expected output should be for this example
A1B 1 68 21 30214006 30216073
A1C 299 0 Y 20808776 20811809
But I'm getting nothing
I think what you're looking for is this, but it's really very hard to tell because you have described your problem so poorly
I've had to make a lot of assumptions, but at least the output matches what you say you're expecting
use strict;
use warnings 'all';
my $data_file = 'text.txt';
my #data;
{
open my $fh, '<', $data_file or die qq{Unable to open "$data_file" for input: $1};
while ( <$fh> ) {
next unless /\S/;
push #data, [ split ];
}
}
while ( <> ) {
next unless /\S/;
my ($name, $number, $expression) = split;
for my $item ( #data ) {
my ($what, $start, $stop, $chr, $who) = #$item;
if ( $what eq $name or defined $who and $who eq $expression ) {
print join("\t", $name, $number, $expression, $chr, $start, $stop), "\n";
}
}
}
output
A1B 1 68 21 30214006 30216073
A1C 299 0 Y 20808776 20811809
data.txt
Name:xyz
ID:1
Value: 1 2 3 4 5 6 7 8 9 ...
ID:2
Value: 9 8 7 6 5 4 3 2 1..
ID:3
Value: 90 89 88....
Name:abc
ID:11
value:...
Intial file.txt
## Header
..
data
data
data
..
Final expected file.txt
## Header xyz_1,xyz_2,xyz_3,abc_11,...
..
data 1 9 90
data 2 8 89
data 3 7 88
data 4 6
..
Current output file.txt
## Header xyz_1,xyz_2,xyz_3,abc_11,...
...
data, 1 2 3 4 5 6 7 8 9 ..,9 8 7 6 5 4 3 2 1 ..,90 89 88
data
data
...
Code
#!/usr/local/bin/perl
use diagnostics;
use strict;
use warnings;
use Tie::File;
my #name_id;
my %test;
#local $/ = '';
open my $fh, '<', 'data.txt' or die "failed: $!";
my %var;
while (<$fh>) {
chomp;
if (m/^([A-Z:]+):\s*(.*)/) {
$var{$1} = $2;
if (exists($var{Name}) && exists($var{ID}) && exists($var{value}) && $1 eq 'value') {
my $var_name = "$var{Name}_$var{ID}";
push #name_id, $var_name;
$test{$var_name} = $var{value};
}
}
}
# print join "\n\t", #test{#name_id};
my $match = "## Header";
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
for my $line (#lines) {
if ($line =~ /^($match.*)/) {
$line = $1 . "," . join ',', #name_id;
}
}
untie #lines;
my $match = "data";
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
my $i = 0;
for my $line (#lines) {
if ($line =~ /^($match.*)/) {
$line = $1 . "," . join(',', map { $test{$_}->[$i] } #name_id);
$i++;
}
}
untie #lines;
Have a problem with this line $line = $1 . "," . join (',', map { $test{$_}->[$i]} #name_id); it throws the error
Can't use string ("1 2 3 4 5 6 7 8 9 .."...) as an ARRAY ref while "strict refs" in use at test.pl line 46, line 80. at test.pl line 46
I think the hash(%test) value I had is a string and I can't split it as an array. Please let me know how to convert it to an array. I tried doing $test{$var_name} = [qw($var{value})]; it didnt work.
You may be interested in this refactoring of your code that seems to do what you want.
#!/usr/local/bin/perl
use strict;
use warnings;
use Tie::File;
open my $fh, '<', 'data.txt' or die "failed: $!";
my #name_id;
my %test;
my %var;
while (<$fh>) {
chomp;
if (my ($key, $val) = /^(\w+):\s*(.*)/) {
$var{$key} = $val;
if ($key eq 'value') {
my $var_name = "$var{Name}_$var{ID}";
push #name_id, $var_name;
$test{$var_name} = [ split ' ', $var{value} ];
}
}
}
tie my #lines, 'Tie::File', 'file.txt' or die "failed : $!";
my $count = 0;
for my $line (#lines) {
if ($line =~ /^## Header/) {
$line .= ' ' . join ',', #name_id;
}
elsif ($line =~ /^data/) {
$line .= ' ' . join ' ', map { $test{$_}[$count] // '' } #name_id;
$count++;
}
}
untie #lines;
output (file.txt)
## Header xyz_1,xyz_2 ,xyz_3
data 1 9 90
data 2 8 89
data 3 7 88
data 4 6
This is surely not right:
$test{$_}->[$i]
Because $test{$_} can only contain a string of some sort.
If you have a string and want to split into an arrayref so the above works, do this:
$test{$var_name} = [split /\s+/, $var{value}];
I have no idea what the code is supposed to accomplish which means that it may run, but I can't tell if it does what it is meant to. The odd variable names (like $test and $var_name didn't help me to understand the purpose).
I'm not too sure I followed your code, but I thought I'd post how to transpose the numbers (unless your code already does that :-) ).
#!/usr/bin/perl
use strict;
use warnings;
my (%data, $name);
while (<DATA>) {
if (/^Name:(.+)/) {
$name = $1
}
elsif (/^Value/) {
# transpose
my $r = 0;
push #{ $data{$name}[$r++] }, $_ for /\d+/g;
}
}
use Data::Dumper; print Dumper \%data;
__DATA__
Name:xyz
ID:1
Value: 1 2 3 4 5 6 7 8 9
ID:2
Value: 9 8 7 6 5 4 3 2 1
ID:3
Value: 90 89 88 87 86 85 84 83 82
Name:abc
ID:11
The dumped results are:
$VAR1 = {
'xyz' => [
[
'1',
'9',
'90'
],
[
'2',
'8',
'89'
],
[
'3',
'7',
'88'
],
[
'4',
'6',
'87'
],
[
'5',
'5',
'86'
],
[
'6',
'4',
'85'
],
[
'7',
'3',
'84'
],
[
'8',
'2',
'83'
],
[
'9',
'1',
'82'
]
]
};
I have a two tab separated files that I need to align together. for example:
File 1: File 2:
AAA 123 BBB 345
BBB 345 CCC 333
CCC 333 DDD 444
(These are large files, potentially thousands of lines!)
What I would like to do is to have the output look like this:
AAA 123
BBB 345 BBB 345
CCC 333 CCC 333
DDD 444
Preferably I would like to do this in perl, but not sure how. any help would be greatly appreaciated.
If its just about making a data structure, this can be quite easy.
#!/usr/bin/env perl
# usage: script.pl file1 file2 ...
use strict;
use warnings;
my %data;
while (<>) {
chomp;
my ($key, $value) = split;
push #{$data{$key}}, $value;
}
use Data::Dumper;
print Dumper \%data;
You can then output in any format you like. If its really about using the files exactly as they are, then its a little bit more tricky.
Assuming the files are sorted,
sub get {
my ($fh) = #_;
my $line = <$fh>;
return () if !defined($line);
return split(' ', $line);
}
my ($key1, $val1) = get($fh1);
my ($key2, $val2) = get($fh2);
while (defined($key1) && defined($key2)) {
if ($key1 lt $key2) {
print(join("\t", $key1, $val1), "\n");
($key1, $val1) = get($fh1);
}
elsif ($key1 gt $key2) {
print(join("\t", '', '', $key2, $val2), "\n");
($key2, $val2) = get($fh2);
}
else {
print(join("\t", $key1, $val1, $key2, $val2), "\n");
($key1, $val1) = get($fh1);
($key2, $val2) = get($fh2);
}
}
while (defined($key1)) {
print(join("\t", $key1, $val1), "\n");
($key1, $val1) = get($fh1);
}
while (defined($key2)) {
print(join("\t", '', '', $key1, $val1), "\n");
($key2, $val2) = get($fh2);
}
Similar to Joel Berger's answer, but this approach allows to you keep track of whether files did or did not contain a given key:
my %data;
while (my $line = <>){
chomp $line;
my ($k) = $line =~ /^(\S+)/;
$data{$k}{line} = $line;
$data{$k}{$ARGV} = 1;
}
use Data::Dumper;
print Dumper(\%data);
Output:
$VAR1 = {
'CCC' => {
'other.dat' => 1,
'data.dat' => 1,
'line' => 'CCC 333'
},
'BBB' => {
'other.dat' => 1,
'data.dat' => 1,
'line' => 'BBB 345'
},
'DDD' => {
'other.dat' => 1,
'line' => 'DDD 444'
},
'AAA' => {
'data.dat' => 1,
'line' => 'AAA 123'
}
};
As ikegami mentioned, it assumes that the files' contents are arranged as shown in your example.
use strict;
use warnings;
open my $file1, '<file1.txt' or die $!;
open my $file2, '<file2.txt' or die $!;
my $file1_line = <$file1>;
print $file1_line;
while ( my $file2_line = <$file2> ) {
if( defined( $file1_line = <$file1> ) ) {
chomp $file1_line;
print $file1_line;
}
my $tabs = $file1_line ? "\t" : "\t\t";
print "$tabs$file2_line";
}
close $file1;
close $file2;
Reviewing your example, you show some identical key/value pairs in both files. Given this, it looks like you want to show the pair(s) unique to file 1, unique to file 2, and show the common pairs. If this is the case (and you're not trying to match the files' pairs by either keys or values), you can use List::Compare:
use strict;
use warnings;
use List::Compare;
open my $file1, '<file1.txt' or die $!;
my #file1 = <$file1>;
close $file1;
open my $file2, '<file2.txt' or die $!;
my #file2 = <$file2>;
close $file2;
my $lc = List::Compare->new(\#file1, \#file2);
my #file1Only = $lc->get_Lonly; # L(eft array)only
for(#file1Only) { print }
my #bothFiles = $lc->get_intersection;
for(#bothFiles) { chomp; print "$_\t$_\n" }
my #file2Only = $lc->get_Ronly; # R(ight array)only
for(#file2Only) { print "\t\t$_" }