Perl adding Lines into a Multi-Dimensional Hash - perl

Hello, I want to split each line and add the values to a multi-dimensional hash. This is what the lines look like:
__DATA__
49839382;Test1;bgsae;npvxs
49839384;Test2;bgsae;npvxs
49839387;Test3;bgsae;npvxs
So what I am doing now is:
my %prefix = map { chomp; split ';' } <DATA>;
But now I can only access Test1 with:
print $prefix{"49839382"}
But how can I also add the bgsae to the hash so I can access it with
$prefix{"49839382"}{"Test1"}
Thank you for your help.

What structure are you trying to build?
use Data::Dumper;

# Build a hash keyed on the first field of each line. The remaining fields
# are paired up as key => value inside an anonymous hash; with an odd number
# of remaining fields (as in the sample data) the last key maps to undef.
my %prefix = map {
    chomp(my @fields = split /;/);
    $fields[0] => { @fields[1 .. $#fields] }
} <DATA>;

print Dumper \%prefix;
Output:
$VAR1 = {
'49839384' => {
'Test2' => 'bgsae',
'npvxs' => undef
},
'49839382' => {
'Test1' => 'bgsae',
'npvxs' => undef
},
'49839387' => {
'npvxs' => undef,
'Test3' => 'bgsae'
}
};
Or do you need a deeper hash?
my %prefix;

# Turn each "a;b;c;d" line into a chain of nested hashes, so that
# $prefix{a}{b}{c}{d} ends up as an empty hash.
while (my $line = <DATA>) {    # read line by line instead of slurping the handle
    chomp $line;
    my $ref = \%prefix;        # cursor; restarts at the top level for every line
    for my $field (split /;/, $line) {
        # Reuse an existing sub-hash so that lines sharing leading fields are
        # merged, rather than the later line wiping out the earlier subtree.
        $ref = $ref->{$field} ||= {};
    }
}
Returns:
$VAR1 = {
'49839384' => {
'Test2' => {
'bgsae' => {
'npvxs' => {}
}
}
},
'49839382' => {
'Test1' => {
'bgsae' => {
'npvxs' => {}
}
}
},
'49839387' => {
'Test3' => {
'bgsae' => {
'npvxs' => {}
}
}
}
};

I don't know what you need the data for, but at a guess you want something more like this.
It builds a hash of arrays, using the first field as the key for the data, and the remaining three in an array for the value. So you can access the test number as $data{'49839382'}[0] etc.
use strict;
use warnings;

# Hash of arrays: the first field of each line is the key, and the remaining
# fields are stored, in order, behind an array reference.
my %data = map {
    chomp;
    my @fields = split /;/;    # fresh array per line, so every reference is distinct
    shift @fields => \@fields;
} <DATA>;

use Data::Dumper;
print Data::Dumper->Dump([\%data], ['*data']);
__DATA__
49839382;Test1;bgsae;npvxs
49839384;Test2;bgsae;npvxs
49839387;Test3;bgsae;npvxs
output
%data = (
'49839384' => [
'Test2',
'bgsae',
'npvxs'
],
'49839382' => [
'Test1',
'bgsae',
'npvxs'
],
'49839387' => [
'Test3',
'bgsae',
'npvxs'
]
);

Related

Extract subset of XML with XML::Twig

I'm trying to use
XML::Twig
to extract a subset of an XML document so that I can convert it to CSV.
Here's a sample of my data
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Actions>
<Click>
<Field1>Data1</Field1>
<Field2>Data2</Field2>
</Click>
<Click>
<Field1>Data3</Field1>
<Field2>Data4</Field2>
</Click>
</Actions>
And here's an attempt at coding the desired outcome
#!/usr/bin/env perl
use strict;
use warnings;
use XML::Twig;
use Text::CSV; # later
use Data::Dumper;

my $file = shift @ARGV or die "Need a file to process: $!";

my $twig = XML::Twig->new();
$twig->parsefile($file);
my $root = $twig->root;

# Collect one single-pair hash per child element of every <Click>.
# This flattens the document: children of the same <Click> end up in
# separate hashes rather than being grouped together.
my @data;
for my $node ( $twig->findnodes( '//Click/*' ) ) {
    my $key = $node->name;
    my $val = $node->text;
    push @data, { $key => $val }
}
print Dumper \@data;
which gives
$VAR1 = [
{
'Field1' => 'Data1'
},
{
'Field2' => 'Data2'
},
{
'Field1' => 'Data3'
},
{
'Field2' => 'Data4'
}
];
What I'm looking to create is an array of hashes, if that's best
# Desired result: one hash per <Click>, holding all of its fields.
my @AoH = (
    { Field1 => 'Data1', Field2 => 'Data2' },
    { Field1 => 'Data3', Field2 => 'Data4' },
);
I'm not sure how to loop through the data to extract this.
Your structure has two levels, so you need two levels of loops.
# Outer loop: one hash per <Click>.
# Inner loop: one key/value pair per child element of that <Click>.
my @data;
for my $click_node ( $twig->findnodes( '/Actions/Click' ) ) {
    my %click_data;
    for my $child_node ( $click_node->findnodes( '*' ) ) {
        my $key = $child_node->name;
        my $val = $child_node->text;
        $click_data{$key} = $val;
    }
    push @data, \%click_data;
}

local $Data::Dumper::Sortkeys = 1;    # have Dumper print hash keys in sorted order
print(Dumper(\@data));
Output:
$VAR1 = [
{
'Field1' => 'Data1',
'Field2' => 'Data2'
},
{
'Field1' => 'Data3',
'Field2' => 'Data4'
}
];

getting the sort keys from a hash

This is the data dumper of \%spec_hash.
It is sorted by group which is a national exchange - and symbol.
foohost:~/walt $ vi /tmp/footoo
$VAR1 = {
'ARCX' => {
'IACI' => 1,
'MCHP' => 1,
},
'AMXO' => {
'YUM' => 1,
'SYK' => 1,
},
'XISX' => {
'FCEL' => 1,
'GPS' => 1,
}
};
I was trying to sort these two hashes by their keys but cannot. For debugging purposes I really want to see what is getting pumped out of these hashes:
foreach my $exch (sort keys %spec_hash) {
foreach my $exch (sort keys %{$spec_hash{$exch}}) {
If I comment out the dumper and try a regular sort :
#print Dumper(\%spec_hash) ;
foreach my $exch (sort keys %spec_hash) {
#foreach my $exch (sort keys %{$spec_hash{$exch}}) {
print "key: $exch, value: $spec_hash{$exch}\n"
}
this is what I get:
key: AMXO, value: HASH(0x9cc88a4)
key: ARCX, value: HASH(0x9cd6f1c)
key: XISX, value: HASH(0x9cbd5f0)
and trying to print this prints nothing at all :
foreach my $exch (sort keys %{$spec_hash{$exch}}) {
print "key: $exch, value: $spec_hash{$exch}\n"
}
If I understand correctly,
# Walk the exchanges in sorted order, then the symbols listed under each one.
foreach my $exch (sort keys %spec_hash) {
    my $symbols = $spec_hash{$exch};
    foreach my $sym (sort keys %$symbols) {
        print "Exchange: $exch, Symbol: $sym\n";
    }
}
You want to loop over every symbol, but they are grouped by exchange, so you must first loop over the exchanges.
Data::Dumper doesn't sort its output by default.
Try adding $Data::Dumper::Sortkeys = 1; to your script.
use strict;
use warnings;
use Data::Dumper;

my %hash = (
    'ARCX' => { 'IACI' => 1, 'MCHP' => 1 },
    'AMXO' => { 'YUM'  => 1, 'SYK'  => 1 },
    'XISX' => { 'FCEL' => 1, 'GPS'  => 1 },
);

# Turn on sorted keys only for the duration of this block; the previous
# setting of $Data::Dumper::Sortkeys is restored when the block exits.
{
    local $Data::Dumper::Sortkeys = 1;
    print Dumper(\%hash);
}
Outputs:
$VAR1 = {
'AMXO' => {
'SYK' => 1,
'YUM' => 1
},
'ARCX' => {
'IACI' => 1,
'MCHP' => 1
},
'XISX' => {
'FCEL' => 1,
'GPS' => 1
}
};
Note: This can be modified to include a sort subroutine that you define

Converting HoA to HoH with counting

Have this code:
use 5.020;
use warnings;
use Data::Dumper;

my %h = (
    k1 => [qw(aa1 aa2 aa1)],
    k2 => [qw(ab1 ab2 ab3)],
    k3 => [qw(ac1 ac1 ac1)],
);

# For every key, count how often each value occurs in its list:
# $h2{key}{value} = number of occurrences.
my %h2;
for my $k (keys %h) {
    $h2{$k}{$_}++ for (@{$h{$k}});
}
say Dumper \%h2;
produces:
$VAR1 = {
'k1' => {
'aa2' => 1,
'aa1' => 2
},
'k3' => {
'ac1' => 3
},
'k2' => {
'ab1' => 1,
'ab3' => 1,
'ab2' => 1
}
};
Is it possible to write the above code in another way (e.g. simpler or more compact)?
Honestly, I don't like the number of times $h2{$k} is evaluated.
my %h2;
for my $k (keys %h) {
    my $src = $h{$k};          # array reference holding this key's values
    my $dst = $h2{$k} = {};    # fresh counter hash; $h2{$k} is looked up only once
    ++$dst->{$_} for @$src;
}
A subroutine can help make the intent more obvious. Maybe.
# Returns a reference to a hash that maps each argument to the number of
# times it occurs in the argument list.
sub counts { my %c; ++$c{$_} for @_; \%c }

$h2{$_} = counts(@{ $h{$_} }) for keys %h;
That can be simplified if you do the change in-place.
# Returns a reference to a hash that maps each argument to the number of
# times it occurs in the argument list.
sub counts { my %c; ++$c{$_} for @_; \%c }

# values() aliases the hash's elements, so assigning to $_ replaces each
# array reference stored in %h with its counter hash.
$_ = counts(@$_) for values %h;

not able to access hash of hash of array values

I have written the following code in Perl. The code reads a PDB file and extracts some values. Ignore the top part of the code, where everything works perfectly.
The problem is in the subroutine, where I try to store arrays in %hash3 with the model as the first key and the position as the second key.
The array values can be accessed inside the if condition using this:
$hash3{$model}{$coordinates}[1].
But when I leave all the foreach loops and try to access the elements, I only get one value.
Please look at the final foreach loop and tell me whether this is the wrong way to access the hash values.
The pdb file I am using can be downloaded from this link http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId=1NZS
#!/usr/bin/perl
# First pass over the PDB file named by the first command-line argument:
# collect the third field of every MODRES/PHOSPHO record, and the sixth field
# of every matching HETATM CA record.
open(IN,$ARGV[0]);
my @phosphosites;
my $model=1;
my %hash3;
while(<IN>)
{
    #findmod(@line);
    #finddist;
    #findfreq;
    if((/^MODRES/) && (/PHOSPHO/))
    {
        @line=split;
        push(@phosphosites, $line[2]);
        #print "$line[4]";
    }
    foreach $elements (@phosphosites){
        # NOTE(review): $i is never assigned in this script, so it interpolates
        # as an empty string here; $elements was presumably intended -- confirm.
        if(/^HETATM\s+\d+\s+CA\s+$i/)
        {
            @line1=split;
            #print "$line1[5]";
            #print "$line1[6] $line1[7] $line1[8]\n";
            push(@phosphositesnum, $line1[5]);
        }
    }
    $pos=$line1[5];
    #findspatial(\@line,\@line1);
}
my @ori_data=removeDuplicates(@phosphositesnum);
# Returns the arguments with duplicates removed, keeping the first
# occurrence of each value and preserving the original order.
sub removeDuplicates {
    my %seen = ();
    my @vals = ();
    foreach my $i (@_) {
        unless ($seen{$i}) {
            push @vals, $i;
            $seen{$i} = 1;
        }
    }
    return @vals;
}
$a=(@phosphosites);     # array in scalar context: the number of collected sites
print "$a\n";
print "@phosphosites\n";
print "@ori_data\n";
close(IN);
# Second pass: reopen the same file and read all of its lines into @data,
# which spatial() scans.
open(IN1,$ARGV[0]);
my (@data)=<IN1>;
spatial(\@ori_data);
# For every position in the array referenced by the first argument, scan all
# lines in @data and record the split fields of each matching HETATM CA line
# in %hash3 under {model}{position}. The model counter starts at 1 for each
# position and is incremented at every ENDMDL line.
sub spatial {
    my @spatial_array1=@{$_[0]};
    foreach $coordinates(@spatial_array1)
    {
        $model=1;
        {foreach $data1(@data){
            if($data1=~ m/^HETATM\s+\d+\s+CA\s+[A-Z]*\s+[A-Z]*\s+$coordinates/)
            {
                # NOTE: @cordivals is not declared with my, so it is a single
                # package variable; every \@cordivals stored below refers to
                # that same array.
                @cordivals=split(/\s+/,$data1);
                push @{ $sphash{$model} },[$cordivals[6], $cordivals[7], $cordivals[8]];
                $hash3{$model}{$coordinates}= \@cordivals;
                #print "$model $coordinates $hash3{$model}{$coordinates}[6] $hash3{$model}{$coordinates}[7] $hash3{$model}{$coordinates}[8]\n";
                #print "$model $sphash{$model}[$i][0] $sphash{$model}[$i][1] $sphash{$model}[$i][2]\n";
            }
            elsif($data1=~ m/^ENDMDL/)
            {
                $model++;
            }
            #print "$model $coordinates $hash3{$model}{$coordinates}[6] $hash3{$model}{$coordinates}[7] $hash3{$model}{$coordinates}[8]\n";
        }
        }
    }
    #foreach $z1 (sort keys %hash3)
    # {
    # foreach $z2(@spatial_array1){
    # print "$z1 $z2";
    # print "$hash3{$z1}{$z2}[6]\n";
    # print "$z2\n";
    # }
    # }
}
After using the Data::Dumper option it is giving me this kind of output
$VAR1 = {
'11' => {
'334' => [
'HETATM',
'115',
'CA',
'SEP',
'A',
'343',
'-0.201',
'-2.884',
'1.022',
'1.00',
'99.99',
'C'
],
'342' => $VAR1->{'11'}{'334'},
'338' => $VAR1->{'11'}{'334'},
'335' => $VAR1->{'11'}{'334'},
'340' => $VAR1->{'11'}{'334'},
'343' => $VAR1->{'11'}{'334'},
'336' => $VAR1->{'11'}{'334'}
},
'7' => {
'334' => $VAR1->{'11'}{'334'},
'342' => $VAR1->{'11'}{'334'},
'338' => $VAR1->{'11'}{'334'},
'335' => $VAR1->{'11'}{'334'},
'340' => $VAR1->{'11'}{'334'},
'343' => $VAR1->{'11'}{'334'},
'336' => $VAR1->{'11'}{'334'}
},
'2' => {
'334' => $VAR1->{'11'}{'334'},
'342' => $VAR1->{'11'}{'334'},
...
Change:
@cordivals=split(/\s+/,$data1);
to:
my @cordivals=split(/\s+/,$data1);
What seems to be happening is that all the hash elements contain references to the same array variable, because you're not making the variable local to that iteration.
In general, you should use my with all variables.

How do I sort hash of hashes by value using perl?

I have this code
use strict;
use warnings;

my %hash;
$hash{'1'}= {'Make' => 'Toyota','Color' => 'Red',};
$hash{'2'}= {'Make' => 'Ford','Color' => 'Blue',};
$hash{'3'}= {'Make' => 'Honda','Color' => 'Yellow',};

# Print "Make Color" for every car, in whatever order keys() returns.
foreach my $key (keys %hash){
    # The lexicals are deliberately not called $a/$b: those names are the
    # special variables that sort() comparison blocks rely on.
    my $make  = $hash{$key}{'Make'};
    my $color = $hash{$key}{'Color'};
    print "$make $color\n";
}
And this out put:
Toyota Red Honda Yellow Ford Blue
Need help sorting it by Make.
#!/usr/bin/perl
use strict;
use warnings;

my %hash = (
    1 => { Make => 'Toyota', Color => 'Red', },
    2 => { Make => 'Ford', Color => 'Blue', },
    3 => { Make => 'Honda', Color => 'Yellow', },
);

# if you still need the keys...
# Order the keys by the Make of the record each one points at.
my @keys_by_make = sort { $hash{$a}{Make} cmp $hash{$b}{Make} } keys %hash;
for my $key (@keys_by_make) {
    my $car = $hash{$key};
    printf "%s %s\n", $car->{Make}, $car->{Color};
}

# if you don't...
# Order the records themselves by their Make.
my @cars_by_make = sort { $a->{Make} cmp $b->{Make} } values %hash;
for my $car (@cars_by_make) {
    printf "%s %s\n", $car->{Make}, $car->{Color};
}
# One-statement version; the comparison puts $b before $a, so the cars come
# out in descending order of Make. Each record is printed on its own line.
print "$_->{Make} $_->{Color}\n" for
    sort {
        $b->{Make} cmp $a->{Make}
    } values %hash;
plusplus is right... an array of hashrefs is likely a better choice of data structure. It's more scalable too; add more cars with push:
my @cars = (
    { make => 'Toyota', Color => 'Red' },
    { make => 'Ford' , Color => 'Blue' },
    { make => 'Honda' , Color => 'Yellow' },
);

# Visit the cars in ascending order of make and print every attribute of each.
foreach my $car ( sort { $a->{make} cmp $b->{make} } @cars ) {
    foreach my $attribute ( keys %{ $car } ) {
        print $attribute, ' : ', $car->{$attribute}, "\n";
    }
}
}