List array find double and add value - perl

the original perl array is sorted and looks like this:
Original ARRARY:
ccc-->2
ccc-->5
abc-->3
abc-->7
cb-->6
and i like to have the following result:
FINAL ARRARY:
ccc-->7
abc-->10
cb-->6
Question:
can you please create a subroutine for that ?
this was the orig. subroutine that i used:
sub read_final_dev_file {
$dfcnt=0;
$DEVICE_ANZSUMZW=0;
$DEVICE_ANZSUM=0;
open(DATA,"$log_dir1/ALLDEVSORT.$log_file_ext1") || die ("Cannot Open Logfile: $log_dir1/$log_DEV_name.$log_file_ext1 !!!!");
#lines = <DATA>;
close(DATA);
chomp(#lines); # erase the last sign from a string
foreach $logline (#lines) {
if ($logline =~ /(.*)-->(.*)/) {
$DEVICE_CODE[$dfcnt] = $1;
$DEVICE_ANZAHL[$dfcnt] = $2;
print "DEVICE_final = $DEVICE_CODE[$dfcnt], D_ANZAHL_final = $DEVICE_ANZAHL[$dfcnt]\n";
if ($dfcnt > 0 ) {
if ( $DEVICE_CODE[$dfcnt] eq $DEVICE_CODE[$dfcnt-1] ) {
$DEVICE_ANZSUM = $DEVICE_ANZAHL[$dfcnt] + $DEVICE_ANZAHL[$dfcnt-1];
$DEVICE_ANZSUMZW = $DEVICE_ANZSUM++;
#$DEVICE_ANZSUM = $DEVICE_ANZAHL[$dfcnt]++;
#print "DEVICE_ANZAHL = $DEVICE_ANZAHL[$dfcnt],DEVICE_ANZAHL -1 = $DEVICE_ANZAHL[$dfcnt-1]\n";
print "DEVICE_eq = $DEVICE_CODE[$dfcnt], D_ANZAHL_eq = $DEVICE_ANZAHL[$dfcnt],DEVANZSUM = $DEVICE_ANZSUM,COUNT = $dfcnt\n";
}#end if
if ( $DEVICE_CODE[$dfcnt] ne $DEVICE_CODE[$dfcnt-1] ) {
#$DEVICE_ANZSUM=0;
#splice(#data3,$dfcnt+2,1) if ($DEVICE_ANZSUM > 1);
push (#data3,$DEVICE_ANZSUMZW) if ($DEVICE_ANZSUM > 1);
push (#data3,$DEVICE_ANZAHL[$dfcnt]) if ($DEVICE_ANZSUM == 0);
if ( $DEVICE_CODE[$dfcnt] ne $DEVICE_CODE[$dfcnt-1] ) {
$DEVICE_ANZSUM=0;
}
print "DEVICE_ne = $DEVICE_CODE[$dfcnt], D_ANZAHL_ne = $DEVICE_ANZAHL[$dfcnt], DEVANZSUM = $DEVICE_ANZSUM\n";
}#end if
}#end if $dfcnt
$dfcnt++;
}#end if logline
}#end for
print "#labels3\n";
print "#data3\n";
}#end sub read_final_dev_file

Probably not the best way, but this is what came to mind after seeing LeoNerd answer, since I don't have CPAN access in production and never have modules lying around:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my #input = (
[ ccc => 2 ],
[ ccc => 5 ],
[ abc => 3 ],
[ abc => 7 ],
[ cb => 6 ],
);
my %output;
$output{$_->[0]} += $_->[1] for #input;
print Dumper \%output;
my #output = map { [ $_ => $output{$_} ] } keys(%output);
print Dumper \#output;
Output:
$VAR1 = {
'abc' => 10,
'cb' => 6,
'ccc' => 7
};
$VAR1 = [
['abc', 10],
['cb', 6],
['ccc', 7],
];

You could use List::UtilsBy::partition_by to group the original list into partitions, by the first string:
use List::UtilsBy qw( partition_by );
my #input = (
[ ccc => 2 ],
[ ccc => 5 ],
[ abc => 3 ],
[ abc => 7 ],
[ cb => 6 ],
);
my %sets = partition_by { $_->[0] } #input;
Now you have a hash, keyed by the leading strings, whose values are all the ARRAY refs with that key first. You can now sum the values within them, by mapping over $_->[1] which contains the numbers:
use List::Util qw( sum );
my %totals;
foreach my $key ( keys %sets ) {
$totals{$key} = sum map { $_->[1] } #{ $sets{$key} };
}
If you're inclined towards code of a more compact and functional-looking nature, you could instead use the new pairmap here; making the whole thing expressible in one line:
use List::UtilsBy qw( partition_by );
use List::Util qw( pairmap sum );
my %totals = pairmap { $a => sum map { $_->[1] } #$b }
partition_by { $_->[0] } #input;
Edit: I should add that even though you stated in your original question that the array was sorted, this solution doesn't require it sorted. It will happily take the input in any order.

You can simplify your subroutine a lot by using a hash to track the counts instead of an array. The following uses an array #devices to track the order and a hash %device_counts to track the counts:
my #devices;
my %device_counts;
while (<DATA>) { # Read one line at a time from DATA
if (/(.*)-->(.*)/) { # This won't extract newlines so no need to chomp
if (!exists $device_counts{$1}) {
push #devices, $1; # Add to the array the first time we encounter a device
}
$device_counts{$1} += $2; # Add to the count for this device
}
}
for my $device (#devices) {
printf "%s-->%s\n", $device, $device_counts{$device};
}

Related

perl: subroutine returns 0 instead of specified array

I have a hash of hashes like this:
my %HoH = (
flintstones => {
1 => "fred",
2 => "barney",
},
jetsons => {
1 => "george",
2 => "jane",
},
simpsons => {
1 => "homer",
2 => "marge",
},
);
My subroutine is meant to search through the values of a specified key, e.g. search all 2s for e and return the value for key 1 in each case.
It works since it can print those things just fine, and I can also print it to a text file. I also want the same lines to be pushed to an array #output.
Why does my subroutine return zero which is saved in $hej in this case.
sub search_hash {
# Arguments are
#
# $hash=hash ref
# $parameter1=key no. to search in
# $parameter2=value to find
# $parameter3=name of text file to write to
my ( $hash, $parameter1, $parameter2, $parameter3 ) = #_, ;
# Loop over the keys in the hash
foreach ( keys %{$hash} ) {
# Get the value for the current key
my $value = $hash->{$_};
my $value2 = $hash->{'1'};
search_hash( $value, $parameter1, $parameter2, $parameter3 );
for my $key ( $parameter1 ) {
my #output; #create array for loop outputs to be saved
if ( $value =~ $parameter2 ) {
push #output, "$value2"; #push lines to array
print "Value: $value\n";
print "Name: $value2\n";
open( my $fh, '>>', $parameter3 );
print $fh ( "$value2\n" );
close $fh;
}
return #output;
}
}
}
my $hej = search_hash( \%HoH, "2", 'e', 'data3.txt' );
print $hej;
output
Can't use string ("fred") as a HASH ref while "strict refs" in use
There is no key "1" in first loop of your hash. Recursive subroutine is not a good choice here.
my $value2 = $hash->{'1'};
Borodin's one line code is great. But we should search 2 s.
search all 2 s for e and return the value for key 1 in each case.
As a summary, search_hash.pl
use strict;
use warnings;
use utf8;
my %HoH = (
Flintstones => { 1 => "Fred", 2 => "Barney" },
Jetsons => { 1 => "George", 2 => "Jane" },
Simpsons => { 1 => "Homer", 2 => "Marge" }
);
my #output2 = map { $_->{1} } grep { $_->{2} =~ /e/ } values %HoH;
open( my $fh, '>', "data3.txt");
print $fh ( "$_\n" ) foreach #output2;
close $fh;
And
perl search_hash.pl
cat data3.txt
OUTPUT:
Fred
Homer
George
The return expression of subroutine is evaluated in the same context as the subroutine itself. Since you're assuming the result of the subroutine to a scalar, the subroutine is evaluated in scalar context, and #output is evaluated in scalar context. In scalar context, an array returns the number of elements it contains. In this case, #output happened to be empty, so search_hash returned zero.
If you want the elements of #output instead of the number of elements in #output, you will need to call the subroutine in list context. Assigning the result to an array is one way of doing that.
This is how I fixed the problem in the rewrite posted below. Note that I replaced the scalar $hej with the array #hej below.
I also fixed other problems. The values for key 1 from all three
2nd level hashes are now returned, because each of them contains a value for key 2, which contains e (the value to find). See below.
use strict;
use warnings;
my %HoH = (
Flintstones => { 1 => "Fred", 2 => "Barney" },
Jetsons => { 1 => "George", 2 => "Jane" },
Simpsons => { 1 => "Homer", 2 => "Marge" }
);
sub search_hash {
# Arguments:
# $hash: hash ref
# $search_key: key to search in each 2-nd level hash
# $search_string: value to find
my ( $hash, $search_key, $search_string ) = #_;
my #output;
foreach ( keys %{$hash} ) {
#print "Key: $_\n";
my $hash2 = $hash->{$_}; # 2-nd level hash (reference to)
my $search_val = $hash2->{$search_key}; # Value for key == parameter1
#print "Value: $search_val\n";
if ($search_val =~ /\Q$search_string/) {
my $id = $hash2->{'1'};
#print "Name: $id\n";
push #output, $id;
}
}
return #output;
}
my #hej = search_hash( \%HoH, '2', 'e' );
print "Result: #hej\n";

perl: assign interval to number with hash keys, increment counter if number is in interval

I have several pairs of numbers and want to check whether the first one falls into a given interval. If so, I want to increment one of two counters for that interval, depending on whether the second number of the pair is higher or lower than the first.
The intervals are simple and look like 1-10,11-20,21-30 etc. The pairs of numbers look like (5,15),(24,13) etc. But I have several thousand of each. The exact format of input does not matter so much.
The desired outcome looks like this
1-10: higher=1, lower=0
11-20: higher=0,lower=0
21-30: higher=0,lower=1
My idea was to create a hash with the interval start as key and store the two counters in the value.
for(my $i = $start;$i<=$end;$i = ($i+$intervalsize)){
my $counter1 = 0;
my $counter2 = 0;
#{$hash{$i}} = ($counter1,$counter2);
but now I don't know how to compare the numbers with the keys and how to address the counters.
Thank you for any help!
Something like this maybe?
#!/usr/bin/env perl
use strict;
use warnings;
use List::MoreUtils qw( first_index );
sub interval_to_key { sprintf '(%d,%d)', #{ $_[0] } }
my #intervals = (
[ 1, 10 ],
[ 11, 20 ],
[ 21, 30 ],
);
my #upper_bounds = map $_->[0], #intervals;
my #pairs = (
[ 5, 15 ],
[ 24, 13 ],
);
my #keys = qw( ascending descending );
my %counts;
for my $interval ( #intervals ) {
#{ $counts{ interval_to_key($interval) } }{ #keys } = (0) x #keys;
}
for my $pair ( #pairs ) {
my $is_ascending = ($pair->[0] <= $pair->[1]);
my $i = first_index { $pair->[0] <= $_ } #upper_bounds;
++ $counts{ interval_to_key( $intervals[$i]) }{ $keys[1 - $is_ascending] };
}
use Data::Dumper;
print Dumper \%counts;
If the counters are to be kept for each interval we can record them in a data structure of choice. For example, store counters for each interval in a hash with the key being the interval index and low-high counters being in a two-element refarray as its value.
use warnings 'all';
use strict;
my #intervals = ( [1,10], [11,20], [21,30] );
my #pairs = ( [5,15], [24, 13] );
my %counter = map { $_ => [0,0] } (0..$#intervals); # initialize
foreach my $i (0..$#intervals)
{
foreach my $pair (#pairs)
{
my ($beg, $end) = #{$intervals[$i]};
my ($pl, $pr) = #$pair;
if ($pl >= $beg and $pl <= $end)
{
if ($pr > $pl) { ++$counter{$i}[1] }
else { ++$counter{$i}[0] }
}
}
}
print "$_ => #{$counter{$_}}\n" for sort keys %counter;
This is still rudimentary, in order to be easier to adjust. For one thing, since the hash uses the index of #intervals array it may well be an array (with arrayrefs) itself. Another possibility is to use a hash with keys like low and high for counters instead of a two-element array.
In order to use start of interval as a key
my %counter = map { $_->[0] => [0,0] } #intervals; # initialize
for my $inter (#intervals) {
for my $pair (#pairs) {
my ($beg, $end) = #$inter;
my ($pl, $pr) = #$pair;
if ($pl >= $beg and $pr <= $end)
{
if ($pr > $pl) { ++$counter{$beg}[1] }
else { ++$counter{$end}[0] }
}
}
}
printf("%3d => #{$counter{$_}}\n", $_) for sort keys %counter;

Argument "*" isn't numeric in array element

I want to make a hash of array from a file that looks like:
xx500173:56QWER 45 A rtt34 34C
...
I would like to have a unique "key" (e.g. column1_column2)
#!/usr/bin/perl
use warnings;
use strict;
my $seq;
while(<>){
chomp;
my #line = split(/\s+/, $_);
my $key = $line[0] . "_" . $line[1]; #try to make a unique key for each entry
map { $seq->{ $_->[$key] } = [#$_[0..4]] } [ split/\s+/ ];
}
foreach my $s (keys %{$seq} ) {
print $s,": ",join( "\t", #{ $seq->{$s}} ) . "\n";
}
but I get the following error:
Argument "xx500173:56QWER_45" isn't numeric in array element
Does is it matter if key is numeric or string?
An index to an array [] should be numeric, but $key is not numeric. Assuming you want all the white-space-separated tokens as elements of your array:
use warnings;
use strict;
my $seq;
while (<DATA>) {
chomp;
my #line = split;
my $key = $line[0] . "_" . $line[1]; #try to make a unique key for each entry
$seq->{$key} = [ #line ];
}
foreach my $s ( keys %{$seq} ) {
print $s, ": ", join( "\t", #{ $seq->{$s} } ) . "\n";
}
__DATA__
xx500173:56QWER 45 A rtt34 34C
Outputs:
xx500173:56QWER_45: xx500173:56QWER 45 A rtt34 34C
You have confused yourself with the line
map { $seq->{ $_->[$key] } = [#$_[0..4]] } [ split/\s+/ ];
which is wrong because
map is an operator for translating one list into another by performing the same operation on every element of the input list, but you are ignoring the returned value
The input list is only one item long - the array reference returned by [ split/\s+/ ]
What you have written is the same as
$_ = [ split /\s+/ ];
$seq->{ $_->[$key] } = [ #$_[0..4] ];
and the problem is that $_->[$key] tries to index the anonymous array using the string $key, which is clearly wrong.
All you need here is
$seq->{$key} = [ #line[0..4] ];
and your complete program should look like this
#!/usr/bin/perl
use strict;
use warnings;
my $seq;
while ( <> ) {
chomp;
my #line = split;
$seq->{"$line[0]_$line[1]"} = [ #line[0..4] ];
}
for my $s ( keys %{$seq} ) {
printf "%s: %s\n", $s, join("\t", #{ $seq->{$s} } );
}

Generate unordered pairs from list in perl

Perl newbie here.
I want to loop over pairs of words, coming from a list of words:
#words = ("word1", "word2", "word3", "word4");
I want to create and process all pairs of words, but pair ordering is not important, i.e. the pairs
("word1", "word2") and ("word2, "word1") are considered the same and only one of them should be generated.
Is there an easy way to do this? The obvious solution would be to have an nested loop somewhat like:
for my $i1 (0 ... $#words) {
for my $i2 ($i1 + 1 ... $#words) {
process_pair(words[$i1], words[$i2])
}
}
but I am looking for something more Perl-esque than this. Thanks in advance.
For a fixed R=2, the solution you presented is quite appropriate.
for my $i1 (0 ... $#words) {
for my $i2 ($i1 + 1 ... $#words) {
process_pair($words[$i1], $words[$i2])
}
}
But what if R was larger or variable? You can do some powerful stuff using NestedLoops.
use Algorithm::Loops qw( NestedLoops );
my $R = 2;
NestedLoops(
[ [ 0..$#words ],
( sub { [$_+1..$#words] } ) x ($R-1),
],
\&process_pair,
);
Or as an iterator
use Algorithm::Loops qw( NestedLoops );
my $R = 2;
my $iter = NestedLoops([
[ 0..$#words ],
( sub { [$_+1..$#words] } ) x ($R-1),
]);
while (my #combo = $iter->()) {
process_pair(#combo);
}
But that's hardly readable. Solutions that specifically address this problem are going to be cleanest.
use Math::Combinatorics qw( );
my $R = 2;
my $iter = Math::Combinatorics->new( count => $R, data => \#words );
while (my #combo = $iter->next_combination) {
process_pair(#combo);
}

Perl - Hash of hash and columns :(

I've a set of strings with variable sizes, for example:
AAA23
AB1D1
A1BC
AAB212
My goal is have in alphabetical order and unique characters collected for COLUMNS, such as:
first column : AAAA
second column : AB1A
and so on...
For this moment I was able to extract the posts through a hash of hashes. But now, how can I sort data? Could I for each hash of hash make a new array?
Thank you very much for you help!
Al
My code:
#!/usr/bin/perl
use strict;
use warnings;
my #sessions = (
"AAAA",
"AAAC",
"ABAB",
"ABAD"
);
my $length_max = 0;
my $length_tmp = 0;
my %columns;
foreach my $string (#sessions){
my $l = length($string);
if ($l > $length_tmp){
$length_max = $l;
}
}
print "max legth : $length_max\n\n";
my $n = 1;
foreach my $string (#sessions){
my #ch = split("",$string);
for my $col (1..$length_max){
$columns{$n}{$col} = $ch[$col-1];
}
$n++;
}
foreach my $col (keys %columns) {
print "colonna : $col\n";
my $deref = $columns{$col};
foreach my $pos (keys %$deref){
print " posizione : $pos --> $$deref{$pos}\n";
}
print "\n";
}
exit(0);
What you're doing is rotating the array. It doesn't need a hash of hash or anything, just another array. Surprisingly, neither List::Util nor List::MoreUtils supplies one. Here's a straightforward implementation with a test. I presumed you want short entries filled in with spaces so the columns come out correct.
#!/usr/bin/perl
use strict;
use warnings;
use Test::More;
use List::Util qw(max);
my #Things = qw(
AAA23
AB1D1
A1BC
AAB212
);
sub rotate {
my #rows = #_;
my $maxlength = max map { length $_ } #rows;
my #columns;
for my $row (#rows) {
my #chars = split //, $row;
for my $colnum (1..$maxlength) {
my $idx = $colnum - 1;
$columns[$idx] .= $chars[$idx] || ' ';
}
}
return #columns;
}
sub print_columns {
my #columns = #_;
for my $idx (0..$#columns) {
printf "Column %d: %s\n", $idx + 1, $columns[$idx];
}
}
sub test_rotate {
is_deeply [rotate #_], [
"AAAA",
"AB1A",
"A1BB",
"2DC2",
"31 1",
" 2",
];
}
test_rotate(#Things);
print_columns(#Things);
done_testing;
You can sort the output of %columns in your code with
foreach my $i (sort { $a <=> $b } keys %columns) {
print join(" " => sort values %{ $columns{$i} }), "\n";
}
This gives
A A A A
A A A C
A A B B
A A B D
But using index numbers as hash keys screams that you should use an array instead, so let's do that. To get the columns, use
sub columns {
my #strings = #_;
my #columns;
while (#strings) {
push #columns => [ sort map s/^(.)//s ? $1 : (), #strings ];
#strings = grep length, #strings;
}
#columns;
}
Given the strings from your question, it returns
A A A A
1 A A B
1 A B B
2 2 C D
1 1 3
2
As you can see, this is unsorted and repeats characters. With Perl, when you see the word unique, always think of hashes!
sub unique_sorted_columns {
map { my %unique;
++$unique{$_} for #$_;
[ sort keys %unique ];
}
columns #_;
}
If you don't mind destroying information, you can have columns sort and filter duplicates:
sub columns {
my #strings = #_;
my #columns;
while (#strings) {
my %unique;
map { ++$unique{$1} if s/^(.)//s } #strings;
push #columns => [ sort keys %unique ];
#strings = grep length, #strings;
}
#columns;
}
Output:
A
1 A B
1 A B
2 C D
1 3
2