perl: naming hash variables read from YAML - perl

I'm reading some information from a YAML file
groceries.yaml
# comment
fruit:
apples: 1
oranges: 1
grapes: 1
vegetables:
potatoes: 1
onions: 1
leeks: 1
into a perl script
myscript.pl
#!/usr/bin/perl
use strict;
use warnings;
use YAML::Tiny;
# Create a YAML file
my $stuff = YAML::Tiny->new;
# Open the config
$stuff = YAML::Tiny->read( 'groceries.yaml' );
print "Fruit: ", %{($stuff->[0]->{fruit})},"\n";
print "Vegetables: ", %{($stuff->[0]->{vegetables})},"\n";
exit
This works fine, but I would like to have one hash for fruit and one for vegetables. My naive attempt was
my #keys = keys %{($stuff->[0])};
foreach my $key (#keys){
my %{ $key } = %{($stuff->[0]->{$key})},"\n";
}
but clearly this doesn't work.
I'd love to understand what I'm doing wrong, and am open to different work flows that accomplish the same idea :)

Try this :
#!/usr/bin/perl
use strict;
use warnings;
use YAML::Tiny;
use Data::Dumper;
# Create a YAML file
my $stuff = YAML::Tiny->new;
# Open the config
$stuff = YAML::Tiny->read( 'groceries.yaml' );
my %fruits = %{ $stuff->[0]->{fruit} };
my %vegetables = %{ $stuff->[0]->{vegetables} };
I don't know why you put some parentheses in your code :
%{($stuff->[0]->{$key})},"\n";
I think this is the problem.
To iterate over the HASHes,
use Data::Dumper;
# ...
foreach my $key (keys %{ $stuff->[0] }) {
print Dumper $stuff->[0]->{$key};
}
Edit2
#!/usr/bin/perl
use strict;
use warnings;
use YAML::Tiny;
use Data::Dumper;
# Create a YAML file
my $stuff = YAML::Tiny->new;
# Open the config
$stuff = YAML::Tiny->read( 'groceries.yaml' );
my %top_h;
foreach my $key (keys %{ $stuff->[0] }) {
$top_h{$key} = $stuff->[0]->{$key};
}
print Dumper \%top_h;

This solution enables you to access %fruit and %vegetables. They are declared as package global variables using our so that they will be in the symbol table, which then allows you to do use symbolic references or glob assignments. You'll also need to turn off strict refs to enable this. Also see this reference.
use strict;
use warnings;
use YAML::Tiny;
use Data::Dumper;
my $stuff = YAML::Tiny->read('groceries.yml');
my %groceries = %{$stuff->[0]};
our %fruit;
our %vegetables;
{
no strict 'refs';
#no strict 'vars'; # don't need above 'our' declarations with this
while (my ($key, $val) = each %groceries) {
%$key = %$val;
# or *$key = $val;
}
}
print Dumper \%fruit;
If you don't know the keys upfront, then you'll also need to turn off strict vars so you don't need to declare the hashes before assigning to them. But then you might get a warning when you use the hash directly.
But having said all of that, I think it would be simplest to just use %groceries.

my ( $fruit, $vegetables) = #{$stuff->[0]}{ qw<fruit vegetables> };
If you want to do this in a loop, first, I would save the first "document" to a local reference.
my $yaml = $stuff->[0];
And then in a while loop, do this:
while ( my ( $k, $v ) = each %$yaml ) {
say ucfirst( $k ) . ': ' . %$v;
}
You could also use List::Pairwise and do this:
mapp { say ucfirst( $a ) . ': ' . %$b } %{ $stuff->[0] };

Related

How can I combine Data::Dumper and Statistics::Diversity::Shannon into a whole loop?

I want to combine this two functions together to get Shannon Diversity Index.
How can do ?
The first function is using Data::Dumper to get the unique numbers.
#!perl
use warnings;
use strict;
use Data::Dumper;
$Data::Dumper::Sortkeys=1;
my #names = qw(A A A A B B B C D);
my %counts;
$counts{$_}++ for #names;
printf "\$VAR1 = { %s};\n",
join ' ',
map "$_ ",
sort { $b <=> $a }
values(%counts);
exit;
This is the output
$VAR1 = { 4 3 1 1 };
Then I can input it into the second function.
The second function is using Statistics::Diversity::Shannon to get Shannon Diversity Index.
#!perl
use warnings;
use strict;
use Statistics::Diversity::Shannon;
my #data = qw( 4 3 1 1 );
my $d = Statistics::Diversity::Shannon->new( data => \#data );
my $H = $d->index();
my $E = $d->evenness();
print "$d/$H/$E";
exit;
How can I combine this two functions into a whole loop by using the original data set (A A A A B B B C D) to get the Shannon Diversity Index.
Data::Dumper is a debugging tool, not a serializing too. Not a good one, at least.
But you aren't even using Data::Dumper. You're using something far worse.
Let's start by using something acceptable like JSON.
#!/usr/bin/perl
use strict;
use warnings;
use Cpanel::JSON::XS qw( encode_json );
{
my #names = qw( A A A A B B B C D );
my %counts; ++$counts{$_} for #names;
my #data = sort { $b <=> $a } values(%counts);
print encode_json(\#data);
}
(Note that the sort { $b <=> $a } doesn't appear required.)
And this is one way to read it back in:
#!/usr/bin/perl
use strict;
use warnings;
use Cpanel::JSON::XS qw( decode_json );
use Statistics::Diversity::Shannon qw( );
{
my $json = do { local $/; <> };
my $data = decode_json($json);
my $d = Statistics::Diversity::Shannon->new( data => $data );
my $H = $d->index();
my $E = $d->evenness();
print "$H/$E\n";
}
Above, I assumed you meant "work together" when you said "combine into whole loop".
On the other hand, maybe you meant "combine into a single file". If that's the case, then you can use the following:
#!/usr/bin/perl
use strict;
use warnings;
use Statistics::Diversity::Shannon qw( );
{
my #names = qw( A A A A B B B C D );
my %counts; ++$counts{$_} for #names;
my #data = values(%counts);
my $d = Statistics::Diversity::Shannon->new( data => \#data );
my $H = $d->index();
my $E = $d->evenness();
print "$H/$E\n";
}
Your first code snippet does not use Data::Dumper correctly. Data::Dumper mainly provides one function, Dumper, which outputs any data in a format that can be interpreted as Perl code.
# instead of printf "\$VAR1 = ...
print Dumper([values %counts]);
Since the output of Data::Dumper::Dumper is Perl code, you can read it by evaluating it as Perl code (with eval).
So if your first script writes output to a file called some.data, your second script can call
my $VAR1;
open my $fh, "<", "some.data";
eval do { local $/; <$fh> }; # read data from $fh and call eval on it
# now the data from the first script is in $VAR1
my $d = Statistics::Diversity::Shannon->new( data => $VAR1 );
...

if exist a hash key add the new value to existing value

I have a hash structure and I want to add new value to the existing value (not update with new value ).
here is my code.
use strict;
use warnings;
my %hash;
while(<DATA>){
my $line=$_;
my ($ID)=$line=~/ID=(.*?);/;
#make a hash with ID as key
if (!exists $hash{$ID}){
$hash{$ID}= $line;
}
else{
#add $line to the existing value
}
}
for my $key(keys %hash){
print $key.":".$hash{$key}."\n";
}
__DATA__
ID=13_76; gi|386755343
ID=13_75; gi|383750074
ID=13_75; gi|208434224
ID=13_76; gi|410023515
ID=13_77; gi|499086767
else{
$hash{$ID} .= $line;
}
All you need is $hash{$ID} .= $line;. No if-elses.
If there's no key $ID in the hash it would create one and concatenate $line to empty string, resulting exactly what you need.
You should store your data in a hash of arrays:
#!/usr/bin/env perl
use strict;
use warnings;
# --------------------------------------
use charnames qw( :full :short );
use English qw( -no_match_vars ); # Avoids regex performance penalty
use Data::Dumper;
# Make Data::Dumper pretty
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Indent = 1;
# Set maximum depth for Data::Dumper, zero means unlimited
local $Data::Dumper::Maxdepth = 0;
# conditional compile DEBUGging statements
# See http://lookatperl.blogspot.ca/2013/07/a-look-at-conditional-compiling-of.html
use constant DEBUG => $ENV{DEBUG};
# --------------------------------------
my %HoA = ();
while( my $line = <DATA> ){
if( my ( $ID ) = $line =~ m{ ID \= ([^;]+) }msx ){
push #{ $HoA{$ID} }, $line;
}
}
print Dumper \%HoA;
__DATA__
ID=13_76; gi|386755343
ID=13_75; gi|383750074
ID=13_75; gi|208434224
ID=13_76; gi|410023515
ID=13_77; gi|499086767

Populating Automatic Perl Variables when using Quantifiers

I was trying to match the following line
5474c2ef012a759a c11ab88ae8daa276 63693b53799c91f1 be1d8c8738733d80
with
if(/[[:xdigit:]{8}[:xdigit:]{8}\s]{4}/)
Is there anyway I populate the automatic variables $1,$2,$3..$8 etc with half of each of those words.
i.e
$1=5474c2ef
$2=012a759a
$3=c11ab88a
$4=e8daa276
$5=63693b53
$6=799c91f1
$7=be1d8c87
$8=38733d80
You could capture them in an array:
use strict;
use warnings;
use Data::Dumper;
$_ = '5474c2ef012a759a c11ab88ae8daa276 63693b53799c91f1 be1d8c8738733d80 ';
my #nums = /\G(?:([[:xdigit:]]{8})([[:xdigit:]]{8})\s)/g;
if (#nums >= 8) {
print Dumper(\#nums);
}
(may behave differently than the original if there are more than four or if there're earlier 16-hex-digit sequences separated by more than just a space).
How about:
my $pat = '([[:xdigit:]]{8})\s?' x 8;
# produces: ([[:xdigit:]]{8})\s?([[:xdigit:]]{8})\s?....
/$pat/;
Update if you need to be strict on the spacing requirement:
my $pat = join('\s', map{'([[:xdigit:]]{8})' x 2} (1..4));
# produces: ([[:xdigit:]]{8})([[:xdigit:]]{8})\s....
/$pat/;
use strict;
use warnings;
use Data::Dumper;
$_ = '5474c2ef012a759a c11ab88ae8daa276 63693b53799c91f1 be1d8c8738733d80 ';
if (/((?:[[:xdigit:]]{16}\s){4})/) {
my #nums = map { /(.{8})(.{8})/ } split /\s/, $1;
print Dumper(\#nums);
}
__END__
$VAR1 = [
'5474c2ef',
'012a759a',
'c11ab88a',
'e8daa276',
'63693b53',
'799c91f1',
'be1d8c87',
'38733d80'
];
Yes, there is, but you don’t want to.
You just want to do this:
while ( /(\p{ahex}{8})/g ) { print "got $1\n" }

Set a variable in another package

I'd like to set a variable with a chosen name in another package. How can I do this easily?
Something like:
$variable_name = 'x';
$package::$variable_name = '0';
# now $package::x should be == '0'
You can do that, but you would have to disable strictures like so:
package Test;
package main;
use strict;
my $var_name = 'test';
my $package = 'Test';
no strict 'refs';
${"${package}::$var_name"} = 1;
print $Test::test;
So I'd not recommend that. Better to use a hash.
use 5.010;
use strict;
use warnings;
{
no warnings 'once';
$A::B::C::D = 5; # a test subject
}
my $pkg = 'A::B::C';
my $var = 'D';
# tearing down the walls (no warranty for you):
say eval '$'.$pkg."::$var"; # 5
# tearing down the walls but at least feeling bad about it:
say ${eval '\$'.$pkg."::$var" or die $#}; # 5
# entering your house with a key (but still carrying a bomb):
say ${eval "package $pkg; *$var" or die $#}; # 5
# using `Symbol`:
use Symbol 'qualify_to_ref';
say $${ qualify_to_ref $pkg.'::'.$var }; # 5
# letting us know you plan mild shenanigans
# of all of the methods here, this one is best
{
no strict 'refs';
say ${$pkg.'::'.$var}; # 5
}
and if the following make sense to you, party on:
# with a recursive function:
sub lookup {
#_ == 2 or unshift #_, \%::;
my ($head, $tail) = $_[1] =~ /^([^:]+:*)(.*)$/;
length $tail
? lookup($_[0]{$head}, $tail)
: $_[0]{$head}
}
say ${ lookup $pkg.'::'.$var }; # 5
# as a reduction of the symbol table:
use List::Util 'reduce';
our ($a, $b);
say ${+ reduce {$$a{$b}} \%::, split /(?<=::)/ => $pkg.'::'.$var }; # 5
And of course you can assign to any of these methods instead of saying them.
Given that $variable_name was validated, you could do:
eval "\$package::$variable_name = '0'";

perl: iterate over a typeglob

Given a typeglob, how can I find which types are actually defined?
In my application, we user PERL as a simple configuration format.
I'd like to require() the user config file, then be able to see which variables are defined, as well as what types they are.
Code: (questionable quality advisory)
#!/usr/bin/env perl
use strict;
use warnings;
my %before = %main::;
require "/path/to/my.config";
my %after = %main::;
foreach my $key (sort keys %after) {
next if exists $before{$symbol};
local *myglob = $after{$symbol};
#the SCALAR glob is always defined, so we check the value instead
if ( defined ${ *myglob{SCALAR} } ) {
my $val = ${ *myglob{SCALAR} };
print "\$$symbol = '".$val."'\n" ;
}
if ( defined *myglob{ARRAY} ) {
my #val = #{ *myglob{ARRAY} };
print "\#$symbol = ( '". join("', '", #val) . "' )\n" ;
}
if ( defined *myglob{HASH} ) {
my %val = %{ *myglob{HASH} };
print "\%$symbol = ( ";
while( my ($key, $val) = each %val ) {
print "$key=>'$val', ";
}
print ")\n" ;
}
}
my.config:
#A = ( a, b, c );
%B = ( b=>'bee' );
$C = 'see';
output:
#A = ( 'a', 'b', 'c' )
%B = ( b=>'bee', )
$C = 'see'
$_<my.config = 'my.config'
In the fully general case, you can't do what you want thanks to the following excerpt from perlref:
*foo{THING} returns undef if that particular THING hasn't been used yet, except in the case of scalars. *foo{SCALAR} returns a reference to an anonymous scalar if $foo hasn't been used yet. This might change in a future release.
But if you're willing to accept the restriction that any scalar must have a defined value to be detected, then you might use code such as
#! /usr/bin/perl
use strict;
use warnings;
open my $fh, "<", \$_; # get DynaLoader out of the way
my %before = %main::;
require "my.config";
my %after = %main::;
foreach my $name (sort keys %after) {
unless (exists $before{$name}) {
no strict 'refs';
my $glob = $after{$name};
print "\$$name\n" if defined ${ *{$glob}{SCALAR} };
print "\#$name\n" if defined *{$glob}{ARRAY};
print "%$name\n" if defined *{$glob}{HASH};
print "&$name\n" if defined *{$glob}{CODE};
print "$name (format)\n" if defined *{$glob}{FORMAT};
print "$name (filehandle)\n" if defined *{$glob}{IO};
}
}
will get you there.
With my.config of
$JACKPOT = 3_756_788;
$YOU_CANT_SEE_ME = undef;
#OPTIONS = qw/ apple cherries bar orange lemon /;
%CREDITS = (1 => 1, 5 => 6, 10 => 15);
sub is_jackpot {
local $" = ""; # " fix Stack Overflow highlighting
"#_[0,1,2]" eq "barbarbar";
}
open FH, "<", \$JACKPOT;
format WinMessage =
You win!
.
the output is
%CREDITS
FH (filehandle)
$JACKPOT
#OPTIONS
WinMessage (format)
&is_jackpot
Printing the names takes a little work, but we can use the Data::Dumper module to take part of the burden. The front matter is similar:
#! /usr/bin/perl
use warnings;
use strict;
use Data::Dumper;
sub _dump {
my($ref) = #_;
local $Data::Dumper::Indent = 0;
local $Data::Dumper::Terse = 1;
scalar Dumper $ref;
}
open my $fh, "<", \$_; # get DynaLoader out of the way
my %before = %main::;
require "my.config";
my %after = %main::;
We need to dump the various slots slightly differently and in each case remove the trappings of references:
my %dump = (
SCALAR => sub {
my($ref,$name) = #_;
return unless defined $$ref;
"\$$name = " . substr _dump($ref), 1;
},
ARRAY => sub {
my($ref,$name) = #_;
return unless defined $ref;
for ("\#$name = " . _dump $ref) {
s/= \[/= (/;
s/\]$/)/;
return $_;
}
},
HASH => sub {
my($ref,$name) = #_;
return unless defined $ref;
for ("%$name = " . _dump $ref) {
s/= \{/= (/;
s/\}$/)/;
return $_;
}
},
);
Finally, we loop over the set-difference between %before and %after:
foreach my $name (sort keys %after) {
unless (exists $before{$name}) {
no strict 'refs';
my $glob = $after{$name};
foreach my $slot (keys %dump) {
my $var = $dump{$slot}(*{$glob}{$slot},$name);
print $var, "\n" if defined $var;
}
}
}
Using the my.config from your question, the output is
$ ./prog.pl
#A = ('a','b','c')
%B = ('b' => 'bee')
$C = 'see'
Working code using a CPAN module that gets some of the hair out of the way, Package::Stash. As noted in my comment to gbacon's answer, this is blind to the config file doing $someval = undef but that seems to be unavoidable, and at least the other cases are caught. It also limits itself to the SCALAR, ARRAY, HASH, CODE, and IO types -- getting GLOB and FORMAT is possible but it makes the code less pretty and also creates noise in the output :)
#!perl
use strict;
use warnings;
use Package::Stash;
sub all_vars_in {
my ($package) = #_;
my #ret;
my $stash = Package::Stash->new($package);
for my $sym ($stash->list_all_package_symbols) {
for my $sigil (qw($ # % &), '') {
my $fullsym = "$sigil$sym";
push #ret, $fullsym if $stash->has_package_symbol($fullsym);
}
}
#ret;
}
my %before;
$before{$_} ++ for all_vars_in('main');
require "my.config";
for my $var (all_vars_in('main')) {
print "$var\n" unless exists $before{$var};
}
Beginning in 5.010, you can distinguish whether a SCALAR exists using the B introspection module; see Detecting declared package variables in perl
Update: example copied from that answer:
# package main;
our $f;
sub f {}
sub g {}
use B;
use 5.010;
if ( ${ B::svref_2object(\*f)->SV } ) {
say "f: Thar be a scalar tharrr!";
}
if ( ${ B::svref_2object(\*g)->SV } ) {
say "g: Thar be a scalar tharrr!";
}
1;
UPDATE:
gbacon is right. *glob{SCALAR} is defined.
Here is the output I get using your code:
Name "main::glob" used only once:
possible typo at
test_glob_foo_thing.pl line 13.
'FOO1' (SCALAR)
'FOO1' (GLOB)
'FOO2' (SCALAR)
'FOO2' (GLOB)
'_<my.config' (SCALAR)
'_<my.config' (GLOB)
This is despite FOO2 being defined as a hash, but not as a scalar.
ORIGINAL ANSWER:
If I understand you correctly, you simply need to use the defined built-in.
#!/usr/bin/env perl
use strict;
use warnings;
my %before = %main::;
require "/path/to/my.config";
my %after = %main::;
foreach my $key (sort keys %after) {
if (not exists $before{$key}) {
if(defined($after{$key}){
my $val = $after{$key};
my $what = ref($val);
print "'$key' ($what)\n";
}
}
}
I hate to ask, but instead of messing around with typeglobs, why not switch to a real configuration format? e.g. check out Config::Simple and YAML.
I wouldn't recommend messing around with typeglobs and symbol tables in normal cases (some CPAN modules do that, but only at the bottom levels of large systems - e.g. Moose in the lowest levels of Class::MOP). Perl gives you a lot of rope to work with, but that rope is also quite happy to self-noosify and self-tie-around-your-neck if you're not careful :)
See also: How do you manage configuration files in Perl?
no strict 'refs';
my $func_name = 'myfunc';
*{$func_name}{CODE}()
use strict 'refs';
If you don't mind parsing Data::Dump output, you could use it to tease out the differences.
use strict;
use warnings;
use Data::Dump qw{ dump };
my %before = %main::;
require "my.config";
my %after = %main::;
foreach my $key ( sort keys %after ) {
if ( not exists $before{$key} ) {
my $glob = $after{$key};
print "'$key' " . dump( $glob) . "\n";
}
}
Using this code with the following config file:
$FOO1 = 3;
$FOO2 = 'my_scalar';
%FOO2 = ( a=>'b', c=>'d' );
#FOO3 = ( 1 .. 5);
$FOO4 = [ 1 .. 5 ];
I believe that this output provides enough information to be able to figure out which parts of each type glob are defined:
'FOO1' do {
my $a = *main::FOO1;
$a = \3;
$a;
}
'FOO2' do {
my $a = *main::FOO2;
$a = \"my_scalar";
$a = { a => "b", c => "d" };
$a;
}
'FOO3' do {
my $a = *main::FOO3;
$a = [1 .. 5];
$a;
}
'FOO4' do {
my $a = *main::FOO4;
$a = \[1 .. 5];
$a;
}
'_<my.config' do {
my $a = *main::_<my.config;
$a = \"my.config";
$a;
}