perl defensive programming (die, assert, croak) - perl

What is the best (or recommended) approach to do defensive programming in perl?
For example if I have a sub which must be called with a (defined) SCALAR, an ARRAYREF and an optional HASHREF.
Three of the approaches I have seen:
sub test1 {
die if !(#_ == 2 || #_ == 3);
my ($scalar, $arrayref, $hashref) = #_;
die if !defined($scalar) || ref($scalar);
die if ref($arrayref) ne 'ARRAY';
die if defined($hashref) && ref($hashref) ne 'HASH';
#do s.th with scalar, arrayref and hashref
}
sub test2 {
Carp::assert(#_ == 2 || #_ == 3) if DEBUG;
my ($scalar, $arrayref, $hashref) = #_;
if(DEBUG) {
Carp::assert defined($scalar) && !ref($scalar);
Carp::assert ref($arrayref) eq 'ARRAY';
Carp::assert !defined($hashref) || ref($hashref) eq 'HASH';
}
#do s.th with scalar, arrayref and hashref
}
sub test3 {
my ($scalar, $arrayref, $hashref) = #_;
(#_ == 2 || #_ == 3 && defined($scalar) && !ref($scalar) && ref($arrayref) eq 'ARRAY' && (!defined($hashref) || ref($hashref) eq 'HASH'))
or Carp::croak 'usage: test3(SCALAR, ARRAYREF, [HASHREF])';
#do s.th with scalar, arrayref and hashref
}

use Params::Validate qw(:all);
sub Yada {
my (...)=validate_pos(#_,{ type=>SCALAR },{ type=>ARRAYREF },{ type=>HASHREF,optional=>1 });
...
}

I wouldn't use any of them. Aside from not not accepting many array and hash references, the checks you used are almost always redundant.
>perl -we"use strict; sub { my ($x) = #_; my $y = $x->[0] }->( 'abc' )"
Can't use string ("abc") as an ARRAY ref nda"strict refs" in use at -e line 1.
>perl -we"use strict; sub { my ($x) = #_; my $y = $x->[0] }->( {} )"
Not an ARRAY reference at -e line 1.
The only advantage to checking is that you can use croak to show the caller in the error message.
Proper way to check if you have an reference to an array:
defined($x) && eval { #$x; 1 }
Proper way to check if you have an reference to a hash:
defined($x) && eval { %$x; 1 }

None of the options you show display any message to give a reason for the failure, which I think is paramount.
It is also preferable to use croak instead of die from within library subroutines, so that the error is reported from the point of view of the caller.
I would replace all occurrences of if ! with unless. The former is a C programmer's habit.
I suggest something like this
sub test1 {
croak "Incorrect number of parameters" unless #_ == 2 or #_ == 3;
my ($scalar, $arrayref, $hashref) = #_;
croak "Invalid first parameter" unless $scalar and not ref $scalar;
croak "Invalid second parameter" unless $arrayref eq 'ARRAY';
croak "Invalid third parameter" if defined $hashref and ref $hashref ne 'HASH';
# do s.th with scalar, arrayref and hashref
}

Related

Perl: undefined value as a HASH reference [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 2 months ago.
Improve this question
I have inherited an older script that uses hash references that I don't understand. It results in:
Can't use an undefined value as a HASH reference at
./make_quar_dbfile.pl line 65.
63 my $bucket = sprintf('%02x', $i);
64 my $file = sprintf('%s/%02x.db', $qdir, $i);
65 %{$hashes{$bucket}} ? 1 : next;
66 tie (my %hash, 'DB_File', $file, O_RDWR, 0600) || die "Can't open db file: $! \n ";
67 %hash = %{$hashes{$bucket}};
68 untie %hash;
The script reads through a number of gzipd emails to identify the sender/recip/subject/date etc., then writes that info to a DB_File hash.
This script used to work with older versions of Perl, but looks like it now is no longer compliant.
I'd really like to understand how this works, but I don't fully understand reference/dereference, why it's even necessary here, and the %{$var} notation. All of the references I've studied talk about hash references in terms of $hash_ref = \%author; not %hash_ref = %{$author}, for example.
Ideas on how to get this to work with hash references would be greatly appreciated.
#!/usr/bin/perl -w
use DB_File;
use File::Basename qw(basename);
use vars qw($verbose);
use strict;
use warnings;
sub DBG($);
$verbose = shift || 1;
my $qdir = '/var/spool/amavisd/qdb';
my $source_dir = '/var/spool/amavisd/quarantine';
my $uid = getpwnam('amavis');
my $gid = getgrnam('amavis');
my %hashes = ( );
my $me = basename($0);
my $version = '1.9';
my $steps = 100;
my $cnt = 0;
DBG("- Creating initial database files...");
for (my $i = 0; $i < 256; $i++) {
my $file = sprintf('%s/%02x.db', $qdir, $i);
unlink $file || DBG("Could not unlink $file to empty db: $! \n");
tie (my %hash, "DB_File", $file, O_CREAT, 0600) || die "Can't open db file: $! \n";
untie %hash;
chown($uid, $gid, $file) || die "Unable to set attributes on file: $! \n";
}
DBG("done\n");
opendir SOURCEDIR, $source_dir || die "Cannot open $source_dir: $! \n";
DBG("- Building hashes... ");
foreach my $f (sort readdir SOURCEDIR) {
next if ($f eq "." || $f eq "..");
if ($f =~ m/^(spam|virus)\-([^\-]+)\-([^\-]+)(\.gz)?/) {
my $type = $1;
my $key = $3;
my $bucket = substr($key, 0, 2);
my $d = $2;
my $subj = '';
my $to = '';
my $from = '';
my $size = '';
my $score = '0.0';
if (($cnt % $steps) == 0) { DBG(sprintf("\e[8D%-8d", $cnt)); } $cnt++;
if ($f =~ /\.gz$/ && open IN, "zcat $source_dir/$f |") {
while(<IN>) {
last if ($_ eq "\n");
$subj = $1 if (/^Subject:\s*(.*)$/);
$to = $1 if (/^To:\s*(.*)$/);
$from = $1 if (/^From:\s*(.*)$/);
$score = $1 if (/score=(\d{1,3}\.\d)/);
}
close IN;
$to =~ s/^.*\<(.*)\>.*$/$1/;
$from =~ s/^.*\<(.*)\>.*$/$1/;
$size = (stat("$source_dir/$f"))[7];
$hashes{$bucket}->{$f} = "$type\t$d\t$size\t$from\t$to\t$subj\t$score";
}
}
}
closedir SOURCEDIR;
DBG("...done\n\n- Populating database files...");
for (my $i = 0; $i < 256; $i++) {
my $bucket = sprintf('%02x', $i);
my $file = sprintf('%s/%02x.db', $qdir, $i);
%{$hashes{$bucket}} ? 1 : next;
tie (my %hash, 'DB_File', $file, O_RDWR, 0600) || die "Can't open db file: $! \n ";
%hash = %{$hashes{$bucket}};
untie %hash;
}
exit(0);
sub DBG($) { my $msg = shift; print $msg if ($verbose); }
What is $hash{$key}? A value associated with the (value of) $key, which must be a scalar. So we get the $value out of my %hash = ( $key => $value ).
That's a string, or a number. Or a filehandle. Or, an array reference, or a hash reference. (Or an object perhaps, normally a blessed hash reference.) They are all scalars, single-valued things, and as such are a legitimate value in a hash.
The syntax %{ ... } de-references a hash reference† so judged by %{ $hashes{$bucket} } that code expects there to be a hash reference. So the error says that there is actually nothing in %hashes for that value of a would-be key ($bucket), so it cannot "de-reference" it. There is either no key that is the value of $bucket at that point in the loop, or there is such a key but it has never been assigned anything.
So go debug it. Add printing statements through the loops so you can see what values are there and what they are, and which ones aren't even as they are assumed to be. Hard to tell what fails without running that program.
Then, the line %{$hashes{$bucket}} ? 1 : next; is a little silly. The condition of the ternary operator evaluates to a boolean, "true" (not undefined, not 0, not empty string '') or false. So it tests whether $hashes{$bucket} has a hashref with at least some keys, and if it does then it returns 1; so, the for loop continues. Otherwise it skips to the next iteration.
Well, then skip to next if there is not a (non-empty) hashref there:
next if not defined $hashes{$bucket} or not %{ $hashes{$bucket} };
Note how we first test whether there is such a key, and only then attempt to derefence it.
† Whatever expression may be inside the curlies must evaluate to a hash reference. (If it's else, like a number or a string, the code would still exit with an error but with a different one.)
So, in this code, the hash %hashes must have a key that is the value of $bucket at that point, and the value for that key must be a hash reference. Then, the ternary operator tests whether the hash obtained from that hash reference has any keys.
You need to understand references first, this is a kind of how-to :
#!/usr/bin/perl
use strict; use warnings;
use feature qw/say/;
use Data::Dumper;
my $var = {}; # I create a HASH ref explicitly
say "I created a HASH ref explicitly:";
say ref($var);
say "Now, let's add any type of content:";
say "Adding a ARRAY:";
push #{ $var->{arr} }, (0..5);
say Dumper $var;
say "Now, I add a new HASH";
$var->{new_hash} = {
foo => "value",
bar => "other"
};
say Dumper $var;
say 'To access the data in $var without Data::Dumper, we need to dereference what we want to retrieve';
say "to retrieve a HASH ref, we need to dereference with %:";
while (my ($key, $value) = each %{ $var->{new_hash} }) {
say "key=$key value=$value";
}
say "To retrieve the ARRAY ref:";
say join "\n", #{ $var->{arr} };
Output
I created a HASH ref explicitely:
HASH
Now, let's add any type of content:
Adding a ARRAY:
$VAR1 = {
'arr' => [
0,
1,
2,
3,
4,
5
]
};
Now, I add a new HASH
$VAR1 = {
'new_hash' => {
'foo' => 'value',
'bar' => 'other'
},
'arr' => [
0,
1,
2,
3,
4,
5
]
};
To access the data in $var without Data::Dumper, we need to dereference what we want to retrieve
to retrieve a HASH ref, we need to dereference with %:
key=foo value=value
key=bar value=other
To retrieve the ARRAY ref:
0
1
2
3
4
5
Now with your code, instead of
%{$hashes{$bucket}} ? 1 : next;
You should test the HASH ref first, because Perl say it's undefined, let's debug a bit:
use Data::Dumper;
print Dumper $hashes;
print "bucket=$bucket\n";
if (defined $hashes{$bucket}) {
print "Defined array\n";
}
else {
print "NOT defined array\n";
}

How to overload operator in non-class package?

In my situation I don't need warnings Use of uninitialized value in string while comparing string equality. So I tought that instead silencing all such warnings in the scope with no warnings 'uninitialized' would be better to overload eq-operator with my own subroutine, like:
use overload 'eq' => \&undefined_equal;
sub undefined_equal {
my ( $left, $right ) = #_;
no warnings 'uninitialized';
if ( $left eq $right ) {
return 1;
}
return 0;
}
Of course, overloading does not work, because according to the docs, overload is meant to use with classes, but I have plain procedural packages.
So I did try with overloading built-in functions, like:
package REeq;
use strict; use warnings; use 5.014;
BEGIN {
use Exporter ();
#REeq::ISA = qw( Exporter );
#REeq::EXPORT = qw( eq );
}
sub eq {
my ( $left, $right ) = #_;
no warnings 'uninitialized';
if ( $left CORE::eq $right ) {
return 1;
}
return 0;
}
1;
I can call my eq but can't use it as operator.
I need it because I want instead
if ( defined $some_scalar && $some_scalar eq 'literal string' ){
....
}
to use just
if ( $some_scalar eq 'literal string' ){
....
}
How could I achieve my goal?
Changing the behaviour of eq is possible, but it requires writing an XS modules that creates an op checker that replaces the code perl executes for the eq ops in scope. This is the approach used by no autovivification;, for example.
Seems you can - I haven't tried this but the perl monks have
sure, if you see it that way... you just have to bless your variable,
just like you did with your Number-package.
use overload ...;
my $n = bless {number => 23}, "main";
print $n >> 2;
i think that's not what you want, just wanted to make clear that it's
not a problem of the package name but that you must have a blessed
object.
Edit: taking zdim's onboard...
use strict;
use warnings;
use overload 'eq' => \&undefined_equal;
sub undefined_equal {
my ( $left, $right ) = #_;
no warnings 'uninitialized';
if ( ${$left} eq $right ) {
return 1;
}
return 0;
}
my $a = "abcde";
my $n = bless \$a, "main";
print "a eq undef -->";
print $a eq undef;
print "<--\nn eq undef -->";
print $n eq undef;
print "<--\n";
which gives
$ perl overload.pl
Use of uninitialized value in string eq at overload.pl line 20.
a eq undef --><--
n eq undef -->0<--
Don't forget the double $$ in the sub or you disappear into recursion. And the scalar reference for bless as you can only bless references, it seems
It still has a bless but hey

Perl line doesn't work when moved to separate function

I have an array that may or may not look like [0, 1] and I want to test for that.
This code (inside a method) works:
sub some_other_method {
my $self = shift;
...
if (scalar #myArray == 2 && #myArray[0] == 0 && #myArray[1] == 1) {
# this will successfully catch arrays that look like [0, 1]
}
}
If I move the contents of the if into a separate method and then call it, it doesn't work.
sub is_warning {
my $self = shift;
my #array = shift;
return scalar #array == 2 && #array[0] == 0 && #array[1] == 1;
}
...
sub some_other_method {
my $self = shift;
...
if ($self->is_warning(#myArray)) {
# this will not catch arrays that look like [0, 1]
}
}
If I add a print #array; to is_warning, it just prints a single number.
What am I doing wrong?
You've missed something crucial about Perl - a subroutine is only ever passed a list of scalar values in #_. So to pass an array, you need to use one of the techniques in subroutines stuff and other below.
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
sub stuff {
my ( $arg, #other_args ) = #_;
print Dumper \#other_args;
print "$_\n" for #other_args;
}
sub other {
my ( $arg, $array_ref ) = #_;
print Dumper $array_ref;
print "$_\n" for #$array_ref;
}
my $param = "fish";
my #array = ( "wiggle", "wobble", "boo" );
stuff( $param, #array );
other( $param, \#array );
In stuff the subroutine is handed a list of values to do with what it will. In the other, it's given two values - $param and a reference to #array.
The reason you're only getting 1 in your case, is that shift is only pulling a single value off #_. So any extra arguments are getting left behind. You'll be able to see this with;
print Dumper \#_;
Please remove the my $self = shift; line from is_warning function and test it again.
Please try below script:
#!/usr/bin/perl
use Data::Dumper;
sub is_warning {
my #array = #_;
print Dumper \#_;
return scalar #array == 2 && #array[0] == 0 && #array[1] == 1;
}
sub some_other_method {
my #myArray = (0,1);
if (is_warning(#myArray)) {
print "\nif inside some : #myArray\n";
}
}
some_other_method();

Use of logical operators AND OR when comparing conditionals

I am facing some doubts about the use (and return values I guess) of the logical operators &&, and, ||, or.
$number = 5;
$numberA = 5;
$numberB = 1;
$string = "x";
$stringA = "x";
$stringB = "y";
If two numbers are compared:
$x=5;
if ( $x == $number ) { print '$x == $number', "\n"; }
If two strings are compared:
$x="x";
if ( $x eq $string ) { print '$x eq $string', "\n"; }
But I'm not sure what is the best way to evaluate two numbers/strings to a number/string. Is this correct?
$x=5; $y=5;
if ( ($x && $y) == $number ) { print '($x && $y) == $number', "\n"; }
$x="x"; $y="x";
if ( ($x and $y) eq $string ) { print '($x and $y) eq $string', "\n"; }
And what is the rule when two logicals are evaluated in the same condition? Should the conditions itself be compared as numbers (&&,||) or strings (and,or)?
$x=5; $y=1;
if ( ($x == $numberA) && ($y == $numberB) ) { print '&& or and here?', "\n"; }
$x="x"; $y="y";
if ( ($x eq $stringA) and ($y eq $stringB) ) { print 'and or or here?', "\n"; }
( $foo && $bar ) == $baz
does not do what you think it does; it first evaluates the && operation, getting the value of $foo if $foo is true and otherwise getting the value of $bar, then compares that to $baz. You need to explicitly spell it out as $foo == $baz && $bar == $baz to test both.
If you have many values (preferably in an array, not a bunch of separate variables), grep can be useful:
if ( 2 == grep $_ == $baz, $foo, $bar ) {
List::MoreUtils provides a convenient all method, too:
use List::MoreUtils 'all';
if ( all { $_ == $baz } $foo, $bar ) {
and/or and &&/|| are not string or numeric operators; the alphabetic ones function exactly the same as the equivalent symbolic ones. The only difference is that they have different precedence; &&/|| have a higher precedence, such that they are useful within an expression; and/or have a lower precedence, such that they are useful for flow control between what are essentially different expressions. Some examples:
my $x = $y || 'default_value';
equivalent to:
my $x = ( $y || 'default_value' );
vs.
my #a = get_lines() or die "expected some lines!";
equivalent to:
( my #a = get_lines() ) or die "expected some lines!";

How can I check if all elements of an array are identical in Perl?

I have an array #test. What's the best way to check if each element of the array is the same string?
I know I can do it with a foreach loop but is there a better way to do this? I checked out the map function but I'm not sure if that's what I need.
If the string is known, you can use grep in scalar context:
if (#test == grep { $_ eq $string } #test) {
# all equal
}
Otherwise, use a hash:
my %string = map { $_, 1 } #test;
if (keys %string == 1) {
# all equal
}
or a shorter version:
if (keys %{{ map {$_, 1} #test }} == 1) {
# all equal
}
NOTE: The undefined value behaves like the empty string ("") when used as a string in Perl. Therefore, the checks will return true if the array contains only empty strings and undefs.
Here's a solution that takes this into account:
my $is_equal = 0;
my $string = $test[0]; # the first element
for my $i (0..$#test) {
last unless defined $string == defined $test[$i];
last if defined $test[$i] && $test[$i] ne $string;
$is_equal = 1 if $i == $#test;
}
Both methods in the accepted post give you the wrong answer if #test = (undef, ''). That is, they declare an undefined value to be equal to the empty string.
That might be acceptable. In addition, using grep goes through all elements of the array even if a mismatch is found early on and using the hash more than doubles the memory used by elements of array. Neither of these would be a problem if you have small arrays. And, grep is likely to be fast enough for reasonable list sizes.
However, here is an alternative that 1) returns false for (undef, '') and (undef, 0), 2) does not increase the memory footprint of your program and 3) short-circuits as soon as a mismatch is found:
#!/usr/bin/perl
use strict; use warnings;
# Returns true for an empty array as there exist
# no elements of an empty set that are different
# than each other (see
# http://en.wikipedia.org/wiki/Vacuous_truth)
sub all_the_same {
my ($ref) = #_;
return 1 unless #$ref;
my $cmpv = \ $ref->[-1];
for my $i (0 .. $#$ref - 1) {
my $this = \ $ref->[$i];
return unless defined $$cmpv == defined $$this;
return if defined $$this
and ( $$cmpv ne $$this );
}
return 1;
}
However, using List::MoreUtils::first_index is likely to be faster:
use List::MoreUtils qw( first_index );
sub all_the_same {
my ($ref) = #_;
my $first = \ $ref->[0];
return -1 == first_index {
(defined $$first != defined)
or (defined and $_ ne $$first)
} #$ref;
}
TIMTOWTDI, and I've been reading a lot of Mark Jason Dominus lately.
use strict;
use warnings;
sub all_the_same {
my $ref = shift;
return 1 unless #$ref;
my $cmp = $ref->[0];
my $equal = defined $cmp ?
sub { defined($_[0]) and $_[0] eq $cmp } :
sub { not defined $_[0] };
for my $v (#$ref){
return 0 unless $equal->($v);
}
return 1;
}
my #tests = (
[ qw(foo foo foo) ],
[ '', '', ''],
[ undef, undef, undef ],
[ qw(foo foo bar) ],
[ '', undef ],
[ undef, '' ]
);
for my $i (0 .. $#tests){
print "$i. ", all_the_same($tests[$i]) ? 'equal' : '', "\n";
}
You can check how many times the element in the array (#test) is repeated by counting it in a hash (%seen). You can check how many keys ($size) are present in the hash (%seen). If more than 1 key is present, you know that the elements in the array are not identical.
sub all_the_same {
my #test = #_;
my %seen;
foreach my $item (#test){
$seen{$item}++
}
my $size = keys %seen;
if ($size == 1){
return 1;
}
else{
return 0;
}
}
I think, we can use List::MoreUtils qw(uniq)
my #uniq_array = uniq #array;
my $array_length = #uniq_array;
$array_length == 1 ? return 1 : return 0;
I use List::Util::first for all similar purposes.
# try #0: $ok = !first { $_ ne $string } #test;
# try #1: $ok = !first { (defined $_ != defined $string) || !/\A\Q$string\E\z/ } #test;
# final solution
use List::Util 'first';
my $str = shift #test;
my $ok = !first { defined $$_ != defined $str || defined $str && $$_ ne $str } map \$_, #test;
I used map \$_, #test here to avoid problems with values that evaluate to false.
Note. As cjm noted fairly, using map defeats the advantage of first short-circuiting. So I tip my hat to Sinan with his first_index solution.