Perl subroutine doesn't return value - perl

Sorry to disturb you with such a silly question, I'm new at Perl.
I'm trying to modify parsing subroutine, written by my colleague and have problems with functions in perl.
It returns empty value, I don't understand why? Have already read reference sites, seen examples and they are obvious. Here's the code of function:
sub parseHTML
{
my ($node, $depth) = #_;
my $str = ' ';
if (ref $node)
{
if ($node->tag () ne "script" && $node->tag () ne "style")
{
my #children = $node->content_list ();
for my $child_node (#children)
{
parseHTML ($child_node, $depth + 1);
}
}
}
else
{
$str = $str.$node."\n";
#print $str;
}
return $str;
}
And then I try to use it:
my $parser = HTML::TreeBuilder->new ();
$parser->parse ($cont);
my $Parsed = parseHTML ($parser, 0);
print "$Parsed\n";
#parseHTML ($parser, 0);
The return value is empty. However, if I decide to print data right in function, uncomment string:print $str; and use parseHTML ($parser, 0); instead, it works, and there's an output.
Where could be the mistake? Data in function seems to be local.
Here's the complete code listing as well.

You have to concat the $str returning from parseHTML
$str .= parseHTML ($child_node, $depth + 1);
or you can pass a reference to an output variable, like this:
...
my $Parsed;
parseHTML ($parser, 0,\$Parsed);
....
# Recursively walk an HTML tree and append every text node, followed by a
# newline, to the string referenced by $out.
#
#   $node  - either an element object (anything providing tag() and
#            content_list()) or a plain string holding a text segment
#   $depth - current nesting level; it is only passed down to the children
#   $out   - reference to the scalar that accumulates the extracted text
#
# The result is delivered through $$out; the return value is not meaningful.
# The contents of <script> and <style> elements are skipped entirely.
sub parseHTML
{
    my ($node, $depth, $out) = @_;

    # A non-reference node is a text segment: record it and stop descending.
    if (!ref $node)
    {
        $$out .= $node."\n";
        return;
    }

    my $tag = $node->tag();
    return if $tag eq "script" || $tag eq "style";

    for my $child_node ($node->content_list())
    {
        parseHTML($child_node, $depth + 1, $out);
    }
    return;
}

You forgot to add to $str in the "then" part of the if.
parseHTML ($child_node, $depth + 1);
should be
$str .= parseHTML ($child_node, $depth + 1);

Related

Tail call Recursion "Optimising"

I have a weird problem I can't figure out. I created a simple sequence in Perl with anonymous functions.
sub{($data, sub{($data, sub{($data, sub{($data, empty)})})})};
And it works, but I tried to implement tail-call optimization and got some weird behaviour. For example, the iter function below works.
sub iter {
my ($func, $seq) = #_;
my ($data, $next) = $seq->();
if (defined $data) {
$func->($data);
#_ = ($func, $next);#This #_ update works fine
goto &iter;
}
}
while this implementation of iter fails.
sub iter {
my ($func, $seq) = #_;
my ($data, $next) = $seq->();
if (defined $data) {
$func->($data);
$_[1] = $next; #This #_ update fails
goto &iter;
}
}
Both updates of @_ yield the same values for @_, but the code behaves differently when it continues. To see what I'm talking about, try running the complete code below.
#! /usr/bin/env perl
package Seq;
use 5.006;
use strict;
use warnings;
sub empty {
sub{undef};
}
sub add {
my ($data, $seq) = #_;
sub{($data, $seq)};
}
sub iter {
my ($func, $seq) = #_;
my ($data, $next) = $seq->();
if (defined $data) {
$func->($data);
#_ = ($func, $next);#This works fine
#$_[1] = $next; #This fails
goto &iter;
}
}
sub smap {
my ($func, $seq) = #_;
my ($data, $next) = $seq->();
if (defined $data) {
sub{($func->($data), Seq::smap($func, $next))};
}else {
empty();
}
}
sub fold {
my ($func, $acc, $seq) = #_;
my ($data, $next) = $seq->();
if (defined $data) {
#_ = ($func, $func->($acc, $data), $next);
goto &Seq::fold;
}else {
$acc;
}
}
1;
package main;
use warnings;
use strict;
use utf8;
use List::Util qw(reduce);
my $seq =
reduce
{Seq::add($b, $a)}
Seq::empty,
(4143, 1234, 4321, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
Seq::iter(sub{my ($data) = #_; STDOUT->print("$data\n")}, $seq);
my $seq2 = Seq::smap(sub{my ($data) = #_; $data * 2}, $seq);
STDOUT->print("\n\n");
Seq::iter(sub{my ($data) = #_; STDOUT->print("$data\n")}, $seq2);
STDOUT->print("\n\n");
my $ans = Seq::fold(sub{my ($acc, $data) = #_; $acc + $data}, 0, $seq);
my $ans2 = Seq::fold(sub{my ($acc, $data) = #_; $acc + $data}, 0, $seq2);
STDOUT->print("$ans\n");
STDOUT->print("$ans2\n");
exit (0);
The code should work for both examples of iter but it doesn't.. Any pointers why?
Writing to $_[1] writes to the second scalar passed to the sub.
$ perl -E'$x = "abc"; say $x; sub { $_[0] = "def"; say $_[0]; }->($x); say $x;'
abc
def
def
So you are clobbering the caller's variables. Assigning to @_ replaces the scalars it contains rather than writing to them.
$ perl -E'$x = "abc"; say $x; sub { #_ = "def"; say $_[0]; }->($x); say $x;'
abc
def
abc
You can replace a specific element using splice.
$ perl -E'$x = "abc"; say $x; sub { splice(#_, 0, 1, "def"); say $_[0]; }->($x); say $x;'
abc
def
abc
It's far more convenient for iterators to return an empty list when they are exhausted. For starters, it allows them to return undef.
Furthermore, I'd remove the expensive recursive calls with quicker loops. These loops can be made particularly simple because of the change mentioned above.
The module becomes:
package Seq;
use strict;
use warnings;

# A sequence is a code reference. Calling it returns the two-element list
# ($data, $rest) for a non-empty sequence, or the empty list once the
# sequence is exhausted. Because the end is signalled by an empty list
# rather than by undef, undef can be stored as an ordinary element.

# empty(): return the exhausted sequence.
sub empty { return sub { return } }

# add($data, $seq): return a new sequence with $data in front of $seq.
sub add {
    my ($data, $seq) = @_;
    return sub { $data, $seq };
}

# iter($func, $seq): call $func->($data) for every element, in order.
sub iter {
    my ($func, $seq) = @_;
    # A list assignment in scalar context yields the number of values
    # produced by $seq->(), so the loop ends on the empty list.
    while ( (my $data, $seq) = $seq->() ) {
        $func->($data);
    }
    return;
}

# smap($func, $seq): return a new sequence whose elements are
# $func->($data) for each $data in $seq. Only the head of $seq is consumed
# up front; $func and the mapping of the tail run when a node is called.
sub smap {
    my ($func, $seq) = @_;
    if ( (my $data, $seq) = $seq->() ) {
        return sub { $func->($data), smap($func, $seq) };
    } else {
        return empty();
    }
}

# fold($func, $acc, $seq): left fold. For each element, $acc becomes
# $func->($acc, $data); the final $acc is returned ($acc itself if $seq is
# empty).
sub fold {
    my ($func, $acc, $seq) = @_;
    while ( (my $data, $seq) = $seq->() ) {
        $acc = $func->($acc, $data);
    }
    return $acc;
}

1;
Also, for speed reasons, replace
sub { my ($data) = #_; $data * 2 }
sub { my ($acc, $data) = #_; $acc + $data }
with
sub { $_[0] * 2 }
sub { $_[0] + $_[1] }

Creating a hash using a function

I am trying to create a hash using a function in perl. Actually i was working on creating a binary search tree in perl. Below is the code :
sub newhash {
$data = shift;
$left = undef;
$right = undef;
%node = ("data"=>$data,"left"=>$left,"right"=>$right);
return (\%node);
}
$firstele = newhash(2);
foreach ( keys %$firstele )
{
print "$_:$firstele->{$_}\n";
}
$node = newhash(1);
foreach ( keys %$node )
{
print "$_:$node->{$_} \n";
}
foreach ( keys %$firstele )
{
print "$_:$firstele->{$_}\n";
}
The trouble is that when i am printing the original hash, the data key gets replaced by whatever i am passing to the newhash function . The output:
left:
right:
data:2
left:
right:
data:1
left:
right:
data:1
Any ideas why is the data key getting replaced?
use strict; would tell you about a bunch of undeclared variables; lexicalize them with my and it should solve your problem. As it stands, there's only one %node and you overwrite it with every call to newhash.
use strict;
# Build a node for a binary tree: a hash with the keys "data", "left" and
# "right". "data" holds the first argument; both child slots start as undef.
# Returns a reference to the new hash.
sub newhash {
    my ($data) = @_;

    # The anonymous hash constructor allocates a fresh hash on every call,
    # so each caller gets its own independent node and reference.
    return {
        data  => $data,
        left  => undef,
        right => undef,
    };
}
my $firstele = newhash(2);
print "firstele data: $firstele->{data}\n";
my $node = newhash(1);
print "node data: $node->{data}\n";
print "firstele data: $firstele->{data}\n";
Here is the code for adding elements in BT structure.
use strict;
use List::Util qw(first);
my (#input,$data);
print "Enter the data for being in a BST structure: ";
$data=<>;
chomp($data);
my $root=$data;
push(#input,$data);
while($data =~ m/\b-?\d{1,3}\b/){
my $idx=first { $input[$_] == $root } 0..$#input;
if($data<$root) {
for(my $i=0;$i<=$idx;$i++) {
next if($data>$input[$i]) ;
if($data<$input[$i]) {
splice(#input,$i,0,$data);
}
last;
}
}
if($data>$root) {
for(my $i=$idx;$i<=$#input;$i++) {
if($data>$input[$i]) {
if(($i+1==scalar(#input)) or ($data<$input[$i+1] && $i+1 !=
scalar(#input))) {
splice(#input,$i+1,0,$data);
last;
}
else {
next;
}
}
last;
}
}
print "Enter the number for being in a BT structure: ";
$data=<>;
chomp($data);
}
print "Final BT Array:\n",join(',', #input),"\n";

Recursively remove adjacent duplicate characters

I am having a string say
$str = "hhiiishs aappllee eerrffdd"
I want to remove adjacent duplicate characters recursively from a string. I dont know how to write recursion. I have written a code that is not recursive but working if we pass string by string
use strict;
use warnings;
my $str = "AABBCCDEEFDDS asdwdwws ffoorr";
# Collapse every run of identical adjacent characters in the given string
# to a single character (iteratively, not recursively).
#
# Returns the collapsed string followed by one trailing space.
sub remove {
    my ($str) = @_;
    my $previous = "";   # last character that was kept
    my $result   = "";
    for my $char (split //, $str) {
        next if $char eq $previous;   # same as the one before: drop it
        $previous = $char;
        $result .= $char;
    }
    return $result . " ";
}
Please guide me how to write recursive in Perl.
You can try,
$str =~ s/(.)\1+/$1/g;
gives
hishs aple erfd
Using recursion probably isn't the best choice for this, but here is a recursive function below.
#!/usr/bin/perl
use strict;
use warnings;
my $foo = "aabbccddeeffgg hhiijjkkllmmnnoo pp";
print reDup($foo), "\n";
# Recursively remove adjacent duplicate characters from a string.
#
# The string is scanned from the left; at the first character that equals
# its predecessor, that character is removed and the function calls itself
# on the shortened string. When a full scan finds no duplicate, the string
# is returned unchanged.
#
# Each removed character costs one recursion level and one rescan from the
# start, so this is a demonstration of recursion rather than an efficient
# solution.
sub reDup {
    my @string = split('', shift);   # split string into array of characters
    my $val;                         # previously visited character
    for my $i (0 .. $#string) {
        if (defined($val) && $string[$i] eq $val) {
            splice(@string, $i, 1);            # drop the duplicate
            return reDup(join('', @string));   # do it all again
        }
        $val = $string[$i];
    }
    # Reaching this point means no two neighbours were equal.
    return join('', @string);
}
# Recursive worker for remove_adjacent().
#
#   first argument      - the output accumulated so far
#   remaining arguments - the characters still to be processed
#
# Returns the accumulated output once the character list is used up.
# One recursion level is used per input character.
sub _remove_adjacent {
    my $out = shift;
    if (@_ == 0) {
        return $out;
    }
    elsif (@_ == 1) {
        return $out.$_[0];
    }
    elsif ($_[0] eq $_[1]) {
        # Drop only the first character of the equal pair and look at the
        # rest again. Emitting the second one here would collapse runs
        # pairwise and turn "aaa" into "aa" instead of "a".
        shift;
        return _remove_adjacent($out, @_);
    } else {
        return _remove_adjacent($out.shift(@_), @_);
    }
}

# Return a copy of $in in which every run of identical adjacent characters
# is collapsed to a single character.
sub remove_adjacent {
    my ($in) = @_;
    return _remove_adjacent('', split(//, $in));
}
Of course, that's purely tail-recursive, so it can be inlined into a loop.
# Iterative version: return a copy of $in in which every run of identical
# adjacent characters is collapsed to a single character.
sub remove_adjacent {
    my ($in) = @_;
    my @in = split(//, $in);
    my $out = '';
    while (@in) {
        my $char = shift(@in);
        # Skip a character that is followed by an identical one, so only
        # the last character of each run is emitted. Emitting one character
        # per equal pair would turn "aaa" into "aa".
        next if @in && $char eq $in[0];
        $out .= $char;
    }
    return $out;
}
This can be cleaned up further, but it shows that recursion would be a pure waste here.

Find unused "use'd" Perl modules

I am working on a very large, very old "historically grown" codebase. In the past, there were often people thinking "Oh, I may need this and that module, so I just include it...", and later, people often "cached" Data inside of modules ("use ThisAndThat" needing a few seconds to load some hundred MB from DB to RAM, yeah, its really a stupid Idea, we are working on that too) and so, often, we have a small module use'ing like 20 or 30 modules, from who 90% are totally unused in the source itself, and, because of "caching" in several use'd submodules, modules tend to take up one minute to load or even more, which is, of course, not acceptable.
So, Im trying to get that done better. Right now, my way is looking through all the modules, understanding them as much as possible and I look at all the modules including them and see whether they are needed or not.
Is there any easier way? I mean: There are functions returning all subs a module has like
...
return grep { defined &{"$module\::$_"} } keys %{"$module\::"}
, so, aint there any simple way to see which ones are exported by default and which ones come from where and are used in the other modules?
A simple example is Data::Dumper, which is included in nearly every file, even, when all debug-warns and prints and so on arent in the script anymore. But still the module has to load Data::Dumper.
Is there any simple way to check that?
Thanks!
The following code could be part of your solution - it will show you which symbols are imported for each instance of use:
package traceuse;
use strict;
use warnings;
use Devel::Symdump;

# import() runs for "use traceuse Some::Module LIST". It loads Some::Module
# on behalf of the calling package, forwards LIST to that module's import(),
# and warns with the names of the functions that appeared in the caller's
# package as a result.
sub import {
    my $class  = shift;
    my $module = shift;
    my $caller = caller();
    my $before = Devel::Symdump->new($caller);
    my $args   = \@_;

    # more robust way of emulating use?
    # The string eval switches to the caller's package before importing.
    eval "package $caller; require $module; $module\->import(\@\$args)";
    # Report a failed load instead of silently claiming nothing was imported.
    warn "traceuse: could not use module $module: $@" if $@;

    my $after = Devel::Symdump->new($caller);

    # Anything listed after the import but not before it was added by it.
    my %before_subs = map { ($_, 1) } $before->functions;
    my @added = grep { !$before_subs{$_} } $after->functions;

    if (@added) {
        warn "using module $module added: ".join(' ', @added)."\n";
    } else {
        warn "no new symbols from using module $module\n";
    }
}

1;
Then just replace "use module ..." with "use traceuse module ...", and you'll get a list of the functions that were imported.
Usage example:
package main;
sub foo { print "debug: foo called with: ".Dumper(\#_)."\n"; }
use traceuse Data::Dumper;
This will output:
using module Data::Dumper added: main::Dumper
i.e. you can tell which functions were imported in robust way. And you can easily extend this to report on imported scalar, array and hash variables - check the docs on Devel::Symdump.
Determining which functions are actually used is the other half of the equation. For that you might be able to get away with a simple grep of your source code - i.e. does Dumper appear in the module's source code anywhere other than on a use line? It depends on what you know about your source code.
Notes:
there may be a module which does what traceuse does - I haven't checked
there might be a better way to emulate "use" from another package
I kind of got it to work with PPI. It looks like this:
#!/usr/local/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Term::ANSIColor;
use PPI;
use PPI::Dumper;
my %doneAlready = ();
$" = ", ";
our $maxDepth = 2;
my $showStuffOtherThanUsedOrNot = 0;
parse("/modules/Test.pm", undef, undef, 0);
# Parse a Perl source file with PPI and report, for every module it
# includes, whether the including file appears to use it.
#
#   $file   - path of the file to analyse
#   $indent - number of tabs used to indent output (defaults to 0)
#   $caller - path of the file that included $file (defaults to $file)
#   $depth  - current recursion depth (defaults to 0)
#
# Returns the PPI::Document for $file. Returns nothing when $depth has
# reached $maxDepth, when $file does not exist, or when $file was already
# handled (tracked in %doneAlready); in those cases the including call
# prints no verdict for that module.
sub parse {
    my $file = shift;
    my $indent = shift || 0;
    my $caller = shift || $file;
    my $depth = shift || 0;
    if($depth && $depth >= $maxDepth) {
        return;
    }
    return unless -e $file;
    if(exists($doneAlready{$file}) == 1) {
        return;
    }
    $doneAlready{$file} = 1;
    my $skript = PPI::Document->new($file);

    # Collect the word tokens of every include statement, leaving out the
    # keywords and pragmas listed in the pattern below.
    my @included = ();
    eval {
        # The eval guards the dereference in case find() does not return
        # an array reference.
        foreach my $x (@{$skript->find("PPI::Statement::Include")}) {
            foreach my $y (@{$x->{children}}) {
                push @included, $y->{content} if (ref $y eq "PPI::Token::Word" && $y->{content} !~ /^(use|vars|constant|strict|warnings|base|Carp|no)$/);
            }
        }
    };

    # Verbose mode: list the included modules, highlighting repeats.
    my %double = ();
    print "===== $file".($file ne $caller ? " (Aufgerufen von $caller)" : "")."\n" if $showStuffOtherThanUsedOrNot;
    if($showStuffOtherThanUsedOrNot) {
        foreach my $modul (@included) {
            next unless -e createFileName($modul);
            my $is_crap = ((exists($double{$modul})) ? 1 : 0);
            print "\t" x $indent;
            print color("blink red") if($is_crap);
            print $modul;
            print color("reset") if($is_crap);
            print "\n";
            $double{$modul} = 1;
        }
    }

    # Recurse into each included module and check whether its exported
    # names or its subs are referenced.
    foreach my $modul (@included) {
        next unless -e createFileName($modul);
        my $anyUsed = 0;
        my $modulDoc = parse(createFileName($modul), $indent + 1, $file, $depth + 1);
        if($modulDoc) {
            my @exported = getExported($modulDoc);
            print "Exported: \n" if(scalar @exported && $showStuffOtherThanUsedOrNot);
            foreach (@exported) {
                # NOTE(review): this indentation is printed even when the
                # verbose flag is off - confirm whether that is intended.
                print(("\t" x $indent)."\t");
                if(callerUsesIt($_, $file)) {
                    $anyUsed = 1;
                    print color("green"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
                } else {
                    print color("red"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
                }
                print "\n" if $showStuffOtherThanUsedOrNot;
            }
            print(("\t" x $indent)."\t") if $showStuffOtherThanUsedOrNot;
            print "Subs: " if $showStuffOtherThanUsedOrNot;
            foreach my $s (findAllSubs($modulDoc)) {
                my $isExported = grep($s eq $_, @exported) ? 1 : 0;
                # $rot ("red") is true when no qualified use of the sub
                # was found in $caller.
                my $rot = callerUsesIt($s, $caller, $modul, $isExported) ? 0 : 1;
                $anyUsed = 1 unless $rot;
                if($showStuffOtherThanUsedOrNot) {
                    print color("red") if $rot;
                    print color("green") if !$rot;
                    print "$s, ";
                    print color("reset");
                }
            }
            print "\n" if $showStuffOtherThanUsedOrNot;
            print color("red"), "=========== $modul wahrscheinlich nicht in Benutzung!!!\n", color("reset") unless $anyUsed;
            print color("green"), "=========== $modul in Benutzung!!!\n", color("reset") if $anyUsed;
        }
    }
    return $skript;
}
# Translate a package name such as "Foo::Bar" into the path of its module
# file below /modules, e.g. "/modules/Foo/Bar.pm".
sub createFileName {
    my ($module) = @_;
    (my $relative = $module) =~ s{::}{/}g;
    return "/modules/" . $relative . ".pm";
}
# Extract the names a module assigns to @EXPORT.
#
#   $doc - the PPI::Document of the module
#
# Within each statement, every token after an '@EXPORT' token that is not
# whitespace, an operator or ";" is collected as source text. Each
# collected piece is then evaluated as the right-hand side of a list
# assignment; the list produced by the last evaluation is returned.
sub getExported {
    my $doc = shift;
    my @exported = ();
    eval {
        # The eval guards the dereference in case find() does not return
        # an array reference.
        foreach my $x (@{$doc->find("PPI::Statement")}) {
            my $isMatch = 0;
            foreach my $y (@{$x->{children}}) {
                if($y eq '@EXPORT') {
                    $isMatch = 1;
                } elsif($isMatch && ref($y) ne "PPI::Token::Whitespace" && ref($y) ne "PPI::Token::Operator" && $y->{content} ne ";") {
                    push @exported, $y->{content};
                }
            }
        }
    };
    my @realExported = ();
    foreach (@exported) {
        # XXX: string eval of text taken from the parsed file - only run
        # this on source code you trust.
        eval "\@realExported = $_";
    }
    return @realExported;
}
# Check whether the source file $caller mentions the sub $subname.
#
#   $subname    - name of the sub to look for
#   $caller     - path of the file to search
#   $namespace  - optional module or package name; when given, only
#                 qualified uses ("Name::sub" or "Name->sub") count
#   $isExported - accepted for interface compatibility; not used
#
# Returns 1 when a match is found, 0 otherwise (also when the file cannot
# be read).
sub callerUsesIt {
    my $subname = shift;
    my $caller = shift;
    my $namespace = shift || undef;
    my $isExported = shift || 0;

    # Read the file directly rather than through a shell command, so paths
    # with spaces or shell metacharacters are handled safely.
    open(my $fh, '<', $caller) or return 0;
    my $source = do { local $/; <$fh> };
    close($fh);
    return 0 unless defined $source;

    # \Q...\E makes the names match literally rather than as patterns.
    unless($namespace) {
        return 1 if($source =~ /\b\Q$subname\E\b/);
    } else {
        $namespace = createPackageName($namespace);
        my $regex = qr#\Q$namespace\E(?:::|->)\Q$subname\E#;
        if($source =~ $regex) {
            return 1;
        }
    }
    return 0;
}
# Collect the name tokens of all sub definitions in a parsed document.
#
#   $doc - the PPI::Document to search
#
# Returns the PPI::Token::Word children of every PPI::Statement::Sub whose
# content is not "sub". The tokens themselves are returned, not plain
# strings.
sub findAllSubs {
    my $doc = shift;
    my @subs = ();
    eval {
        # The eval guards the dereference in case find() does not return
        # an array reference.
        foreach my $x (@{$doc->find("PPI::Statement::Sub")}) {
            foreach my $y (@{$x->{children}}) {
                # Warnings are off because the content lookup may yield
                # undef for some children.
                no warnings;
                if($y->{content} ne "sub" && ref($y) eq "PPI::Token::Word") {
                    push @subs, $y;
                }
            }
        }
    };
    return @subs;
}
# Translate a module path such as "/modules/Foo/Bar.pm" back into a package
# name ("Foo::Bar"): strip "/modules/", strip a trailing ".pm", and turn
# the remaining slashes into "::".
sub createPackageName {
    my ($name) = @_;
    for ($name) {
        s{/modules/}{}g;
        s{\.pm$}{}g;
        s{/}{::}g;
    }
    return $name;
}
Its really ugly and maybe not 100% working, but it seems, with the tests that Ive done now, that its good for a beginning.

Removing duplicate entries based on the first column and keeping the latest entry, using Perl

i need some perl code for the following problem. thanks in advance for your efforts.
my input is in a file in this format: 'name' 'version number'
tech-sgla-zustand-ts.ini 1.1
tech-sgla-zustand-ts-feld.ini 1.1
tech-sgla-stamm-cds-feld.ini 1.1
tech-sgla-zustand-ts-feld.ini 1.2
tech-sgla-zustand-ts-feld.ini 1.4
tech-sgla-zustand-ts-feld.ini 1.3
i need it in the format (without blank lines in between):
the 'name' should be unique with maximum 'version number'
tech-sgla-zustand-ts.ini 1.1
tech-sgla-zustand-ts-feld.ini 1.4
tech-sgla-stamm-cds-feld.ini 1.1
You could use :
my %iniFiles = ();
while (<>) {
my ($ini, $vers) = split / +/, $_;
if (exists $iniFiles{$ini}) {
$iniFiles{$ini} = $vers if ($iniFiles{$ini} < $vers);
} else { $iniFiles{$ini} = $vers }
}
while (my ($k,$v) = each %iniFiles) { print "$k $v\n" }
Or if the input order is important :
my #inis = ();
my %iniFiles = ();
while (<>) {
my ($ini, $vers) = split / +/, $_;
if (exists $iniFiles{$ini}) {
$iniFiles{$ini} = $vers if ($iniFiles{$ini} < $vers);
} else { push #inis, $ini; $iniFiles{$ini} = $vers }
}
foreach (#inis) { print "$_ $iniFiles{$_}\n" }
If the output order doesn't matter you can use this one-liner:
perl -ane '$h{$F[0]} = $F[1] if $F[1] > $h{$F[0]};
END { print "$_ $h{$_}\n" for keys %h }' file
Otherwise this script should do it:
# Read "name version" lines and print each name once, in order of first
# appearance, with the numerically largest version seen for it.
my (%h, @a);
while (<>) {
    my ($name, $ver) = split;
    # Skip blank or incomplete lines instead of recording an undefined name.
    next unless defined $name && defined $ver;
    if (!exists $h{$name}) {
        # First occurrence: remember the input order and store the version
        # without comparing it against an undefined value.
        push @a, $name;
        $h{$name} = $ver;
    }
    elsif ($ver > $h{$name}) {
        $h{$name} = $ver;
    }
}
print "$_ $h{$_}\n" for @a;
open(TOUT, "temp.txt");
while($line = <TOUT>){
my ($ini, $vers) = split / +/, $line;
print "ini $ini vers $vers";
if (exists $iniFiles{$ini}) {
$iniFiles{$ini} = $vers if ($iniFiles{$ini} < $vers);
}
else {
$iniFiles{$ini} = $vers;
}
}
print "\n";
while (my ($k,$v) = each %iniFiles) {
print "$k $v";
$ssldata = $k . " " . $v;
}
Thanks OMG_peanuts for your response, but I needed to modify it a little bit to get it working according to my requirements.
Thanks to eugene y as well for your response.