I'd written a Perl script to generate a table of contents from HTML pages which is working fine (and generating valid HTML) except for that the Perl output is removing closing tags for some elements like p. This is not validating against DocType of strict.
Please scroll down the post to see the Perl code.
What should I do to correct it?
#!/usr/bin/perl -w
#Copyright anurag gupta ; free to use under GNU GPL License
use strict;
use feature "switch";
use Common;
use HTML::Element;
use HTML::TreeBuilder;
#"F:/anurag/work/indiacustomercare/airtel/recharge.html";
my $filename="F:/tmp/t9.html";
my $index=0;
my $labelprefix="anu555ltg-";
my $tocIndex=100001;
my $toc;
my #stack;
my $prevHtag="h2";
sub hTagEncountered($)
{
my $hTag=shift;
my $currLevel=(split //, $hTag)[1];
given($hTag)
{
when(/h1/)
{
break;
}
default{
my $countCurr= (split /h/,$hTag)[1];
my $countPrev= (split /h/,$prevHtag)[1];
if($countCurr>$countPrev)
{
push #stack,($currLevel);
$toc.="<ul>";
}
elsif($countCurr<$countPrev)
{
# Now check in the stack
while ( #stack and $currLevel < $stack[$#stack])
{
pop #stack;
$toc.="</ul>";
}
}
}
}
$prevHtag=$hTag;
}
sub getLabel
{
my $name=$labelprefix.++$tocIndex;
}
sub traversehtml
{
my $node=$_[0];
# $node->dump();
# print "-----------------\n";
# print $node->tag()."\n";
# print ref($node),"->\n";
if((ref(\$node) ne "SCALAR" )and ($node->tag() =~m/^h[2-7]$/i)) #it's an H Element!
{
my #h = $node->content_list();
if(#h==1 and ref(\$h[0]) eq "SCALAR") #H1 contains simple string and nothing else
{
hTagEncountered($node->tag());
my $label=getLabel();
my $a = HTML::Element->new('a', name => $label);
my $text=$node->as_trimmed_text();
$a->push_content($text);
$node->delete_content();
$text=HTML::Entities::encode_entities($text);
$node->push_content($a);
$toc.=<<EOF;
<li>$text
EOF
}
elsif ( #h==1 and ($h[0]->tag() eq "a")) # <h1>ttt</h1> case
{
#See if any previous label already exists
my $prevlabel = $h[0]->attr("name");
$h[0]->attr("name",undef) if(defined($prevlabel) and $prevlabel=~m/$labelprefix/); #delete previous name tag if any
#set the new label
my $label=getLabel();
$h[0]->attr("name",$label);
hTagEncountered($node->tag());
my $text=HTML::Entities::encode_entities($node->as_trimmed_text());
$toc.=<<EOF;
<li>$text
EOF
}
elsif (#h>1) #<h1>some text herettt</h1> case
{
die "h1 must not contain any html elements";
}
}
my #h = $node->content_list();
foreach my $item (#h)
{
if(ref(\$item) ne "SCALAR") {traversehtml($item); } #skip scalar items
}
}
die "File $filename not found" if !-r $filename;
my $tree = HTML::TreeBuilder->new();
$tree->parse_file($filename);
my #h = $tree->content_list();
traversehtml($h[1]);
while(pop #stack)
{
$toc.="</ul>";
}
$toc="<ul>$toc</ul>";
print qq{<div id="icctoc"><h2>TOC</h2>$toc</div>};
my #list1=$tree->content_list();
my #list2=$list1[1]->content_list();
for(my $i=0;$i<#list2;++$i){
if(ref(\$list2[$i]) eq "SCALAR")
{
print $list2[$i]
}
else{
print $list2[$i]->as_HTML();
}
}
# Finally:
Try passing {} for the \%optional_end_tags argument to as_HTML. See the documentation for details.
Related
I've been trying to create a config file parser to parse Cisco IOS configs and such. The final objective would be to show relevant data in contexts based on filters in a configuration file. For example, with such a config file it would display all interfaces where we've found the line "access vlan" as a child of the "interface" context and only show lines containing "speed", "duplex" and "description".
{
'Context' => '^interface',
'Types' => [
'Switch',
],
'Condition' => 'access vlan',
'Filter' => [
'speed',
'duplex',
'description'
]
};
So far, so good. I read the "running-config" and I index the lines depth (given that a non-empty line , not beginning with a space (\s) has a depth of 0) in an array.
Then, in another read I use that index to read the data again, this time using relative position based on depth to create the "childs" of a context. Here's the function :
sub getDeep {
my #data = (#_);
my ($bighash,$hash);
#First read
foreach my $idx (0.. $#data) {
my ($spaces, $content) = ($data[$idx] =~ m/^(\s*)(.*)/);
my $depth = length $spaces;
$bighash->{node}{$idx}{depth} = $depth;
}
# Variables for the first read
my $ldepth = 0;
my $lcontext;
my $lid;
# Second read
foreach my $id (0 .. $#data) {
$data[$id] =~ s/^\s*//;
next if ($data[$id] =~ /^!/);
my $depth = $bighash->{node}{$id}{depth};
if ($depth eq 0) {
push (#{$hash->{global}} , $data[$id]);
$lcontext = $data[$id];
$lid = $id;
}
if (($depth gt 0) && ($id - $lid eq 1)) {
push (#{$hash->{$lcontext}}, (" " x $depth. $data[$id]));
$lid = $id;
}
}
return $hash;
}
Using this sub, I can return a hash, then based on the presence of an arrayref for a given key, apply filters as explained. This works pretty well, so far very proud of this piece of code.
Problem comes when I want to find childs of childs. In the example below, the childs of "given param2" would reprensent my next challenge.
interface XYZ
given param1 -> child of "interface XYZ"
given param2 -> child of "interface XYZ"
given param2.1 -> child of "given param2"
given param2.2 -> child of "given param2"
given param3 -> child of "interface XYZ"
So after thinking about this for a while and failing with different approaches, my question comes in 2 separate parts :
1) Is there a better way to do this that I'm not seeing ?
2) How could I keep tagging childs of childs as the lines dig deeper and identify them properly in a data structure ?
Thank you for reading up to this line :)
This thread contains the solution I was hoping for :) Since it might benefit to others, here's the link :
https://perlmonks.org/?node_id=1224600
Cheers !
I have written something to do exactly this. I can't figure out how to put it on metacpan. However, im sure there is better already on there if I knew where to look. It's on of the first i wrote in perl, so it's a bit of a mess. But basically you can type "gettree -l Node interface" and on a XR device it'll pull all of the config. "gettree -s Node Description_keyword" will pull all of a single interface config. you can also use it with STDIN e.g., "cat file | gettree -l interface".
Program
#!/usr/bin/perl
use lib '/PATH_TO_Gettree.pm_MODULE/';
use strict;
use warnings;
use Gettree;
use Getopt::Std;
my %opts;
getopts('eislnpm' , \%opts);
my $searchstr;
my $filename;
my $debug=0;
if($ARGV[0]=~m/help/ || $ARGV[0]=~m/^\?$/ )
{ die usage(); }
if($#ARGV<0||$#ARGV>1)
{
usage();
killapp("Please specifiy node and search term, use --help for the help menu");
}
elsif($#ARGV==0)
{
Gettree::setopts( \%opts , \$ARGV[0] );
while(<STDIN>)
{
Gettree::gettree_stream_passline( \$_ );
}
print Gettree::gettree_getreturnstring();
}
else
{
$filename= $ARGV[0];
$filename="/CONFIGS_DIR/".lc $filename if ! $opts{e};
print Gettree::gettree_file ( \%opts , \$filename , \$ARGV[1]) ; #\$filename , $searchstring
}
sub killapp
{
print $_[0]."\n";
exit;
}
sub usage
{
print "
Usage: gettree [OPTION]... [NODE] STRING
Search for PATTERN in each FILE or standard input.
usage gettree <options> <node> <string>
-s include same level
-l include lower levels
-n show line numbers (do not use with STDIN, it wont work)
-i case insensitive
-e exact file location (rather than just the nodename)
-p print parent's same level lines
-m minimal print, do not print parents
Examples:
gettree Node text
gettree -sln NODE CCT_Ref
gettree -l NODE POS8/0
\n\n";
exit;
}
Module
#!/usr/bin/perl
package Gettree;
use strict;
use warnings;
my $line;
my $wsdiff = 0;
my $iopt = 0;
my $sopt = 0;
my $lopt = 0;
my $nopt = 0;
my $popt = 0;
my $mopt = 0;
my $linecounter = 0;
my $matched = -1;
my $debug = 0; ##remove later
my #arr;
my #sopt_arr;
my #popt_arr;
my $searchstr;
my $returnstring;
sub setopts # \%opthash , $searchstring
{
cleardata();
push #arr, [ 0, "",0];
my %hash=%{$_[0]};
$iopt = 1 if $hash{i};
$sopt = 1 if $hash{s};
$lopt = 1 if $hash{l};
$nopt = 1 if $hash{n};
$popt = 1 if $hash{p};
$mopt = 1 if $hash{m};
if ( defined $hash{qopts} )
{
$iopt = 1 if $hash{qopts} =~ /i/;
$lopt = 1 if $hash{qopts} =~ /l/;
$nopt = 1 if $hash{qopts} =~ /n/;
$sopt = 1 if $hash{qopts} =~ /s/;
$popt = 1 if $hash{qopts} =~ /p/;
$mopt = 1 if $hash{qopts} =~ /m/;
}
if ( ref($_[1]) ) { $searchstr=$iopt? qr/${$_[1]}/i : qr/${$_[1]}/ ; }
else { $searchstr=$iopt? qr/$_[1]/i : qr/$_[1]/ ; }
}
sub gettree_stream_passline # \$line
{
process_line(${$_[0]});
}
sub gettree_getreturnstring
{
return $returnstring;
}
sub gettree_varable # \%opthash , \$text , $searchstring
{
setopts($_[0] , $_[2]);
my $str=${$_[1]};
while($str=~m#(.*\n)#g)
{
process_line($1);
}
return $returnstring;
}
sub gettree_file # \%opthash , \$filename , $searchstring
{
setopts($_[0] , $_[2]);
my $filename;
if ( ref($_[1]) ) { $filename=${$_[1]}; }
else { $filename=$_[1] ; }
open FH, "<", $filename or die "\nFile ".$filename." cannot be found\nerror : ".$!."\n";
while(my $text=<FH>)
{
process_line($text);
}
close FH;
return $returnstring;
}
sub process_line
{
$line=shift;
if($line=~m/^([ \t]+)/) { $wsdiff=length($1) }
else { $wsdiff=0 };
if($wsdiff>$arr[$#arr][0])
{
push #arr, [ $wsdiff , $line , $linecounter ];
if ( $sopt || $popt )
{
#popt_arr=#sopt_arr if $popt;
#sopt_arr=() if defined $sopt_arr[0];
}
}
else
{
while( #arr && $arr[$#arr][0]>$wsdiff )
{
pop #arr;
#sopt_arr=#popt_arr if ( $sopt || $popt );
#popt_arr=() if $popt;
}
if($#arr<0)
{
push #arr, [ $wsdiff , $line, $linecounter ];
}
else
{
push #sopt_arr, $arr[$#arr] if $sopt || $popt ;
$arr[$#arr]=[ $wsdiff , $line , $linecounter ];
}
}
#sopt_arr=() if $#sopt_arr>200; ## to avoid filling the memory
#popt_arr=() if $#popt_arr>200; ## to avoid filling the memory
##used in l and s opts to print lines after match
if($matched>=0)
{
if($wsdiff>$matched)
{
printline(\$line) if $lopt==1 ;
}
elsif ($wsdiff<$matched)
{
$matched=-1;
}
else
{
if ($sopt )
{ printline(\$line) }
else
{ $matched=-1 }
}
}
if( $matched==-1 && $line=~m/$searchstr/ )
{
printtree();
$matched=$wsdiff if $sopt || $lopt;
}
$linecounter++;
}
sub printtree
{
if(!$mopt)
{
for (0..$#arr-(1+$popt))
{
printline( \$arr[$_][1] , \$arr[$_][2] );
}
}
if($popt)
{
for (0..$#popt_arr)
{
printline( \$popt_arr[$_][1] , \$popt_arr[$_][2] );
}
printline( \$arr[$#arr-1][1] , \$arr[$#arr-1][2] ); #print the parent
#popt_arr=() ;
}
if($sopt)
{
for (0..$#sopt_arr)
{
printline( \$sopt_arr[$_][1] , \$sopt_arr[$_][2] );
}
#sopt_arr=() ;
}
printline( \$arr[$#arr][1] , \$arr[$#arr][2] );
#arr=();
push #arr, [ $wsdiff , $line , $linecounter ];
}
sub printline
{
$nopt==1? $returnstring.= ${$_[1]}+1 ." : ".${$_[0]} : $returnstring.= ${$_[0]};
}
sub cleardata
{
$line="";
$wsdiff = 0;
$iopt = 0;
$sopt = 0;
$lopt = 0;
$nopt = 0;
$popt = 0;
$mopt = 0;
$linecounter = 0;
$matched = -1;
#arr=();
#sopt_arr=();
#popt_arr=();
$searchstr="";
$returnstring="";
}
1;
Breif explanation how it works
The program is just a link to the module. I's a module because i've used it in many programs and stand alone. Gettree.pm will send data line by line to process_line(). process line will get the white-space ($wsdiff) and use this as a marker. any line directly before an increment in whitespace will be stored in #arr. for printing if a match is found later. so the parent is stored. #sopt_arr is for the same line, so it stores previous lines of the same white-space. #popt_arr is for the parent match, but this doesn't work very well (i dont really use it, it could be removed). when a match for the search string is made, #arr,Sopt_arr, & #popt_arr are printed, $matched is set, this will be used for -l option. all lines after the match are printed until the white-space is < the matched white-space. so in summary, it will take each unique white-space before an increment. it works for Juniper and Alcatel too. I'm sure it would work on others too.
remember to modify CONFIGS_DIR and PATH_TO_Gettree.pm_MODULE to match your filesystem paths
I would like to sort my module subroutines alphabetically (I have a lot of subroutines, and I think it will be easier to edit the file if the subroutines are ordered in the file). For example given A.pm:
package A;
use warnings;
use strict;
sub subA {
print "A\n";
}
sub subC {
print "C\n";
}
sub subB {
print "B\n";
}
1;
I would like to run a sortSub A.pm the gives:
package A;
use warnings;
use strict;
sub subA {
print "A\n";
}
sub subB {
print "B\n";
}
sub subC {
print "C\n";
}
1;
Is there any CPAN resource that can help with this task?
To parse and reformat Perl code, you should use PPI.
This is the same tool that Perl::Critic and Perl::Tidy use to accomplish all of their feats.
In this case, I studied the code for PPI::Dumper to get a sense of how to navigate the Document Tree that PPI returns.
The following will parse source code and separate out sections containing subroutines and comments. It will tie the comments, pod, and whitespace before a subroutine with it, and then it will sort all the neighboring subs by their names.
use strict;
use warnings;
use PPI;
use Data::Dump;
my $src = do { local $/; <DATA> };
# Load a document
my $doc = PPI::Document->new( \$src );
# Save Sub locations for later sorting
my #group = ();
my #subs = ();
for my $i ( 0 .. $#{ $doc->{children} } ) {
my $child = $doc->{children}[$i];
my ( $subtype, $subname )
= $child->isa('PPI::Statement::Sub')
? grep { $_->isa('PPI::Token::Word') } #{ $child->{children} }
: ( '', '' );
# Look for grouped subs, whitespace and comments. Sort each group separately.
my $is_related = ($subtype eq 'sub') || grep { $child->isa("PPI::Token::$_") } qw(Whitespace Comment Pod);
# State change or end of stream
if ( my $range = $is_related .. ( !$is_related || ( $i == $#{ $doc->{children} } ) ) ) {
if ($is_related) {
push #group, $child;
if ( $subtype ) {
push #subs, { name => "$subname", children => [#group] };
#group = ();
}
}
if ( $range =~ /E/ ) {
#group = ();
if (#subs) {
# Sort and Flatten
my #sorted = map { #{ $_->{children} } } sort { $a->{name} cmp $b->{name} } #subs;
# Assign back to document, and then reset group
my $min_index = $i - $range + 1;
#{ $doc->{children} }[ $min_index .. $min_index + $#sorted ] = #sorted;
#subs = ();
}
}
}
}
print $doc->serialize;
1;
__DATA__
package A;
use warnings;
use strict;
=comment
Pod describing subC
=cut
sub subC {
print "C\n";
}
INIT {
print "Hello World";
}
sub subB {
print "B\n";
}
# Hello subA comment
sub subA {
print "A\n";
}
1;
Output:
package A;
use warnings;
use strict;
=comment
Pod describing subC
=cut
sub subC {
print "C\n";
}
INIT {
print "Hello World";
}
# Hello subA comment
sub subA {
print "A\n";
}
sub subB {
print "B\n";
}
1;
First, here's my solution;
#!/bin/sh
TOKEN=sub
gsed -e ':a;N;$!ba;s/\n/__newline__/g' "$1" > "$1.out"
gsed -i "s/__newline__\\s*$TOKEN\W/\\nsub /g" "$1.out"
sort $1.out -o $1.out
gsed -i 's/__newline__/\n/g' $1.out
Usage: token_sort.sh myfile.pl
This is how it works;
Replace all newlines with a placeholder, __newline__
break out all $TOKENS, in this case subs, to their own line
Sort the lines using unix sort
Replace back all the newlines
You should now have a sorted copy of your file in myfile.pl.out
A few caveats;
Add a comment, "# Something", or "#!/usr/bin/env perl" to the top of the file; this will ensure that the header block remains sorted at the top.
The sorted block will be the start of the current sub to the next sub - comments at above the sub will get sorted with the previous sub.
You need to use gnu-sed for this to work, on a mac this means doing a "brew install gnu-sed"
This one really has me confused and I don't know how to accurately title it.
I am writing a program, the purpose is irrelevant, but some of you may know as I've been asking a few questions about it recently.
I'm going to post the entire program but I don't think that's necessary. The part you need to look at is the nested loops where it says "beginning search algorithm."
The program takes a long time to complete, so after every iteration of the outermost loop, I print a '.'. after every 7 dots a new line is printed.
for some reason, however no dots will get printed, until a newline is printed.
heres the code:
#!/usr/bin/perl
use v5.14;
use warnings;
# this is a cgi implementation of a theorum proover.
# the program uses resolution refutation, using a breadth-first and set of support strategy
# to generate a proof(if possible) and relay the results to the user.
########################################################################################
#Algorithm:
#1.) Get size(i) of knowledge base
#2.) untill you have i clauses
# 3.) get the clause, add to knowledge base
#4.) get the conclusion variable(conjecture)
#5.) add the negation of the conjecture to the knowledge base
#6.) add the negation of the conjecture to the SOS set.
#7.) compare the SOS set to ever other clause
# 8.) if resolution is possible, add the new clause to the knowledge base if it does not already exist.
# 9.) add the new clause to the SOS set.
#10.) repeat 7-9 untill the null clause is generated or no more resolution is possible.
########################################################################################
my $conclusion;
my $conclusion2;
my #conclusion;
my #SOS;
my #clauses;
my $found=0;
#batch mode
if($ARGV[0])
{
my $filename = $ARGV[0];
open(IN, "<", $filename);
chomp(#clauses=<IN>);
close(IN);
for(#clauses)
{
$_ =~ s/[^A-Za-z~,]//g;
}
#negate the negation to get the desired conclusion for later
$conclusion2=$clauses[$#clauses];
print "$conclusion2";
#conclusion = split("", $conclusion2);
if($conclusion[0] eq '~')
{
splice(#conclusion, 0, 1);
$found=1;
}
if (!$found)
{
$conclusion = "~$conclusion2";
}
else
{
$conclusion = join("", #conclusion);
}
#now break up each line and make #clauses 2d
$_ = [split /,/ ] for #clauses;
}
#interactive mode
else
{
my $count=0;
say "Welcome to my Theorum Proover!";
say "How many clauses are in your knowledge base?";
say "(this does not include the conclusion)";
print "Amount: ";
my $amt = <>;
say "Enter your clauses: ";
say "Negations can be indicated with a '~'.";
say "Variable names must contain only letters.";
say "Separate each literal with a ','<br>";
my $clauses;
while($count < $amt)
{
print "clause $count:";
$clauses .= <>;
$clauses =~ s/[^A-Za-z~,]//g;
$clauses .= ";";
$count++;
print "\n";
}
print "\n \n \n Enter the conclusion, your conclusion should be a literal:";
$conclusion = <>;
$conclusion =~ s/[^A-Za-z~]//g;
print "\n";
#negate the conclusion and add it to the set of clauses.
#conclusion = split("", $conclusion);
if($conclusion[0] eq '~')
{
splice(#conclusion, 0, 1);
$found=1;
}
if (!$found)
{
$conclusion2 = "~$conclusion";
}
else
{
$conclusion2 = join("", #conclusion);
}
# split up the contents of the clause string and add them to a 2d array.
#then, add the negated conclusion to the list.
my #PartClauses= split(';', $clauses);
my $last=#PartClauses;
for my $i (0 .. $#PartClauses)
{
my #tmp=split(',', $PartClauses[$i]);
for my $j (0 .. #tmp)
{
$clauses[$i][$j] = $tmp[$j];
}
}
$clauses[$last][0] = $conclusion2;
}
open(RESULTS, ">", 'results.txt');
for my $i (0 .. $#clauses)
{
print RESULTS "clause $i: {";
for my $j (0 .. $#{ $clauses[$i] })
{
print RESULTS "$clauses[$i][$j]";
if($j != $#{ $clauses[$i] })
{
print RESULTS ",";
}
}
print RESULTS "}\n";
}
print RESULTS "_____________________________\n";
print "Beginning search ....";
##################################################
#begin breadthfirst/sos search/add algorithm
$SOS[0][0]=$conclusion2;
my $cSize=$#clauses;
say "\nworking......";
my $sAdd=0;
my $cAdd=0;
my $res=0;
my $flag=0;
my $dots=0;
SOSROW:
for (my $a=0; $a<=$#SOS; $a++)
{
&update;
CLAUSEROW:
for (my $i=0; $i<=$#clauses; $i++)
{
SOSCOL:
for (my $b=0; $b<=$#{ $SOS[$a] }; $b++)
{
CLAUSECOL:
for my $j (0 .. $#{ $clauses[$i] })
{
if($SOS[$a][$b] eq "~$clauses[$i][$j]"
|| $clauses[$i][$j] eq "~$SOS[$a][$b]")
{
my #tmp;
#found a resolution, so add all other literals from
#both clauses to each set as a single clause
#start with the SOS literals(use a hash to keep track of duplicates)
my %seen;
for my $x (0 .. $#{ $SOS[$a] })
{
if($x != $b)
{
$seen{$SOS[$a][$x]}=1;
push #tmp, "$SOS[$a][$x]";
}
}
#now add the literals from the non-SOS clause
for my $y (0 .. $#{ $clauses[$i] })
{
if($y != $j)
{
if(! $seen{ $clauses[$i][$y] })
{
push(#tmp, "$clauses[$i][$y]");
}
}
}
#check to see if the clause is already listed
my $dupl = 0;
my #a1 = sort(#tmp);
my $s1 = join("", #a1);
MATCH:
for my $i (0 .. $#clauses)
{
my #a2= sort(#{ $clauses[$i] });
my $s2= join("", #a2);
if($s1 eq $s2 )
{
$dupl = 1;
last MATCH;
}
}
#if it isn't, go ahead and add it in
if(! $dupl)
{
$res++;
$sAdd++;
$cAdd++;
my $s = $cSize + $cAdd;
push(#SOS, \#tmp);
push(#clauses, \#tmp);
#print out the new clauses.
print RESULTS"clause $s: ";
my $clause = $cSize+$a;
print RESULTS "{";
if($SOS[$sAdd][0])
{
for my $j(0 .. $#{ $clauses[$s] })
{
if($clauses[$s][$j])
{
print RESULTS "$clauses[$s][$j]";
}
if($j!= $#{ $clauses[$s] })
{
print RESULTS ",";
}
}
print RESULTS "} ($i,$clause)\n";
}
#if you found a new res, but there was nothing to push, you found
# the contradiction, so signal and break.
else
{
print RESULTS "} ($i,$clause)\n";
$flag=1;
last SOSROW;
}
}
}
}
}
}
}
close(RESULTS);
if($flag)
{
say "After $res resolutions, a resolvent was found and the empty set was generated.";
say "This indicates that when '$conclusion' is false, the entire knowledge base is false.";
say "Because we know that the clauses in the knowledge base are actually true, we can soundly conclude that '$conclusion must also be true.";
say "The clauses generated by each resolution can be found below.\n\n";
}
else
{
say "We were not able to generate the empty clause.";
say "this means that adding the negation of the desired conclusion does not render the theorum false.";
say "Therefore, we can not safely conclude that '$conclusion' is true.";
say "Any clauses that we were able to generate through a resoluton can be viewed below.\n\n";
}
print `more results.txt`;
sub update
{
if((($dots % 7) == 0))
{
print "\n";
}
if($dots==14)
{
print "You might want to get some coffee.\n";
}
if($dots==35)
{
print "I'm being VERY Thorough.\n";
}
if($dots==63 || $dots==140)
{
print "Hows that coffee?\n";
}
if($dots==105)
{
print "I think it might be time for a second cup of coffee\n"
}
if($dots==210)
{
print "Like I said, VERY thorough\n";
}
if($dots==630)
{
print "My O is bigger than you can imagine\n"
}
$dots++;
print ".";
}
I can't figure out why this is happening. could it have something to do with buffering?
If instead of calling the subroutine, i just say print "."; nothing will be printed until, the prog finishes execution.
Yes, filehandles are buffered by default. If STDOUT points to a terminal it will be line-buffered (nothing is output until a newline is printed), otherwise it will be block-buffered (nothing is output until a certain number of bytes is printed). The easiest way to change that is to set $|=1, which will make the current output filehandle (usually STDOUT unbuffered), so it will flush after every print.
I was given the assignment to implement a linked list in perl without using the built-in push,pop,shift and unshift. This is my first time learning perl, coming from c++ and java, this is what I came up with:
#!/usr/bin/perl
sub node {
my (#value) = #_;
sub get {
$next;
}
sub push {
#my $next = \#_;
if(defined($next))
{
$next->push(#_);
}
else
{
my $next = \#_;
}
}
sub size {
if(defined($next))
{
$next->size($_[0]);
}
else
{
$_[0]+1;
}
}
sub myprint {
print "$_[0]: ";
foreach (#value) {
print "$_, ";
}
print "\n";
if(defined($next)) {
$next->print($_[0]+1);
}
}
}
while(!defined($done))
{
print "what do you want to do?\n";
print "1 -- push\n";
print "2 -- print nodes\n";
print "3 -- pop\n";
print "4 -- quit\n";
my $val = <STDIN>;
if ($val == 1)
{
print "Type something: ";
$input = <STDIN>;
if(defined($top))
{
$top->push(node($input));
}
else
{
$top = node($input);
}
}
elsif ($val == 2)
{
if(defined($top))
{
$top->myprint(1);
}
}
elsif ($val == 3)
{
if(defined($top))
{
if(defined($top->next))
{
$top=$top->next;
}
}
}
elsif ($val == 4)
{
$done=true;
}
else
{
print "Invalid option\n";
}
}
output:
what do you want to do?
1 -- push
2 -- print nodes
3 -- pop
4 -- quit
1
Type something: q
what do you want to do?
1 -- push
2 -- print nodes
3 -- pop
4 -- quit
1
Type something: w
Can't call method "push" without a package or object reference at ./linkedlistattempt1.pl line 76, <STDIN> line 4.
I'm guessing the "->" operator can only be used with a module or package.
I haven't gotten around to testing the other methods, I'm still working on push. I feel like the best way of doing this is to simply have a holder sub like `sub holder { $value = \#_; } but I don't understand how I would (could?) add more variables, like the next node, hence the sub within a sub design. So without the help of perl's built in functions, how would I do this?
Its important to mention I'm interested in methods that can run on the older versions, going down to 5.10. Most if not all of the tutorials are showing stuff for 5.16 or 5.18
A typical Perl implementation using classic Perl OO would look something like this. Read the man pages perlootut and perlobj to learn how it works.
#!/usr/bin/perl
use strict;
use warnings;
package LinkedList::Node;
# Constructor.
sub new {
my ($class, $item) = #_;
my $self = { item => $item };
return bless($self, $class);
}
# Read-only accessor.
sub item {
my $self = shift;
return $self->{item};
}
# Read-write accessor.
sub next {
my $self = shift;
my $next = $self->{next};
if (#_ > 0) {
$self->{next} = shift;
}
return $next;
}
package LinkedList;
# Constructor. Creates an empty linked list.
sub new {
my $class = shift;
return bless({}, $class);
}
# Read-only accessor.
sub head {
my $self = shift;
return $self->{head};
}
# Insert an item at the beginning.
sub push {
my ($self, $item) = #_;
my $node = LinkedList::Node->new($item);
my $head = $self->{head};
if ($head) {
$node->next($head);
}
$self->{head} = $node;
}
package main;
my $list = LinkedList->new;
$list->push(2);
$list->push(5);
$list->push(9);
for (my $node = $list->head; $node; $node = $node->next) {
print($node->item, "\n");
}
I am working on a very large, very old "historically grown" codebase. In the past, there were often people thinking "Oh, I may need this and that module, so I just include it...", and later, people often "cached" Data inside of modules ("use ThisAndThat" needing a few seconds to load some hundred MB from DB to RAM, yeah, its really a stupid Idea, we are working on that too) and so, often, we have a small module use'ing like 20 or 30 modules, from who 90% are totally unused in the source itself, and, because of "caching" in several use'd submodules, modules tend to take up one minute to load or even more, which is, of course, not acceptable.
So, Im trying to get that done better. Right now, my way is looking through all the modules, understanding them as much as possible and I look at all the modules including them and see whether they are needed or not.
Is there any easier way? I mean: There are functions returning all subs a module has like
...
return grep { defined &{"$module\::$_"} } keys %{"$module\::"}
, so, aint there any simple way to see which ones are exported by default and which ones come from where and are used in the other modules?
A simple example is Data::Dumper, which is included in nearly every file, even, when all debug-warns and prints and so on arent in the script anymore. But still the module has to load Data::Dumper.
Is there any simple way to check that?
Thanks!
The following code could be part of your solution - it will show you which symbols are imported for each instance of use:
package traceuse;
use strict;
use warnings;
use Devel::Symdump;
sub import {
my $class = shift;
my $module = shift;
my $caller = caller();
my $before = Devel::Symdump->new($caller);
my $args = \#_;
# more robust way of emulating use?
eval "package $caller; require $module; $module\->import(\#\$args)";
my $after = Devel::Symdump->new($caller);
my #added;
my #after_subs = $after->functions;
my %before_subs = map { ($_,1) } $before->functions;
for my $k (#after_subs) {
push(#added, $k) unless $before_subs{$k};
}
if (#added) {
warn "using module $module added: ".join(' ', #added)."\n";
} else {
warn "no new symbols from using module $module\n";
}
}
1;
Then just replace "use module ..." with "use traceuse module ...", and you'll get a list of the functions that were imported.
Usage example:
package main;
sub foo { print "debug: foo called with: ".Dumper(\#_)."\n"; }
use traceuse Data::Dumper;
This will output:
using module Data::Dumper added: main::Dumper
i.e. you can tell which functions were imported in robust way. And you can easily extend this to report on imported scalar, array and hash variables - check the docs on Devel::Symdump.
Determine which functions are actually used is the other half of the equation. For that you might be able to get away with a simple grep of your source code - i.e. does Dumper appear in the module's source code that's not on a use line. It depends on what you know about your source code.
Notes:
there may be a module which does what traceuse does - I haven't checked
there might be a better way to emulate "use" from another package
I kind of got of got it to work with PPI. It looks like this:
#!/usr/local/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Term::ANSIColor;
use PPI;
use PPI::Dumper;
my %doneAlready = ();
$" = ", ";
our $maxDepth = 2;
my $showStuffOtherThanUsedOrNot = 0;
parse("/modules/Test.pm", undef, undef, 0);
sub parse {
my $file = shift;
my $indent = shift || 0;
my $caller = shift || $file;
my $depth = shift || 0;
if($depth && $depth >= $maxDepth) {
return;
}
return unless -e $file;
if(exists($doneAlready{$file}) == 1) {
return;
}
$doneAlready{$file} = 1;
my $skript = PPI::Document->new($file);
my #included = ();
eval {
foreach my $x (#{$skript->find("PPI::Statement::Include")}) {
foreach my $y (#{$x->{children}}) {
push #included, $y->{content} if (ref $y eq "PPI::Token::Word" && $y->{content} !~ /^(use|vars|constant|strict|warnings|base|Carp|no)$/);
}
}
};
my %double = ();
print "===== $file".($file ne $caller ? " (Aufgerufen von $caller)" : "")."\n" if $showStuffOtherThanUsedOrNot;
if($showStuffOtherThanUsedOrNot) {
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $is_crap = ((exists($double{$modul})) ? 1 : 0);
print "\t" x $indent;
print color("blink red") if($is_crap);
print $modul;
print color("reset") if($is_crap);
print "\n";
$double{$modul} = 1;
}
}
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $anyUsed = 0;
my $modulDoc = parse(createFileName($modul), $indent + 1, $file, $depth + 1);
if($modulDoc) {
my #exported = getExported($modulDoc);
print "Exported: \n" if(scalar #exported && $showStuffOtherThanUsedOrNot);
foreach (#exported) {
print(("\t" x $indent)."\t");
if(callerUsesIt($_, $file)) {
$anyUsed = 1;
print color("green"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
} else {
print color("red"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
}
print "\n" if $showStuffOtherThanUsedOrNot;
}
print(("\t" x $indent)."\t") if $showStuffOtherThanUsedOrNot;
print "Subs: " if $showStuffOtherThanUsedOrNot;
foreach my $s (findAllSubs($modulDoc)) {
my $isExported = grep($s eq $_, #exported) ? 1 : 0;
my $rot = callerUsesIt($s, $caller, $modul, $isExported) ? 0 : 1;
$anyUsed = 1 unless $rot;
if($showStuffOtherThanUsedOrNot) {
print color("red") if $rot;
print color("green") if !$rot;
print "$s, ";
print color("reset");
}
}
print "\n" if $showStuffOtherThanUsedOrNot;
print color("red"), "=========== $modul wahrscheinlich nicht in Benutzung!!!\n", color("reset") unless $anyUsed;
print color("green"), "=========== $modul in Benutzung!!!\n", color("reset") if $anyUsed;
}
}
return $skript;
}
sub createFileName {
my $file = shift;
$file =~ s#::#/#g;
$file .= ".pm";
$file = "/modules/$file";
return $file;
}
sub getExported {
my $doc = shift;
my #exported = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement")}) {
my $worthATry = 0;
my $isMatch = 0;
foreach my $y (#{$x->{children}}) {
$worthATry = 1 if(ref $y eq "PPI::Token::Symbol");
if($y eq '#EXPORT') {
$isMatch = 1;
} elsif($isMatch && ref($y) ne "PPI::Token::Whitespace" && ref($y) ne "PPI::Token::Operator" && $y->{content} ne ";") {
push #exported, $y->{content};
}
}
}
};
my #realExported = ();
foreach (#exported) {
eval "\#realExported = $_";
}
return #realExported;
}
sub callerUsesIt {
my $subname = shift;
my $caller = shift;
my $namespace = shift || undef;
my $isExported = shift || 0;
$caller = `cat $caller`;
unless($namespace) {
return 1 if($caller =~ /\b$subname\b/);
} else {
$namespace = createPackageName($namespace);
my $regex = qr#$namespace(?:::|->)$subname#;
if($caller =~ $regex) {
return 1;
}
}
return 0;
}
sub findAllSubs {
my $doc = shift;
my #subs = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement::Sub")}) {
my $foundName = 0;
foreach my $y (#{$x->{children}}) {
no warnings;
if($y->{content} ne "sub" && ref($y) eq "PPI::Token::Word") {
push #subs, $y;
}
use warnings;
}
}
};
return #subs;
}
sub createPackageName {
my $name = shift;
$name =~ s#/modules/##g;
$name =~ s/\.pm$//g;
$name =~ s/\//::/g;
return $name;
}
Its really ugly and maybe not 100% working, but it seems, with the tests that Ive done now, that its good for a beginning.