I've been trying to create a config file parser to parse Cisco IOS configs and such. The final objective would be to show relevant data in contexts based on filters in a configuration file. For example, with such a config file it would display all interfaces where we've found the line "access vlan" as a child of the "interface" context and only show lines containing "speed", "duplex" and "description".
{
'Context' => '^interface',
'Types' => [
'Switch',
],
'Condition' => 'access vlan',
'Filter' => [
'speed',
'duplex',
'description'
]
};
So far, so good. I read the "running-config" and I index the lines depth (given that a non-empty line , not beginning with a space (\s) has a depth of 0) in an array.
Then, in another read I use that index to read the data again, this time using relative position based on depth to create the "childs" of a context. Here's the function :
sub getDeep {
my #data = (#_);
my ($bighash,$hash);
#First read
foreach my $idx (0.. $#data) {
my ($spaces, $content) = ($data[$idx] =~ m/^(\s*)(.*)/);
my $depth = length $spaces;
$bighash->{node}{$idx}{depth} = $depth;
}
# Variables for the first read
my $ldepth = 0;
my $lcontext;
my $lid;
# Second read
foreach my $id (0 .. $#data) {
$data[$id] =~ s/^\s*//;
next if ($data[$id] =~ /^!/);
my $depth = $bighash->{node}{$id}{depth};
if ($depth eq 0) {
push (#{$hash->{global}} , $data[$id]);
$lcontext = $data[$id];
$lid = $id;
}
if (($depth gt 0) && ($id - $lid eq 1)) {
push (#{$hash->{$lcontext}}, (" " x $depth. $data[$id]));
$lid = $id;
}
}
return $hash;
}
Using this sub, I can return a hash, then based on the presence of an arrayref for a given key, apply filters as explained. This works pretty well, so far very proud of this piece of code.
Problem comes when I want to find childs of childs. In the example below, the childs of "given param2" would reprensent my next challenge.
interface XYZ
given param1 -> child of "interface XYZ"
given param2 -> child of "interface XYZ"
given param2.1 -> child of "given param2"
given param2.2 -> child of "given param2"
given param3 -> child of "interface XYZ"
So after thinking about this for a while and failing with different approaches, my question comes in 2 separate parts :
1) Is there a better way to do this that I'm not seeing ?
2) How could I keep tagging childs of childs as the lines dig deeper and identify them properly in a data structure ?
Thank you for reading up to this line :)
This thread contains the solution I was hoping for :) Since it might benefit to others, here's the link :
https://perlmonks.org/?node_id=1224600
Cheers !
I have written something to do exactly this. I can't figure out how to put it on metacpan. However, im sure there is better already on there if I knew where to look. It's on of the first i wrote in perl, so it's a bit of a mess. But basically you can type "gettree -l Node interface" and on a XR device it'll pull all of the config. "gettree -s Node Description_keyword" will pull all of a single interface config. you can also use it with STDIN e.g., "cat file | gettree -l interface".
Program
#!/usr/bin/perl
use lib '/PATH_TO_Gettree.pm_MODULE/';
use strict;
use warnings;
use Gettree;
use Getopt::Std;
my %opts;
getopts('eislnpm' , \%opts);
my $searchstr;
my $filename;
my $debug=0;
if($ARGV[0]=~m/help/ || $ARGV[0]=~m/^\?$/ )
{ die usage(); }
if($#ARGV<0||$#ARGV>1)
{
usage();
killapp("Please specifiy node and search term, use --help for the help menu");
}
elsif($#ARGV==0)
{
Gettree::setopts( \%opts , \$ARGV[0] );
while(<STDIN>)
{
Gettree::gettree_stream_passline( \$_ );
}
print Gettree::gettree_getreturnstring();
}
else
{
$filename= $ARGV[0];
$filename="/CONFIGS_DIR/".lc $filename if ! $opts{e};
print Gettree::gettree_file ( \%opts , \$filename , \$ARGV[1]) ; #\$filename , $searchstring
}
sub killapp
{
print $_[0]."\n";
exit;
}
sub usage
{
print "
Usage: gettree [OPTION]... [NODE] STRING
Search for PATTERN in each FILE or standard input.
usage gettree <options> <node> <string>
-s include same level
-l include lower levels
-n show line numbers (do not use with STDIN, it wont work)
-i case insensitive
-e exact file location (rather than just the nodename)
-p print parent's same level lines
-m minimal print, do not print parents
Examples:
gettree Node text
gettree -sln NODE CCT_Ref
gettree -l NODE POS8/0
\n\n";
exit;
}
Module
#!/usr/bin/perl
package Gettree;
use strict;
use warnings;
my $line;
my $wsdiff = 0;
my $iopt = 0;
my $sopt = 0;
my $lopt = 0;
my $nopt = 0;
my $popt = 0;
my $mopt = 0;
my $linecounter = 0;
my $matched = -1;
my $debug = 0; ##remove later
my #arr;
my #sopt_arr;
my #popt_arr;
my $searchstr;
my $returnstring;
sub setopts # \%opthash , $searchstring
{
cleardata();
push #arr, [ 0, "",0];
my %hash=%{$_[0]};
$iopt = 1 if $hash{i};
$sopt = 1 if $hash{s};
$lopt = 1 if $hash{l};
$nopt = 1 if $hash{n};
$popt = 1 if $hash{p};
$mopt = 1 if $hash{m};
if ( defined $hash{qopts} )
{
$iopt = 1 if $hash{qopts} =~ /i/;
$lopt = 1 if $hash{qopts} =~ /l/;
$nopt = 1 if $hash{qopts} =~ /n/;
$sopt = 1 if $hash{qopts} =~ /s/;
$popt = 1 if $hash{qopts} =~ /p/;
$mopt = 1 if $hash{qopts} =~ /m/;
}
if ( ref($_[1]) ) { $searchstr=$iopt? qr/${$_[1]}/i : qr/${$_[1]}/ ; }
else { $searchstr=$iopt? qr/$_[1]/i : qr/$_[1]/ ; }
}
sub gettree_stream_passline # \$line
{
process_line(${$_[0]});
}
sub gettree_getreturnstring
{
return $returnstring;
}
sub gettree_varable # \%opthash , \$text , $searchstring
{
setopts($_[0] , $_[2]);
my $str=${$_[1]};
while($str=~m#(.*\n)#g)
{
process_line($1);
}
return $returnstring;
}
sub gettree_file # \%opthash , \$filename , $searchstring
{
setopts($_[0] , $_[2]);
my $filename;
if ( ref($_[1]) ) { $filename=${$_[1]}; }
else { $filename=$_[1] ; }
open FH, "<", $filename or die "\nFile ".$filename." cannot be found\nerror : ".$!."\n";
while(my $text=<FH>)
{
process_line($text);
}
close FH;
return $returnstring;
}
sub process_line
{
$line=shift;
if($line=~m/^([ \t]+)/) { $wsdiff=length($1) }
else { $wsdiff=0 };
if($wsdiff>$arr[$#arr][0])
{
push #arr, [ $wsdiff , $line , $linecounter ];
if ( $sopt || $popt )
{
#popt_arr=#sopt_arr if $popt;
#sopt_arr=() if defined $sopt_arr[0];
}
}
else
{
while( #arr && $arr[$#arr][0]>$wsdiff )
{
pop #arr;
#sopt_arr=#popt_arr if ( $sopt || $popt );
#popt_arr=() if $popt;
}
if($#arr<0)
{
push #arr, [ $wsdiff , $line, $linecounter ];
}
else
{
push #sopt_arr, $arr[$#arr] if $sopt || $popt ;
$arr[$#arr]=[ $wsdiff , $line , $linecounter ];
}
}
#sopt_arr=() if $#sopt_arr>200; ## to avoid filling the memory
#popt_arr=() if $#popt_arr>200; ## to avoid filling the memory
##used in l and s opts to print lines after match
if($matched>=0)
{
if($wsdiff>$matched)
{
printline(\$line) if $lopt==1 ;
}
elsif ($wsdiff<$matched)
{
$matched=-1;
}
else
{
if ($sopt )
{ printline(\$line) }
else
{ $matched=-1 }
}
}
if( $matched==-1 && $line=~m/$searchstr/ )
{
printtree();
$matched=$wsdiff if $sopt || $lopt;
}
$linecounter++;
}
sub printtree
{
if(!$mopt)
{
for (0..$#arr-(1+$popt))
{
printline( \$arr[$_][1] , \$arr[$_][2] );
}
}
if($popt)
{
for (0..$#popt_arr)
{
printline( \$popt_arr[$_][1] , \$popt_arr[$_][2] );
}
printline( \$arr[$#arr-1][1] , \$arr[$#arr-1][2] ); #print the parent
#popt_arr=() ;
}
if($sopt)
{
for (0..$#sopt_arr)
{
printline( \$sopt_arr[$_][1] , \$sopt_arr[$_][2] );
}
#sopt_arr=() ;
}
printline( \$arr[$#arr][1] , \$arr[$#arr][2] );
#arr=();
push #arr, [ $wsdiff , $line , $linecounter ];
}
sub printline
{
$nopt==1? $returnstring.= ${$_[1]}+1 ." : ".${$_[0]} : $returnstring.= ${$_[0]};
}
sub cleardata
{
$line="";
$wsdiff = 0;
$iopt = 0;
$sopt = 0;
$lopt = 0;
$nopt = 0;
$popt = 0;
$mopt = 0;
$linecounter = 0;
$matched = -1;
#arr=();
#sopt_arr=();
#popt_arr=();
$searchstr="";
$returnstring="";
}
1;
Breif explanation how it works
The program is just a link to the module. I's a module because i've used it in many programs and stand alone. Gettree.pm will send data line by line to process_line(). process line will get the white-space ($wsdiff) and use this as a marker. any line directly before an increment in whitespace will be stored in #arr. for printing if a match is found later. so the parent is stored. #sopt_arr is for the same line, so it stores previous lines of the same white-space. #popt_arr is for the parent match, but this doesn't work very well (i dont really use it, it could be removed). when a match for the search string is made, #arr,Sopt_arr, & #popt_arr are printed, $matched is set, this will be used for -l option. all lines after the match are printed until the white-space is < the matched white-space. so in summary, it will take each unique white-space before an increment. it works for Juniper and Alcatel too. I'm sure it would work on others too.
remember to modify CONFIGS_DIR and PATH_TO_Gettree.pm_MODULE to match your filesystem paths
Related
ALL,
I have following:
This is perl 5, version 26, subversion 3 (v5.26.3) built for x86_64-linux-thread-multi
(with 57 registered patches, see perl -V for more detail)
Copyright 1987-2018, Larry Wall
Perl may be copied only under the terms of either the Artistic License or the
GNU General Public License, which may be found in the Perl 5 source kit.
Complete documentation for Perl, including FAQ lists, should be found on
this system using "man perl" or "perldoc perl". If you have access to the
Internet, point your browser at http://www.perl.org/, the Perl Home Page.
And the script I'm using have given and when.
Trying to Google I found this, which says it will be changed in an incompatible way.
I'm on RHEL8 and moving forward.
I'd like to fix the script, so that other get advantage of the script as well and don't see those warnings anymore.
How do I rewrite it to get rid of the warning?
TIA!
EDIT:
Sorry for not being explicit enough - by fixing the script I meant eliminating the problematic call/rewriting it.
EDIT2:
given($pl) {
when(0) { print "A"; }
when(1) { print "B"; }
when(2) { print "C"; }
when(3) { print "D"; }
when(4) { print "E"; }
default { print "Illegal "; }
}
where $pl is just an integer (unsigned short in C).
The script reads some data from the file.
The file contains some hex values. Those values are parsed - some bits are extracted ` and then the string representation of those bits are printed on screen.
EDIT3:
I tried to use following code:
my %table_pl = (
0 => sub { print "A"; },
1 => sub { print "B"; },
2 => sub { print "C"; },
3 => sub { print "D"; },
4 => sub { print "E"; },
);
for $pl( 0..5 )
{
if( exists $table_pl{$pl} )
{
$table_pl{$pl}->();
}
else
{
print "Illegal ";
}
}
but I got an error:
Number found where operator expected near 1
(Missing semicolon on the previous line?)
I like to use dispatch tables for things like this. Essentially it's a hash where the keys are the valid options, and the value of the key is a subroutine that does what you want to do when a valid option is supplied. Doing it this way eliminates a long chain of if/else statements.
You can put the subroutines inline in the table itself, or reference a subroutine defined elsewhere. The example here shows both:
use warnings;
use strict;
my %table = (
0 => sub { print "A\n"; },
1 => sub { print "B\n"; },
2 => sub { print "C\n"; },
3 => sub { print "D\n"; },
4 => \&code_reference_example,
);
for my $pl (0..5) {
if (exists $table{$pl}) {
# Put the option into the table and execute
# the assigned subroutine
$table{$pl}->();
} else {
print "'$pl' is not a valid option\n";
}
}
sub code_reference_example {
print "E\n";
}
Output:
A
B
C
D
E
'5' is not a valid option
given ( $pl ) {
when ( 0 ) { print "A"; }
when ( 1 ) { print "B"; }
when ( 2 ) { print "C"; }
when ( 3 ) { print "D"; }
when ( 4 ) { print "E"; }
default { print "Illegal "; }
}
is short for
given ( $pl ) {
when ( $_ ~~ 0 ) { print "A"; }
when ( $_ ~~ 1 ) { print "B"; }
when ( $_ ~~ 2 ) { print "C"; }
when ( $_ ~~ 3 ) { print "D"; }
when ( $_ ~~ 4 ) { print "E"; }
default { print "Illegal "; }
}
It can be written as
for ( $pl ) {
if ( $_ ~~ 0 ) { print "A"; }
elsif ( $_ ~~ 1 ) { print "B"; }
elsif ( $_ ~~ 2 ) { print "C"; }
elsif ( $_ ~~ 3 ) { print "D"; }
elsif ( $_ ~~ 4 ) { print "E"; }
else { print "Illegal "; }
}
But the whole point of the question is to avoid the broken/experimental smart matching feature. This would be the "equivalent":
for ( $pl ) {
if ( !looks_like_number( $_ ) ) {
print "Illegal ";
next;
}
if ( $_ == 0 ) { print "A"; }
elsif ( $_ == 1 ) { print "B"; }
elsif ( $_ == 2 ) { print "C"; }
elsif ( $_ == 3 ) { print "D"; }
elsif ( $_ == 4 ) { print "E"; }
else { print "Illegal "; }
}
It's not completely equivalent. The difference is that this last one treats the number 2 and the string 2 identically. The fact that smart matching treats them differently is the entire reason it's an unacceptable design, so this is desirable.
Finally, a dispatch table would work better here. A hash is often used, but an array could be used for the example given.
use Scalar::Util qw( looks_like_number );
sub is_nonneg_int {
my $x = shift;
return looks_like_number( $x ) && int( $x ) == $x && $x >= 0;
}
my #dispatch = (
sub { print "A"; },
sub { print "B"; },
sub { print "C"; },
sub { print "D"; },
sub { print "E"; },
);
my $handler = is_nonneg_int( $pl ) ? $dispatch[ $pl ] : undef;
if ( $handler ) {
$handler->();
} else {
print "Illegal ";
}
The advantage of a dispatch table is that a single lookup is done instead of multiple comparisons.
So I have stumbled upon a little issue when trying to build out a simple "Airport Search Script" in Perl.
my $filename = '/home/student/perl-basic/topic-07/iata_airports.csv';
my $number = '1';
my $matching;
my $latitude;
my $longitude;
my $word = 'false';
GetOptions (
"filename=s" => \$filename,
"number=i" => \$number,
"matching=s" => \$matching,
"latitude=f" => \$latitude,
"longitude=f" => \$longitude,
"word=s" => \$word
);
sub parse_airports {
my $file = shift;
my $csv = Text::CSV->new( { binary => 1, eol => $/ } );
open ( my $fh, "<", $file ), or die "Error opening input file: $!";
my $ra_colnames = $csv->getline ( $fh );
$csv->column_names( #$ra_colnames );
my $ra_airports = $csv->getline_hr_all( $fh );
close ( $fh );
return $ra_airports;
}
sub get_name_matching_airports {
}
my $rah_airports = parse_airports( $filename );
my $rah_airports_found = [];
if ($matching) {
say "Up to $number airports matching $matching in $filename:";
$rah_airports_found = get_name_matching_airports(
airports => $rah_airports,
matching_string => $matching,
word => $word,
);
}
elsif ($latitude && $longitude) {
say "Up to $number airports near [$latitude, $longitude] in $filename:"
}
else {
say "Must have at least --matching, or --latitude and --longitude as arguments";
}
print pp($rah_airports_found);
So where I am struggling is in the "sub get_name_matching_airports"
Because you do not have the file let me explain the file structure.
It is a hash (ALL IATA Airports) with hashes (DETAILS of each airport). There are around 15 keys in each airport hash and one of the keys titles is (NAME). I have opened the file and parsed all the info into a hash ref which is returned at the end of the sub "parse_airports".
In the sub "get_name_matching_airports" I need to find additional airports with similar names based on the argument I passed in, into ($matching).
EXAMPLE: I parse (case-insensitive) "London" as an argument from the command line e.g. ./search_airports2 --matching London. In the sub "get_name_matching_airports" I will need to respond with any airport that has london (case-insensitive) in key(name).
Then push these newly found airports which are similar into the array "rah_airports_found" and in the end print this out.
SO I SOLVED MY PROBLEM WITH THE FOLLOWING CODE:
sub get_name_matching_airports {
my %params = (
airports => undef,
matching_string => undef,
word => undef,
#_
);
my #rah_airports_found;
my $ra_airports = $params{airports};
my $counter = 0;
foreach my $i ( #$ra_airports ) {
if ( $params{word} ) {
if ( $i->{name} eq $params{matching_string} ) {
push #rah_airports_found, $i;
$counter++;
}
}
else {
if ( $i->{name} =~ /$params{matching_string}/i ) {
push #rah_airports_found, $i;
$counter++;
}
if ( defined( $number ) && $counter == $number ) {
return \#rah_airports_found;
}
}
}
return \#rah_airports_found;
}
Example:
for my $Airport_rf (keys %{$rah_airports}) {
if ( $Airport_rf->{NAME} =~ m{\Q$matching\E}xi) {
# do your stuff here
}
}
If you donĀ“t know the exact key of the hashref, you have to match the CLI parameter against all values.
For example, I am running standalone Blast+ for thousands of EST sequences with remote (NCBI) server. I am not getting any status message like 15 of 100 sequence is running. Is it possible to get any status message like that? or any other way to send one after another sequence using perl scripts?
Many thanks!
I suggest using Bioperl (http://metacpan.org/pod/BioPerl) and the Bio::Tools::Run::RemoteBlast module. See http://metacpan.org/pod/Bio::Tools::Run::RemoteBlast and here is the code example they give in the RemoteBlast.pm module
while (my $input = $str->next_seq()){
#Blast a sequence against a database:
#Alternatively, you could pass in a file with many
#sequences rather than loop through sequence one at a time
#Remove the loop starting 'while (my $input = $str->next_seq())'
#and swap the two lines below for an example of that.
my $r = $factory->submit_blast($input);
#my $r = $factory->submit_blast('amino.fa');
print STDERR "waiting..." if( $v > 0 );
while ( my #rids = $factory->each_rid ) {
foreach my $rid ( #rids ) {
my $rc = $factory->retrieve_blast($rid);
if( !ref($rc) ) {
if( $rc < 0 ) {
$factory->remove_rid($rid);
}
print STDERR "." if ( $v > 0 );
sleep 5;
} else {
my $result = $rc->next_result();
#save the output
my $filename = $result->query_name()."\.out";
$factory->save_output($filename);
$factory->remove_rid($rid);
print "\nQuery Name: ", $result->query_name(), "\n";
while ( my $hit = $result->next_hit ) {
next unless ( $v > 0);
print "\thit name is ", $hit->name, "\n";
while( my $hsp = $hit->next_hsp ) {
print "\t\tscore is ", $hsp->score, "\n";
}
}
}
}
}
}
Look at the method retrieve_blast (http://metacpan.org/pod/Bio::Tools::Run::RemoteBlast#retrieve_blast). It will return a status code to let you know if the blast job is finished. Let me know if you have more questions and I will try to clarify further.
Paul
I am working on a very large, very old "historically grown" codebase. In the past, there were often people thinking "Oh, I may need this and that module, so I just include it...", and later, people often "cached" Data inside of modules ("use ThisAndThat" needing a few seconds to load some hundred MB from DB to RAM, yeah, its really a stupid Idea, we are working on that too) and so, often, we have a small module use'ing like 20 or 30 modules, from who 90% are totally unused in the source itself, and, because of "caching" in several use'd submodules, modules tend to take up one minute to load or even more, which is, of course, not acceptable.
So, Im trying to get that done better. Right now, my way is looking through all the modules, understanding them as much as possible and I look at all the modules including them and see whether they are needed or not.
Is there any easier way? I mean: There are functions returning all subs a module has like
...
return grep { defined &{"$module\::$_"} } keys %{"$module\::"}
, so, aint there any simple way to see which ones are exported by default and which ones come from where and are used in the other modules?
A simple example is Data::Dumper, which is included in nearly every file, even, when all debug-warns and prints and so on arent in the script anymore. But still the module has to load Data::Dumper.
Is there any simple way to check that?
Thanks!
The following code could be part of your solution - it will show you which symbols are imported for each instance of use:
package traceuse;
use strict;
use warnings;
use Devel::Symdump;
sub import {
my $class = shift;
my $module = shift;
my $caller = caller();
my $before = Devel::Symdump->new($caller);
my $args = \#_;
# more robust way of emulating use?
eval "package $caller; require $module; $module\->import(\#\$args)";
my $after = Devel::Symdump->new($caller);
my #added;
my #after_subs = $after->functions;
my %before_subs = map { ($_,1) } $before->functions;
for my $k (#after_subs) {
push(#added, $k) unless $before_subs{$k};
}
if (#added) {
warn "using module $module added: ".join(' ', #added)."\n";
} else {
warn "no new symbols from using module $module\n";
}
}
1;
Then just replace "use module ..." with "use traceuse module ...", and you'll get a list of the functions that were imported.
Usage example:
package main;
sub foo { print "debug: foo called with: ".Dumper(\#_)."\n"; }
use traceuse Data::Dumper;
This will output:
using module Data::Dumper added: main::Dumper
i.e. you can tell which functions were imported in robust way. And you can easily extend this to report on imported scalar, array and hash variables - check the docs on Devel::Symdump.
Determine which functions are actually used is the other half of the equation. For that you might be able to get away with a simple grep of your source code - i.e. does Dumper appear in the module's source code that's not on a use line. It depends on what you know about your source code.
Notes:
there may be a module which does what traceuse does - I haven't checked
there might be a better way to emulate "use" from another package
I kind of got of got it to work with PPI. It looks like this:
#!/usr/local/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Term::ANSIColor;
use PPI;
use PPI::Dumper;
my %doneAlready = ();
$" = ", ";
our $maxDepth = 2;
my $showStuffOtherThanUsedOrNot = 0;
parse("/modules/Test.pm", undef, undef, 0);
sub parse {
my $file = shift;
my $indent = shift || 0;
my $caller = shift || $file;
my $depth = shift || 0;
if($depth && $depth >= $maxDepth) {
return;
}
return unless -e $file;
if(exists($doneAlready{$file}) == 1) {
return;
}
$doneAlready{$file} = 1;
my $skript = PPI::Document->new($file);
my #included = ();
eval {
foreach my $x (#{$skript->find("PPI::Statement::Include")}) {
foreach my $y (#{$x->{children}}) {
push #included, $y->{content} if (ref $y eq "PPI::Token::Word" && $y->{content} !~ /^(use|vars|constant|strict|warnings|base|Carp|no)$/);
}
}
};
my %double = ();
print "===== $file".($file ne $caller ? " (Aufgerufen von $caller)" : "")."\n" if $showStuffOtherThanUsedOrNot;
if($showStuffOtherThanUsedOrNot) {
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $is_crap = ((exists($double{$modul})) ? 1 : 0);
print "\t" x $indent;
print color("blink red") if($is_crap);
print $modul;
print color("reset") if($is_crap);
print "\n";
$double{$modul} = 1;
}
}
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $anyUsed = 0;
my $modulDoc = parse(createFileName($modul), $indent + 1, $file, $depth + 1);
if($modulDoc) {
my #exported = getExported($modulDoc);
print "Exported: \n" if(scalar #exported && $showStuffOtherThanUsedOrNot);
foreach (#exported) {
print(("\t" x $indent)."\t");
if(callerUsesIt($_, $file)) {
$anyUsed = 1;
print color("green"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
} else {
print color("red"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
}
print "\n" if $showStuffOtherThanUsedOrNot;
}
print(("\t" x $indent)."\t") if $showStuffOtherThanUsedOrNot;
print "Subs: " if $showStuffOtherThanUsedOrNot;
foreach my $s (findAllSubs($modulDoc)) {
my $isExported = grep($s eq $_, #exported) ? 1 : 0;
my $rot = callerUsesIt($s, $caller, $modul, $isExported) ? 0 : 1;
$anyUsed = 1 unless $rot;
if($showStuffOtherThanUsedOrNot) {
print color("red") if $rot;
print color("green") if !$rot;
print "$s, ";
print color("reset");
}
}
print "\n" if $showStuffOtherThanUsedOrNot;
print color("red"), "=========== $modul wahrscheinlich nicht in Benutzung!!!\n", color("reset") unless $anyUsed;
print color("green"), "=========== $modul in Benutzung!!!\n", color("reset") if $anyUsed;
}
}
return $skript;
}
sub createFileName {
my $file = shift;
$file =~ s#::#/#g;
$file .= ".pm";
$file = "/modules/$file";
return $file;
}
sub getExported {
my $doc = shift;
my #exported = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement")}) {
my $worthATry = 0;
my $isMatch = 0;
foreach my $y (#{$x->{children}}) {
$worthATry = 1 if(ref $y eq "PPI::Token::Symbol");
if($y eq '#EXPORT') {
$isMatch = 1;
} elsif($isMatch && ref($y) ne "PPI::Token::Whitespace" && ref($y) ne "PPI::Token::Operator" && $y->{content} ne ";") {
push #exported, $y->{content};
}
}
}
};
my #realExported = ();
foreach (#exported) {
eval "\#realExported = $_";
}
return #realExported;
}
sub callerUsesIt {
my $subname = shift;
my $caller = shift;
my $namespace = shift || undef;
my $isExported = shift || 0;
$caller = `cat $caller`;
unless($namespace) {
return 1 if($caller =~ /\b$subname\b/);
} else {
$namespace = createPackageName($namespace);
my $regex = qr#$namespace(?:::|->)$subname#;
if($caller =~ $regex) {
return 1;
}
}
return 0;
}
sub findAllSubs {
my $doc = shift;
my #subs = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement::Sub")}) {
my $foundName = 0;
foreach my $y (#{$x->{children}}) {
no warnings;
if($y->{content} ne "sub" && ref($y) eq "PPI::Token::Word") {
push #subs, $y;
}
use warnings;
}
}
};
return #subs;
}
sub createPackageName {
my $name = shift;
$name =~ s#/modules/##g;
$name =~ s/\.pm$//g;
$name =~ s/\//::/g;
return $name;
}
Its really ugly and maybe not 100% working, but it seems, with the tests that Ive done now, that its good for a beginning.
I'd written a Perl script to generate a table of contents from HTML pages which is working fine (and generating valid HTML) except for that the Perl output is removing closing tags for some elements like p. This is not validating against DocType of strict.
Please scroll down the post to see the Perl code.
What should I do to correct it?
#!/usr/bin/perl -w
#Copyright anurag gupta ; free to use under GNU GPL License
use strict;
use feature "switch";
use Common;
use HTML::Element;
use HTML::TreeBuilder;
#"F:/anurag/work/indiacustomercare/airtel/recharge.html";
my $filename="F:/tmp/t9.html";
my $index=0;
my $labelprefix="anu555ltg-";
my $tocIndex=100001;
my $toc;
my #stack;
my $prevHtag="h2";
sub hTagEncountered($)
{
my $hTag=shift;
my $currLevel=(split //, $hTag)[1];
given($hTag)
{
when(/h1/)
{
break;
}
default{
my $countCurr= (split /h/,$hTag)[1];
my $countPrev= (split /h/,$prevHtag)[1];
if($countCurr>$countPrev)
{
push #stack,($currLevel);
$toc.="<ul>";
}
elsif($countCurr<$countPrev)
{
# Now check in the stack
while ( #stack and $currLevel < $stack[$#stack])
{
pop #stack;
$toc.="</ul>";
}
}
}
}
$prevHtag=$hTag;
}
sub getLabel
{
my $name=$labelprefix.++$tocIndex;
}
sub traversehtml
{
my $node=$_[0];
# $node->dump();
# print "-----------------\n";
# print $node->tag()."\n";
# print ref($node),"->\n";
if((ref(\$node) ne "SCALAR" )and ($node->tag() =~m/^h[2-7]$/i)) #it's an H Element!
{
my #h = $node->content_list();
if(#h==1 and ref(\$h[0]) eq "SCALAR") #H1 contains simple string and nothing else
{
hTagEncountered($node->tag());
my $label=getLabel();
my $a = HTML::Element->new('a', name => $label);
my $text=$node->as_trimmed_text();
$a->push_content($text);
$node->delete_content();
$text=HTML::Entities::encode_entities($text);
$node->push_content($a);
$toc.=<<EOF;
<li>$text
EOF
}
elsif ( #h==1 and ($h[0]->tag() eq "a")) # <h1>ttt</h1> case
{
#See if any previous label already exists
my $prevlabel = $h[0]->attr("name");
$h[0]->attr("name",undef) if(defined($prevlabel) and $prevlabel=~m/$labelprefix/); #delete previous name tag if any
#set the new label
my $label=getLabel();
$h[0]->attr("name",$label);
hTagEncountered($node->tag());
my $text=HTML::Entities::encode_entities($node->as_trimmed_text());
$toc.=<<EOF;
<li>$text
EOF
}
elsif (#h>1) #<h1>some text herettt</h1> case
{
die "h1 must not contain any html elements";
}
}
my #h = $node->content_list();
foreach my $item (#h)
{
if(ref(\$item) ne "SCALAR") {traversehtml($item); } #skip scalar items
}
}
die "File $filename not found" if !-r $filename;
my $tree = HTML::TreeBuilder->new();
$tree->parse_file($filename);
my #h = $tree->content_list();
traversehtml($h[1]);
while(pop #stack)
{
$toc.="</ul>";
}
$toc="<ul>$toc</ul>";
print qq{<div id="icctoc"><h2>TOC</h2>$toc</div>};
my #list1=$tree->content_list();
my #list2=$list1[1]->content_list();
for(my $i=0;$i<#list2;++$i){
if(ref(\$list2[$i]) eq "SCALAR")
{
print $list2[$i]
}
else{
print $list2[$i]->as_HTML();
}
}
# Finally:
Try passing {} for the \%optional_end_tags argument to as_HTML. See the documentation for details.