Perl subroutine returning unexpected value - perl

I am extending CGI and trying to add a simple router to it, just for fun.
Here are my Test::More tests
use strict;
use warnings;
use Data::Dumper;
use Test::More tests => 4;
use CGI::Router;
my $router = CGI::Router->new;
my $resp;
## 1. test ##
$ENV{'REQUEST_URI'} = '/';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /', sub {
# print Dumper #_;
return 'Hello 1';
});
# print Dumper $resp;
ok( $resp eq 'Hello 1' );
## 2. test ##
$ENV{'REQUEST_URI'} = '/hello';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello', sub {
# print Dumper #_;
return 'Hello 2';
});
# print Dumper $resp;
ok( $resp eq 'Hello 2' );
## 3. test ##
$ENV{'REQUEST_URI'} = '/hello/kitty';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello/:who', sub {
# print Dumper #_;
return 'Hello 3';
});
# print Dumper $resp;
ok( $resp eq 'Hello 3' );
## 4. test ##
$ENV{'REQUEST_URI'} = '/hello/kitty/kat';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello/:who/:what', sub {
# print Dumper #_;
return 'Hello kitty kat';
});
# print Dumper $resp;
ok( $resp eq 'Hello kitty kat' );
$router->run;
Here is my module
package CGI::Router;
use strict;
use warnings;
use parent 'CGI';
use Carp;
use Data::Dumper;
sub connect {
my ( $self, $req, $subr ) = #_;
$self->{routes} //= {};
$self->{env} //= \%ENV;
if ( ! exists $self->{routes}->{$req} ) {
$self->{routes}->{$req} = {
handler => $subr,
pattern => $self->build_pattern( $req ),
method => $req =~ /^(GET|PUT|POST|DELETE)/
};
} else {
Carp::croak( "Similar request already exists $req!" );
}
# Get current request destination
# TODO: Add that stupid IIS HTTP header
$self->{destination} = $self->{env}->{REQUEST_URI};
$self->{method} = $self->{env}->{REQUEST_METHOD};
return $self->mapper();
}
sub mapper {
my $self = shift;
my $router;
my #params;
foreach my $key ( keys %{ $self->{routes} } ) {
my $route = $self->{routes}->{$key};
if ( $self->{method} eq $route->{method} &&
$self->{destination} =~ $route->{pattern} ) {
#params = $self->{destination} =~ $route->{pattern};
$router = $route;
}
}
return $router->{handler}->( #params );
}
sub build_pattern {
my ( $self, $pattern ) = #_;
$pattern =~ s/(GET|POST|PUT|DELETE)\s?//i;
$pattern = do {
# Replace something like /word/:token with /word/(^:([a-z]+))
$pattern =~ s!
(\:([a-z]+))
!
if ( $2 ) {
"([^/]+)"
}
!gex;
"^$pattern\$";
};
return $pattern;
}
sub run {
}
1;
When the test cases run and I Dump e.g $resp in ## 4. test ## the returned value is not some version of "Hello kitty" but 'GET'.
Here is the output of the test
1..4
ok 1
ok 2
ok 3
ok 4
Why do all the subroutines return 'GET', I don't see where I generate this output.
I know a ton of similar frameworks exists, I am just doing this for fun :)

I modified your code to work/not error. Take it or leave it ;)
CGI/Router.pm:
Things changed:
build_pattern returns a compiled regex via qr/$pattern/
connect param handling is less confusing. You were taking $self, #args off #_, but then taking $req, $subr from #args and doing nothing else with it. So I moved them up
connect returns the value of run
$foo = $bar if !defined $foo; is better written as $foo //= $bar;. Similar to $foo ||= $bar but checks for definedness rather than truth.
Code:
package CGI::Router;
use strict;
use warnings;
use parent 'CGI';
use Carp;
use Data::Dumper;
sub connect {
my ( $self, $req, $subr ) = #_;
$self->{routes} //= {};
$self->{env} //= \%ENV;
if ( !exists $self->{routes}->{$req} ) {
$self->{routes}->{$req} = {
handler => $subr,
pattern => $self->build_pattern($req),
method => $req =~ /^(GET|PUT|POST|DELETE)/
};
}
else {
Carp::croak("Similar request already exists $req!");
}
# Get current request destination
# TODO: Add that stupid IIS HTTP header
$self->{destination} = $self->{env}->{REQUEST_URI};
$self->{method} = $self->{env}->{REQUEST_METHOD};
return $self->run();
}
sub build_pattern {
my ( $self, $pattern ) = #_;
$pattern =~ s/(GET|POST|PUT|DELETE)\s?//i;
$pattern = do {
# Replace something like /word/:token with /word/(^:([a-z]+))
$pattern =~ s!
(\:([a-z]+))
!
if ( $2 ) {
"([^/]+)"
}
!gex;
"^$pattern\$";
};
return qr/$pattern/;
}
sub run {
my $self = shift;
my $router;
my #params;
foreach my $key ( keys %{ $self->{routes} } ) {
my $route = $self->{routes}->{$key};
if ( $self->{method} eq $route->{method}
&& $self->{destination} =~ $route->{pattern} )
{
#params =
$self->{destination} =~ $route->{pattern}; # Not fully working yet
$router = $route;
}
}
return $router->{handler}->(#params);
}
1;
test-router.pl:
Things changed:
The BEGIN block was doing setup that you ought not do for a test script. I.e. randomising the flow.. so I ditched that off
Added the environment variables for each test case
Code:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Test::More tests => 4;
use CGI::Router;
my $router = CGI::Router->new;
my $resp;
## 1. test ##
$ENV{'REQUEST_URI'} = '/';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /', sub {
# print Dumper #_;
return 'Hello 1';
});
print Dumper $resp;
ok( $resp eq 'Hello 1' );
## 2. test ##
$ENV{'REQUEST_URI'} = '/hello';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello', sub {
# print Dumper #_;
return 'Hello 2';
});
print Dumper $resp;
ok( $resp eq 'Hello 2' );
## 3. test ##
$ENV{'REQUEST_URI'} = '/hello/kitty';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello/:who', sub {
# print Dumper #_;
return 'Hello 3';
});
print Dumper $resp;
ok( $resp eq 'Hello 3' );
## 4. test ##
$ENV{'REQUEST_URI'} = '/hello/kitty/kat';
$ENV{'REQUEST_METHOD'} = 'GET';
$resp = $router->connect('GET /hello/:who/:what', sub {
# print Dumper #_;
return 'Hello kitty kat';
});
print Dumper $resp;
ok( $resp eq 'Hello kitty kat' );

Related

Unable to retrieve multiple column values from file in Perl

I have a file with following contents:
TIME
DATE TIME DAY
191227 055526 FRI
RC DEV SERVER
RC1 SERVER1
RC2 SERVER2
RC3 SERVER3
END
I am fetching argument values from this file, say if I pass DATE as an argument to the script I am getting corresponding value of the DATE. i.e., 191227
When I pass multiple arguments say DATE, DAY I should get values:
DATE=191227
DAY=FRI
But what I am getting here is:
DATE=191227
DAY=NULL
And if I pass RC as an argument I should get:
RC=RC1,RC2,RC3
The script looks below:
#!/usr/bin/perl
use strict;
use Data::Dumper;
print Dumper(\#ARGV);
foreach my $name(#ARGV){
print "NAME:$name\n";
my ($result, $start, $stop, $width) = "";
while(my $head = <STDIN>)
{
if( $head =~ (m/\b$name\b/g))
{
$start = (pos $head) - length($name);
$stop = (pos $head);
my $line = <STDIN>;
pos $head = $stop+1;
$head =~ (m/\b/g);
$width = (pos $head) - $start;
$result = substr($line,$start,$width);
}
}
$result =~ s/^\s*(.*?)\s*$/$1/;
print "$name=";
$result = "NULL" if ( $result eq "" );
print "$result\n";
}
Can someone please help me to get values if I pass multiple arguments also if suppose argument value have data in multiple lines it should be printed comma separated values (ex: for RC, RC=RC1,RC2,RC3).
Here is an example, assuming the input file is named file.txt and the values are starting at the same horizontal position as the keys:
package Main;
use feature qw(say);
use strict;
use warnings;
use Data::Dumper qw(Dumper);
my $self = Main->new(fn => 'file.txt', params => [#ARGV]);
$self->read_file();
$self->print_values();
sub read_file {
my ( $self ) = #_;
my $fn = $self->{fn};
open ( my $fh, '<', $fn ) or die "Could not open file '$fn': $!";
local $/ = ""; #Paragraph mode
my #blocks = <$fh>;
close $fh;
$self->{values} = {};
for my $block (#blocks) {
$self->parse_block( $block );
}
}
sub parse_block {
my ( $self, $block ) = #_;
my #lines = split /\n/, $block;
my $header = shift #lines;
my ($keys, $startpos) = $self->get_block_keys( $header );
for my $line ( #lines ) {
for my $key (#$keys) {
my $startpos = $startpos->{$key};
my $str = substr $line, $startpos;
my ( $value ) = $str =~ /^(\S+)/;
if ( defined $value ) {
push #{$self->{values}{$key}}, $value;
}
}
}
}
sub get_block_keys {
my ( $self, $header ) = #_;
my $values = $self->{values};
my #keys;
my %spos;
while ($header =~ /(\S+)/g) {
my $key = $1;
my $startpos = $-[1];
$spos{$key} = $startpos;
push #keys, $key;
}
for my $key (#keys) {
if ( !(exists $values->{$key}) ) {
$values->{$key} = [];
}
}
return (\#keys, \%spos);
}
sub new {
my ( $class, %args ) = #_;
return bless \%args, $class;
}
sub print_values {
my ( $self ) = #_;
my $values = $self->{values};
for my $key (#{$self->{params}}) {
my $value = "<NO VALUE FOUND>";
if ( exists $values->{$key} ) {
$value = join ",", #{$values->{$key}};
}
say "$key=$value";
}
}
Edit
If you want to read the file from STDIN instead, change the following part of the code:
# [...]
my $self = Main->new(params => [#ARGV]);
$self->read_file();
$self->print_values();
sub read_file {
my ( $self ) = #_;
local $/ = ""; #Paragraph mode
my #blocks = <STDIN>;
$self->{values} = {};
for my $block (#blocks) {
$self->parse_block( $block );
}
}
# [...]

Retrieve online data and generate an xml file

Whenever I run the following Perl script I got the errors below
Use of uninitialized value $date in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
Use of uninitialized value $first_page in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
Use of uninitialized value $last_page in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
The following code is run at the command prmpt by giving URL
http://ajpheart.physiology.org/content/309/11
It generates the meta_issue11.xml file but does not give proper output.
#!/usr/bin/perl
use warnings;
use strict;
use feature qw{ say };
use HTML::Parser;
use WWW::Mechanize;
my ( $date, $first_page, $last_page, #toc );
sub get_date {
my ( $self, $tag, $attr ) = #_;
if ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-date' eq $attr->{class}
and not defined $date )
{
$self->handler( text => \&next_text_to_date, 'self, text' );
}
elsif ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-pages' eq $attr->{class} )
{
if ( not defined $first_page ) {
$self->handler( text => \&parse_first_page, 'self, text' );
}
else {
$self->handler( text => \&parse_last_page, 'self, text' );
}
}
elsif ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-doi' eq $attr->{class} )
{
$self->handler( text => \&retrieve_doi, 'self, text' );
}
elsif ( 'div' eq $tag
and $attr->{class}
and $attr->{class} =~ /\bissue-toc-section\b/ )
{
$self->handler( text => \&next_text_to_toc, 'self, text' );
}
}
sub next_text_to_date {
my ( $self, $text ) = #_;
$text =~ s/^\s+|\s+$//g;
$date = $text;
$self->handler( text => undef );
}
sub parse_first_page {
my ( $self, $text ) = #_;
if ( $text =~ /([A-Z0-9]+)(?:-[0-9A-Z]+)?/ ) {
$first_page = $1;
$self->handler( text => undef );
}
}
sub parse_last_page {
my ( $self, $text ) = #_;
if ( $text =~ /(?:[A-Z0-9]+-)?([0-9A-Z]+)/ ) {
$last_page = $1;
$self->handler( text => undef );
}
}
sub next_text_to_toc {
my ( $self, $text ) = #_;
push #toc, [$text];
$self->handler( text => undef );
}
sub retrieve_doi {
my ( $self, $text ) = #_;
if ( 'DOI:' ne $text ) {
$text =~ s/^\s+|\s+$//g;
push #{ $toc[-1] }, $text;
$self->handler( text => undef );
}
}
print STDERR 'Enter the URL: ';
chomp( my $url = <> );
my ( $volume, $issue ) = ( split m(/), $url )[ -2, -1 ];
my $p = 'HTML::Parser'->new(
api_version => 3,
start_h => [ \&get_date, 'self, tagname, attr' ],
);
my $mech = 'WWW::Mechanize'->new( agent => 'Mozilla' );
$mech->get( $url );
my $contents = $mech->content;
$p->parse( $contents );
$p->eof;
my $toc;
for my $section ( #toc ) {
$toc .= "<TocSection>\n";
$toc .= "<Heading>" . shift( #$section ) . "</Heading>\n";
$toc .= join q(), map "<DOI>$_</DOI>\n", #$section;
$toc .= "</TocSection>\n";
}
open( F6, ">meta_issue_$issue.xml" );
print F6 <<"__HTML__";
<!DOCTYPE MetaIssue SYSTEM "http://schema.highwire.org/public/toc/MetaIssue.pubids.dtd">
<MetaIssue volume="$volume" issue="$issue">
<Provider>Cadmus</Provider>
<IssueDate>$date</IssueDate>
<PageRange>$first_page-$last_page</PageRange>
<TOC>$toc</TOC>
</MetaIssue>
__HTML__
The primary problem is that you're checking the class string for equality, whereas the required class may be just one of several space-separated class names
But there are a number of other issues, such as using WWW::Mechanize just to fetch a web page when LWP::Simple will do fine. And checking three times for 'span' eq $tag
Here's a working version. I would prefer to see XML::Writer used to create the output XML, but I have kept to using simple print statements, as in your own code
Note that comments like #/ are just there to persuade the Stack Overflow syntax highlighter to colour the text correctly. You should remove them in the live code
#!/usr/bin/perl
use strict;
use warnings 'all';
use LWP::Simple 'get';
use HTML::Parser;
my ( $date, $first_page, $last_page, #toc );
print 'Enter the URL: ';
my $url = <>;
$url ||= 'http://ajpheart.physiology.org/content/309/11';
chomp $url;
my ( $volume, $issue ) = ( split m(/), $url )[ -2, -1 ]; #/
my $p = 'HTML::Parser'->new(
api_version => 3,
start_h => [ \&get_span_div, 'self, tagname, attr' ],
);
my $contents = get($url);
$p->parse( $contents );
$p->eof;
my $toc = '';
for my $section ( #toc ) {
$toc .= "\n";
$toc .= " <TocSection>\n";
$toc .= " <Heading>" . shift( #$section ) . "</Heading>\n";
$toc .= " <DOI>$_</DOI>\n" for #$section;
$toc .= " </TocSection>";
}
open my $out_fh, '>', "meta_issue_$issue.xml" or die $!;
print { $out_fh } <<"__HTML__";
<!DOCTYPE MetaIssue SYSTEM "http://schema.highwire.org/public/toc/MetaIssue.pubids.dtd">
<MetaIssue volume="$volume" issue="$issue">
<Provider>Cadmus</Provider>
<IssueDate>$date</IssueDate>
<PageRange>$first_page-$last_page</PageRange>
<TOC>$toc
</TOC>
</MetaIssue>
__HTML__
#/
sub get_span_div {
my ( $self, $tag, $attr ) = #_;
my $class = $attr->{class};
my %class;
%class = map { $_ => 1 } split ' ', $class if $class;
if ( $tag eq 'span' ) {
if ( $class{'highwire-cite-metadata-date'} ) {
$self->handler( text => \&next_text_to_date, 'self, text' ) unless $date;
}
elsif ( $class{'highwire-cite-metadata-pages'} ) {
if ( not defined $first_page ) {
$self->handler( text => \&parse_first_page, 'self, text' );
}
else {
$self->handler( text => \&parse_last_page, 'self, text' );
}
}
elsif ( $class{'highwire-cite-metadata-doi'} ) {
$self->handler( text => \&retrieve_doi, 'self, text' );
}
}
elsif ( $tag eq 'div' ) {
if ( $class{'issue-toc-section'} ) {
$self->handler( text => \&next_text_to_toc, 'self, text' );
}
}
}
sub next_text_to_date {
my ( $self, $text ) = #_;
($date = $text) =~ s/^\s+|\s+$//g; #/
$self->handler( text => undef );
}
sub parse_first_page {
my ( $self, $text ) = #_;
return unless $text =~ /(\w+)(-\w+)?/; #/
$first_page = $1;
$self->handler( text => undef );
}
sub parse_last_page {
my ( $self, $text ) = #_;
return unless $text =~ /\w+-(\w+)/; #/
$last_page = $1;
$self->handler( text => undef );
}
sub next_text_to_toc {
my ( $self, $text ) = #_;
push #toc, [ $text ];
$self->handler( text => undef );
}
sub retrieve_doi {
my ( $self, $text ) = #_;
return unless $text =~ /\d+/; #/
$text =~ s/^\s+|\s+$//g;
push #{ $toc[-1] }, $text;
$self->handler( text => undef );
}
output
<!DOCTYPE MetaIssue SYSTEM "http://schema.highwire.org/public/toc/MetaIssue.pubids.dtd">
<MetaIssue volume="309" issue="11">
<Provider>Cadmus</Provider>
<IssueDate>December 1, 2015</IssueDate>
<PageRange>H1793-H1996</PageRange>
<TOC>
<TocSection>
<Heading>CALL FOR PAPERS | Cardiovascular Responses to Environmental Stress</Heading>
<DOI>10.1152/ajpheart.00199.2015</DOI>
</TocSection>
<TocSection>
<Heading>CALL FOR PAPERS | Autophagy in the Cardiovascular System</Heading>
<DOI>10.1152/ajpheart.00709.2014</DOI>
</TocSection>
<TocSection>
<Heading>CALL FOR PAPERS | Mechanisms of Diastolic Dysfunction in Cardiovascular Disease</Heading>
<DOI>10.1152/ajpheart.00608.2015</DOI>
</TocSection>
<TocSection>
<Heading>CALL FOR PAPERS | Small Vessels–Big Problems: Novel Insights into Microvascular Mechanisms of Diseases</Heading>
<DOI>10.1152/ajpheart.00463.2015</DOI>
<DOI>10.1152/ajpheart.00691.2015</DOI>
<DOI>10.1152/ajpheart.00568.2015</DOI>
<DOI>10.1152/ajpheart.00653.2015</DOI>
</TocSection>
<TocSection>
<Heading>CALL FOR PAPERS | Exercise Training in Cardiovascular Disease: Mechanisms and Outcomes</Heading>
<DOI>10.1152/ajpheart.00341.2015</DOI>
</TocSection>
<TocSection>
<Heading>CALL FOR PAPERS | Cardiac Regeneration and Repair: Mechanisms and Therapy</Heading>
<DOI>10.1152/ajpheart.00594.2015</DOI>
</TocSection>
<TocSection>
<Heading>Vascular Biology and Microcirculation</Heading>
<DOI>10.1152/ajpheart.00289.2015</DOI>
<DOI>10.1152/ajpheart.00308.2015</DOI>
<DOI>10.1152/ajpheart.00179.2015</DOI>
</TocSection>
<TocSection>
<Heading>Muscle Mechanics and Ventricular Function</Heading>
<DOI>10.1152/ajpheart.00284.2015</DOI>
<DOI>10.1152/ajpheart.00327.2015</DOI>
</TocSection>
<TocSection>
<Heading>Signaling and Stress Response</Heading>
<DOI>10.1152/ajpheart.00050.2015</DOI>
</TocSection>
<TocSection>
<Heading>Cardiac Excitation and Contraction</Heading>
<DOI>10.1152/ajpheart.00055.2015</DOI>
</TocSection>
<TocSection>
<Heading>Integrative Cardiovascular Physiology and Pathophysiology</Heading>
<DOI>10.1152/ajpheart.00316.2015</DOI>
<DOI>10.1152/ajpheart.00721.2014</DOI>
</TocSection>
<TocSection>
<Heading>Corrigendum</Heading>
<DOI>10.1152/ajpheart.H-zh4-1780-corr.2015</DOI>
</TocSection>
</TOC>
</MetaIssue>

Can not send data to IO::Socket::INET in conditional statement

I have an issue communicating with a external system via IO::Socket::Inet.
I try to login and send multiple commands to the system but unfortunately this does'n work if the command print in line 58 is under conditional statement.
The conditional statements in this case is required to handle response data.
package Net::Cli::Cisco;
use 5.006;
use strict;
use warnings FATAL => qw(all);
use IO::Socket::INET;
use Carp;
use Data::Dumper;
$| = 1;
sub new {
my $class = shift;
my %args = #_;
my $self = bless {
_host => $args{host} || carp('No hostname defined'),
_username => $args{username} || carp('No username defined'),
_password => $args{password} || carp('No password defined'),
_logged_in => 0,
}, $class;
return $self;
}
sub connect {
my $self = shift;
my $host = $self->{_host};
my $port = 23;
my $handle = IO::Socket::INET->new(
Proto => "tcp",
PeerAddr => $host,
PeerPort => $port,
Type => SOCK_STREAM,
Timeout => 3
) or die "can't connect to port $port on $host: $!";
my $shc = "\r\n";
$self->{shc} = $shc;
$self->{handle} = $handle;
}
sub getInterface {
my ($self) = #_;
$self->connect;
my #cmd_list = ( "sh clock", "sh ip int brief" );
$self->send_cmd(#cmd_list);
}
sub send_cmd {
my ( $self, #cmd_list ) = #_;
my $handle = $self->{handle};
my $response;
while ( $response = <$handle> ) {
if ( $response =~ m/^Username:/ ) {
print "Conditional statements exec done!\n";
print $handle $self->{_username} . $self->{shc};
}
#print $handle $self->{_username} . $self->{shc};
print $response;
print $handle $self->{_password} . $self->{shc};
print $handle "enable" . $self->{shc};
print $handle $self->{_password} . $self->{shc};
print $handle "term leng 0" . $self->{shc};
foreach my $cmd (#cmd_list) {
print $handle "$cmd" . $self->{shc};
}
print $handle "exit" . $self->{shc};
}
close($handle);
}
1;
my $x = __PACKAGE__->new(
"host" => "1.1.1.1",
"username" => "user",
"password" => "pw"
);
$x->getInterface;
Well, I can't see why my code is wrong.
Note: If I recommend line 61 everything working fine.
Any ideas?
After comment from ikegami please find working subroutine below:
sub send_cmd {
my ( $self, #cmd_list ) = #_;
my $handle = $self->{handle};
my $response;
START: while ( $response = <$handle> ) {
print $response;
if ( $response =~ m/[^Username:|^Password:|\$%#:>]/ ) {
print $handle $self->{_username} . $self->{shc};
print $handle $self->{_password} . $self->{shc};
print $handle "enable" . $self->{shc};
print $handle $self->{_password} . $self->{shc};
print $handle "term leng 0" . $self->{shc};
foreach my $cmd (#cmd_list) {
print $handle "$cmd" . $self->{shc};
}
print $handle "exit" . $self->{shc};
} else {
goto START;
}
}
close($handle);

Parsing with Parse::RecDescent

I'm having trouble getting the parser to correctly return the results I want. Right now I'm just starting off with a basic string to parse, but I eventually want to get to full ACL's. I'm borrowing some code I found online that does this for Cisco ASA, but his scenarios is slight different than mine so I'm not able to use the code.
Eventually I'd like to be able to match some string like below:
permit ip any 1.2.0.0 0.0.255.255
permit ip host 1.2.3.4 1.2.3.4 0.0.0.31
deny ip 138.145.211.0 0.0.0.255 any log-input
etc...
Here is the code:
Parser.pm
package AccessList::Parser;
use 5.008008;
use strict;
use warnings;
use Carp;
use Parse::RecDescent;
our $VERSION = '0.05';
sub new {
my ($class) = #_;
my $self = { PARSER => undef, };
bless $self, $class;
$self->_init();
return $self;
}
sub _init {
my ($self) = #_;
$self->{PARSER} = Parse::RecDescent->new( $self->_grammar() );
}
sub parse {
my ( $self, $string ) = #_;
defined ($string) or confess "blank line received";
my $tree = $self->{PARSER}->acl_action($string);
defined($tree) or confess "unrecognized line\n";
return $tree;
}
sub _grammar {
my ($self) = #_;
my $grammar = q{
<autotree>
acl_action : "permit" | "deny"
acl_protocol :
PROTOCOL EOL
| <error>
PROTOCOL :
/\d+/ | "ah" | "eigrp" | "esp" | "gre" | "icmp" | "icmp6" | "igmp"
| "igrp" | "ip" | "ipinip" | "ipsec" | "nos" | "ospf" | "pcp"
| "pim" | "pptp" | "snp" | "tcp" | "udp"
EOL :
/$/
};
return $grammar;
}
1;
My Test: parse.t
use strict;
use warnings;
use Scalar::Util 'blessed';
use Test::More tests => 2;
use AccessList::Parser;
my $parser = AccessList::Parser->new();
ok( defined($parser), "constructor" );
my $string;
my $tree;
my $actual;
my $expected;
#
# Access list 1
#
$string = q{permit ip};
$tree = $parser->parse($string);
$actual = visit($tree);
$expected = {
'acl_action' => 'permit',
'acl_protocol' => 'ip',
};
is_deeply($actual, $expected, "whatever");
#
# Finished tests
#
sub visit {
my ($node) = #_;
my $Rule_To_Key_Map = {
"acl_action" => 1,
"acl_protocol" => 1
};
my $parent_key;
my $result;
# set s of explored vertices
my %seen;
#stack is all neighbors of s
my #stack;
push #stack, [ $node, $parent_key ];
my $key;
while (#stack) {
my $rec = pop #stack;
$node = $rec->[0];
$parent_key = $rec->[1]; #undef for root
next if ( $seen{$node}++ );
my $rule_id = ref($node);
if ( exists( $Rule_To_Key_Map->{$rule_id} ) ) {
$parent_key = $rule_id;
}
foreach my $key ( keys %$node ) {
next if ( $key eq "EOL" );
my $next = $node->{$key};
if ( blessed($next) ) {
if ( exists( $next->{__VALUE__} ) ) {
#print ref($node), " ", ref($next), " ", $next->{__VALUE__},"\n";
my $rule = ref($node);
my $token = $next->{__VALUE__};
$result->{$parent_key} = $token;
#print $rule, " ", $result->{$rule}, "\n";
}
push #stack, [ $next, $parent_key ];
#push #stack, $next;
}
}
}
return $result;
}
You forgot to include a question in your question, but it looks like your problem is that you're calling acl_action as the root rule of your parse, but acl_action only matches the terminals accept or deny. You want to write a rule that matches an entire line of input, and call that rule instead.

Why can't I fetch www.google.com with Perl's LWP::Simple?

I cant seem to get this peice of code to work:
$self->{_current_page} = $href;
my $response = $ua->get($href);
my $responseCode = $response->code;
if( $responseCode ne "404" ) {
my $content = LWP::Simple->get($href);
die "get failed: " . $href if (!defined $content);
}
Will return error: get failed: http://www.google.com
The full code is as follows:
#!/usr/bin/perl
use strict;
use URI;
use URI::http;
use File::Basename;
use DBI;
use LWP::Simple;
require LWP::UserAgent;
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->max_redirect(0);
package Crawler;
sub new {
my $class = shift;
my $self = {
_url => shift,
_max_link => 0,
_local => 1
};
bless $self, $class;
return $self;
}
sub trim{
my( $self, $string ) = #_;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
sub process_image {
my ($self, $process_image) = #_;
$self->{_process_image} = $process_image;
}
sub local {
my ($self, $local) = #_;
$self->{_local} = $local;
}
sub max_link {
my ($self, $max_link) = #_;
$self->{_max_link} = $max_link;
}
sub x_more {
my ($self, $x_more) = #_;
$self->{_x_more} = $x_more;
}
sub resolve_href {
my ($base, $href) = #_;
my $uri = URI->new($href);
return $uri->rel($base);
}
sub write {
my ( $self, $ref, $data ) = #_;
open FILE, '>c:/perlscripts/' . $ref . '_' . $self->{_process_image} . '.txt';
foreach( $data ) {
print FILE $self->trim($_) . "\n";
}
close( FILE );
}
sub scrape {
my #m_error_array;
my #m_href_array;
my #href_array;
my ( $self, $DBhost, $DBuser, $DBpass, $DBname ) = #_;
my ($dbh, $query, $result, $array);
my $DNS = "dbi:mysql:$DBname:$DBhost:3306";
$dbh = DBI->connect($DNS, $DBuser, $DBpass ) or die $DBI::errstr;
if( defined( $self->{_process_image} ) && ( -e 'c:/perlscripts/href_w_' . $self->{_process_image} . ".txt" ) ) {
open ERROR_W, "<c:/perlscripts/error_w_" . $self->{_process_image} . ".txt";
open M_HREF_W, "<c:/perlscripts/m_href_w_" . $self->{_process_image} . ".txt";
open HREF_W, "<c:/perlscripts/href_w_" . $self->{_process_image} . ".txt";
#m_error_array = <ERROR_W>;
#m_href_array = <M_HREF_W>;
#href_array = <HREF_W>;
close ( ERROR_W );
close ( M_HREF_W );
close ( HREF_W );
}else{
#href_array = ( $self->{_url} );
}
my $z = 0;
while( #href_array ){
if( defined( $self->{_x_more} ) && $z == $self->{_x_more} ) {
last;
}
if( defined( $self->{_process_image} ) ) {
$self->write( 'm_href_w', #m_href_array );
$self->write( 'href_w', #href_array );
$self->write( 'error_w', #m_error_array );
}
$self->{_link_count} = scalar #m_href_array;
my $href = shift( #href_array );
my $info = URI::http->new($href);
my $host = $info->host;
$host =~ s/^www\.//;
$result = $dbh->prepare("INSERT INTO `". $host ."` (URL) VALUES ('$href')");
if( ! $result->execute() ){
$result = $dbh->prepare("CREATE TABLE `" . $host . "` ( `ID` INT( 255 ) NOT NULL AUTO_INCREMENT , `URL` VARCHAR( 255 ) NOT NULL , PRIMARY KEY ( `ID` )) ENGINE = MYISAM ;");
$result->execute();
}
$self->{_current_page} = $href;
my $response = $ua->get($href);
my $responseCode = $response->code;
if( $responseCode ne "404" ) {
my $content = LWP::Simple->get($href);
die "get failed: " . $href if (!defined $content);
}
#print $responseCode;
}
}
1;
#$query = "SELECT * FROM `actwebdesigns.co.uk` ORDER BY ID DESC";
#$result = $dbh->prepare($query);
#$result->execute();
#while( $array = $result->fetchrow_hashref() ) {
# print $array->{'URL'} . "\n";
#}
EDIT:
Still not working with redirect fixed.
my $redirect_limit = 10;
my $y = 0;
while( 1 && $y le $redirect_limit ) {
my $response = $ua->get($href);
my $responseCode = $response->code;
if( $responseCode == 200 || $responseCode == 301 || $responseCode == 302 ) {
if( $responseCode == 301 || $responseCode == 302 ) {
$href = $response->header('Location');
}else{
last;
}
}else{
push( #m_error_array, $href );
last;
}
$y++;
}
if( $y ne $redirect_limit ) {
if( ! defined( $self->{_url_list} ) ) {
my #url_list = ( $href );
}else{
my #url_list = $self->{_url_list};
push( #url_list, $href );
$self->{_url_list} = #url_list;
}
my $content = LWP::Simple->get($href);
die "get failed: " . $href if (!defined $content);
#$result = $dbh->prepare("INSERT INTO `". $host ."` (URL) VALUES ('$href')");
#if( ! $result->execute() ){
# $result = $dbh->prepare("CREATE TABLE `" . $host . "` ( `ID` INT( 255 ) NOT NULL AUTO_INCREMENT , `URL` VARCHAR( 255 ) NOT NULL , PRIMARY KEY ( `ID` )) ENGINE = MYISAM ;");
# $result->execute();
#}
print "good";
}else{
push( #m_error_array, $href );
}
You should examine the response code to see what's happening (you're already checking for 404s). I get a 302 - a redirect.
For example:
die "get failed ($responseCode): " . $href if (!defined $content);
Resulting message:
get failed (302): http://www.google.com at goog.pl line 20.
A couple of thoughts.
1/ You seems to be using the string comparison operators (le, ne) to compare numbers. You should use the numeric comparison operators (<=, !=) instead.
2/ The value you get back from the LWP::UserAgent::get call is an HTTP::Response object. Judicious use of that class's "is_foo" method might make your code a bit cleaner.
I don't know if either of these will solve your problem. But they'll improve the quality of your code.
Here's your problem:
my $content = LWP::Simple->get($href);
That passes the string "LWP::Simple" as the first argument to 'get'. You want:
my $content = LWP::Simple::get($href);
Check your SELinux settings.
SELINUX enabled systems will not allow an outgoing connection from a web agent (httpd).
This page can tell you more about SELinux and HTTPD settings:
http://wiki.centos.org/TipsAndTricks/SelinuxBooleans
Enable outbound web connections from Apache in a Perl script:
# setsebool -P httpd_can_network_connect on