Perl - LWP UserAgent Bad URL Error Message

How can I set $PageData to an error message such as "Page not found" if $URL is nonexistent or not responding? I can't seem to get it to work.
use LWP::UserAgent;
use HTTP::Request;
my $URL = "http://www.MyDomain.com/";
my $Request = HTTP::Request->new("GET", $URL);
my $UserAgent = LWP::UserAgent->new(timeout => 5);
my $Result = $UserAgent->request($Request);
return unless $Result->is_success;
$PageData = $Result->content;
print $PageData;

Try using the HTTP::Status library with the following code:
use ...
use HTTP::Status qw(:constants :is status_message);
my $url = ...
my $request = HTTP::Request->new(GET => $url);
my $ua = LWP::UserAgent->new;
my $response = $ua->request($request);
if ($response->is_error) {
print STDERR $response->status_line, "\n";
print STDERR status_message($response->code), "\n";
print STDERR $response->error_as_HTML, "\n";
}
else {
print $response->decoded_content;
}
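To get exactly what the question asks for (a fallback message in $PageData when the URL is unreachable or not responding), here is a minimal sketch along the same lines; the domain is the placeholder from the question:
use strict;
use warnings;
use LWP::UserAgent;
my $URL = "http://www.MyDomain.com/";
my $UserAgent = LWP::UserAgent->new(timeout => 5);
my $Result = $UserAgent->get($URL);
# On a nonexistent host or a timeout, LWP returns a synthesized 5xx response
# (e.g. "500 Can't connect ..."), so is_success is false and status_line
# says what went wrong.
my $PageData;
if ($Result->is_success) {
$PageData = $Result->decoded_content;
}
else {
$PageData = "Page not found: " . $Result->status_line;
}
print $PageData;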

Related

get request in perl and Use of uninitialized value

my $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]&usehistory=y";
print "\n before url \n";
print $url;
#post the esearch URL
my $output = get($url);
print $output;
I have never used Perl before.
If I hit this URL in a browser, I do get the XML.
However, from what I see in the script's output, $output is empty and
print $output;
returns
Use of uninitialized value in print at ./extractEmails.pl line 48.
Please suggest what's wrong and how to fix it.
Edit:
As suggested, complete code:
#!/usr/bin/perl -w
# A perlscript written by Joseph Hughes, University of Glasgow
# use this perl script to parse the email addresses from the affiliations in PubMed
use strict;
use LWP::Simple;
my ($query, @queries);
#Query the Journal of Virology from 2014 until the present (use 3000)
$query = 'journal+of+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Journal of General Virology
$query = 'journal+of+general+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Virology
$query = 'virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Archives of Virology
$query = 'archives+of+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Virus Research
$query = 'virus+research[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Antiviral Research
$query = 'antiviral+research[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Viruses
$query = 'viruses[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(@queries,$query);
#Journal of Medical Virology
$query = 'journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
# global variables
push(@queries,$query);
my %emails;
my $emailcnt=0;
my $count=1;
#assemble the esearch URL
foreach my $query (@queries){
my $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
#my $url = $base . "esearch.fcgi?db=pubmed&term=$query&usehistory=y";
my $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]&usehistory=y";
print "\n before url \n";
print $url;
#post the esearch URL
my $output = get($url);
print "\n before output \n";
print get($url);
print $output;
#parse WebEnv, QueryKey and Count (# records retrieved)
my $web = $1 if ($output =~ /<WebEnv>(\S+)<\/WebEnv>/);
my $key = $1 if ($output =~ /<QueryKey>(\d+)<\/QueryKey>/);
my $count = $1 if ($output =~ /<Count>(\d+)<\/Count>/);
#retrieve data in batches of 500
my $retmax = 500;
for (my $retstart = 0; $retstart < $count; $retstart += $retmax) {
my $efetch_url = $base ."efetch.fcgi?db=pubmed&WebEnv=$web";
$efetch_url .= "&query_key=$key&retmode=xml";
my $efetch_out = get($efetch_url);
my @matches = $efetch_out =~ m(<Affiliation>(.*)</Affiliation>)g;
#print "$_\n" for @matches;
for my $match (@matches){
if ($match=~/\s([a-zA-Z0-9\.\_\-]+\@[a-zA-Z0-9\.\_\-]+)$/){
my $email=$1;
$email=~s/\.$//;
$emails{$email}++;
}
}
}
my $cnt= keys %emails;
print "$query\n$cnt\n";
}
print "Total number of emails: ";
my $cnt= keys %emails;
print "$cnt\n";
my @email = keys %emails;
my @VAR;
push @VAR, [ splice @email, 0, 100 ] while @email;
my $batch=100;
foreach my $VAR (@VAR){
open(OUT, ">Set_$batch\.txt") || die "Can't open file!\n";
print OUT join(",",@$VAR);
close OUT;
$batch=$batch+100;
}
I recommend against using LWP::Simple for any purpose, because it is impossible to configure it or to handle errors usefully. Using LWP::UserAgent, which it wraps, is nearly as simple anyway (though the error handling is a bit more involved). The examples below would replace the use LWP::Simple; and my $output = get($url); lines.
use strict;
use warnings;
use LWP::UserAgent;
my $ua = LWP::UserAgent->new(timeout => 30);
my $response = $ua->get($url);
unless ($response->is_success) {
# the Client-Warning, Client-Aborted, and X-Died headers each may be set on client/transport errors
die $response->status_line;
}
my $output = $response->decoded_content;
The core module HTTP::Tiny is also simple to use.
use strict;
use warnings;
use HTTP::Tiny;
my $ua = HTTP::Tiny->new;
my $response = $ua->get($url);
unless ($response->{success}) {
die $response->{status} == 599 ? $response->{content} : "$response->{status} $response->{reason}";
}
my $output = $response->{content};
If you really want an LWP::Simple approach that will at least report transport errors, try ojo from Mojolicious:
perl -Mojo -E'say g(shift)->text' http://example.com
In a script rather than a one-liner, you can use Mojo::UserAgent directly and also handle HTTP errors as above:
use strict;
use warnings;
use Mojo::UserAgent;
my $ua = Mojo::UserAgent->new;
my $response = $ua->get($url)->result;
unless ($response->is_success) {
die $response->code . ' ' . $response->message;
}
my $output = $response->text;

HTTP request with Perl to a utf-8 URI (some non-ascii chars inside) throws a 404 Not Found error

I'm trying to request a URL that contains some non-ASCII characters, for example: http://perry.wikia.com/wiki/Página_principal which has an á in it.
I've tried with LWP::UserAgent but it throws a 404 Not Found error:
#!/usr/bin/perl
use utf8;
use LWP::UserAgent;
use Encode qw(decode encode);
my $br = LWP::UserAgent->new;
#~ my $url = 'http://perry.wikia.com/wiki/Página_principal'; # doesn't work either
my $url = encode('UTF-8','http://perry.wikia.com/wiki/Página_principal');
my $response = $br->get($url);
if ($response->is_success) {
my $html = $response->decoded_content;
} else {
die "Unexpected error requesting $url : " . $response->status_line;
}
I've tried with HTTP::Tiny too, same result:
#!/usr/bin/perl
use utf8;
use HTTP::Tiny;
use Encode qw(decode encode);
my $url = 'http://perry.wikia.com/wiki/Página_principal';
#~ my $url = encode('UTF-8','http://perry.wikia.com/wiki/Página_principal'); # doesn't work either
my $response = HTTP::Tiny->new->get($url);
if ($response->{success}) {
my $html = $response->{content};
} else {
die "Unexpected error requesting $url : " . $response->{status};
}
This is not a bug in any of the Perl modules. This URL actually does return a 404.
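If you want to verify that yourself, a quick check that only prints the status the server sends back (a sketch, assuming the page still responds the way it did when this was asked) could look like this:
use strict;
use warnings;
use HTTP::Tiny;
use Encode qw(encode);
# Send the UTF-8 bytes of the path, as the question already does,
# and print only the status line the server returns.
my $url = encode('UTF-8', 'http://perry.wikia.com/wiki/Página_principal');
my $response = HTTP::Tiny->new->get($url);
print "$response->{status} $response->{reason}\n";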

Error Cisco Prime HTTP GET request

I'm trying to make an HTTP GET request with Cisco Prime:
#!/opt/local/bin/perl -w
use strict;
use JSON -support_by_pp;
use LWP 5.64;
use LWP::UserAgent;
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
my $ua = LWP::UserAgent->new;
my $BASE_URL = 'https://Host_name/webacs/api/v1/';
my $UN = "Username";
my $PW = "Password";
sub fetch ($) {
my ( $url ) = @_;
my $req = HTTP::Request->new( GET => $BASE_URL . $url );
$req->authorization_basic( $UN, $PW );
return $ua->request( $req )->content or die( "Cannot read from " . $BASE_URL . $url );
}
my $content = fetch( 'data/AccessPoints.json?.full=true' );
my $json = new JSON;
# these are some nice json options to relax restrictions a bit:
my $json_text =
$json->allow_nonref->utf8->relaxed->escape_slash->loose->allow_singlequote->allow_barekey->decode( $content );
foreach my $ap ( @{ $json_text->{queryResponse}->{'entity'} } ) {
print "------------------------\nAccess Point " . $ap->{'accessPointsDTO'}->{'@id'} . "\n";
print "Model:" . $ap->{'accessPointsDTO'}->{'model'} . "\n";
print "MAC Address:" . $ap->{'accessPointsDTO'}->{'macAddress'} . "\n";
print "Serial Number:" . $ap->{'accessPointsDTO'}->{'serialNumber'} . "\n";
print "Software Version:" . $ap->{'accessPointsDTO'}->{'softwareVersion'} . "\n";
print "Status:" . $ap->{'accessPointsDTO'}->{'status'} . "\n";
print "Location:" . $ap->{'accessPointsDTO'}->{'location'} . "\n";
What am I doing wrong? I have already tried with curl in the shell and it works:
curl --tlsv1 --user USER:PASSWORD --insecure https://Host_name/webacs/api/v1/data/AccessPoints.json?.full=true
but my Perl script doesn't work.
I have this error:
malformed JSON string, neither array, object, number, string or atom, at character offset 0 (before "Can't connect to 10....") at ersteProbe.pl line 28.
Fixed already. Thank you Borodin :)
New question:
I need authentication for Cisco Prime.
The code works already, but authentication doesn't.
I get this error:
500 Can't connect to 10.10.10.10:443 (certificate verify failed) at ersteProbeAuth.pl line 27.
Line 27:
die $res->status_line unless $res->is_success;
I'm rather new to Perl and can't fix this myself. If you have an idea, I'd be happy :)
#!/opt/local/bin/perl -w
use strict;
use warnings;
use JSON -support_by_pp;
use LWP 5.64;
use LWP::UserAgent;
use MIME::Base64;
use REST::Client;
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
my $ua = LWP::UserAgent->new;
my $BASE_URL = 'https://10.10.10.10/webacs/api/v1/';
my $UN='admin';
my $PW='admin';
# coding with Base 64
my $sys_id='Balalalalalal';
my $encoded_auth = encode_base64("$UN:$PW", '');
sub fetch {
my ($url) = @_;
my $res = $ua->get($BASE_URL . $url,
{'Authorization' => "Basic $encoded_auth",
'Accept' => 'application/json'});
die $res->status_line unless $res->is_success;
my $json = $res->decoded_content;
return $json
}
my $content = fetch('data/AccessPoints.json?.full=true/$sys_id');
my $json = new JSON;
# these are some nice json options to relax restrictions a bit:
my $json_text = $json->allow_nonref->utf8->relaxed->escape_slash->loose->allow_singlequote->allow_barekey->decode($content);
foreach my $ap (@{$json_text->{queryResponse}->{'entity'}}){
print "------------------------\nAccess Point ".$ap->{'accessPointsDTO'}->{'@id'}."\n";
print "Model:".$ap->{'accessPointsDTO'}->{'model'}."\n";
print "MAC Address:".$ap->{'accessPointsDTO'}->{'macAddress'}."\n";
print "Serial Number:".$ap->{'accessPointsDTO'}->{'serialNumber'}."\n";
print "Software Version:".$ap->{'accessPointsDTO'}->{'softwareVersion'}."\n";
print "Status:".$ap->{'accessPointsDTO'}->{'status'}."\n";
print "Location:".$ap->{'accessPointsDTO'}->{'location'}."\n";
}
It's hard to tell what's wrong without access to the web page, but almost certainly your request has failed.
I suggest you replace your fetch subroutine with this:
sub fetch {
my ( $url ) = @_;
my $res = $ua->get( $BASE_URL . $url );
die $res->status_line unless $res->is_success;
my $json = $res->decoded_content;
return $json;
}
Print the raw answer from the server to the console.
malformed JSON string, neither array, object, number, string or atom, at character offset 0 (before "Can't connect to 10....")
"Can't connect to 10...."
Maybe your code can't connect at all.
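On the follow-up "certificate verify failed" error: setting PERL_LWP_SSL_VERIFY_HOSTNAME=0 only relaxes hostname checking and, depending on the IO::Socket::SSL version, may not disable certificate verification at all. A minimal sketch of how to skip verification entirely (the moral equivalent of curl's --insecure flag, so only do this if you accept that risk) would be:
use strict;
use warnings;
use LWP::UserAgent;
use IO::Socket::SSL;
# Skip both hostname and certificate checks for this user agent,
# mirroring what curl --insecure does in the working shell command.
my $ua = LWP::UserAgent->new(
ssl_opts => { verify_hostname => 0, SSL_verify_mode => SSL_VERIFY_NONE },
);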

Perl: WWW::Mechanize, getting Facebook profile image?

#!/usr/bin/perl
#USE DECLARATIONS
use strict;
use warnings;
use WWW::Mechanize;
use Term::ANSIColor;
#VARIABLE DECLARATIONS
my $mech = WWW::Mechanize->new();
my $img;
my $title;
my $pic_page;
my $url;
my $count;
my #links;
#CONNECT TO FACEBOOK
$url = 'https://www.facebook.com/';
$mech = WWW::Mechanize->new();
$mech->agent_alias( 'Linux Mozilla' );
$mech->get( $url );
$title = $mech->title();
#LOGIN FORM
print "Connected to Facebook.\n";
print "Logging in...";
$mech->form_id("login_form");
$mech->field("email",'my#email.com');
$mech->field("pass",'mypass');
$mech->click();
print "done!\n";
#NAVIGATE TO USER PAGE
$mech->get("https://www.facebook.com/some.profile1234");
$title = $mech->title();
print "Finding $title 's profile pictue...\n";
#FIND PROFILE PICTURE
$img = $mech->find_image(url_regex => qr/s160x160/, );
print $img->url();
downloadImage($img->url(),$mech->title().".jpg");
sub downloadImage
{
my $local_file_name = $_[1];
my $b = WWW::Mechanize->new;
print "Downloading: $_[1]...";
$b->get( $_[0], ":content_file" => $local_file_name );
print "done!\n";
}
With this code I just want to download the profile picture of a given person (see #NAVIGATE TO USER PAGE) and save it. However, I get an error saying essentially that no images could be found! Why? (I am using $mech->find_image(url_regex => qr/s160x160/) to find the image on the profile page.)
You are using a new instance of Mechanize in your downloadImage sub, and that instance is not logged in to Facebook.
Try this:
downloadImage($img->url(),$mech->clone() );
sub downloadImage
{
my $mech = $_[1];
print "Downloading: $_[1]...";
$mech->get( $_[0], ":content_file" => $mech->title() . ".jpg" );
print "done!\n";
}

Perl WWW::Mechanize JSESSION issue

I am having a problem getting/staying logged in to a website with Perl Mechanize.
Looking at the headers, it appears that the JSESSIONID keeps changing. I am using a cookie jar, but I think it's getting overwritten somehow.
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Mechanize;
use HTTP::Cookies;
use Crypt::SSLeay;
use LWP::UserAgent;
use Crypt::SSLeay::CTX;
use Crypt::SSLeay::Conn;
use Crypt::SSLeay::X509;
use LWP::Simple qw(get);
use LWP::Debug;
my $cookie_jar = HTTP::Cookies->new(ignore_discard => 1);
my $agent = WWW::Mechanize->new(cookie_jar => $cookie_jar, noproxy=>0);
$agent->agent_alias('Linux Mozilla');
$ENV{HTTPS_CA_DIR} = 'cert/';
my $user = 'xxxx';
my $pass = 'xxxx';
my $url = '';
print "\n\n=========================================================\nGOING TO LOGIN PAGE:\n";
my $res = $agent->get($url);
for my $key ( $res->header_field_names() ) {
print $key, " : ", $res->header( $key ), "\n";
}
print "cookie: ".$agent->cookie_jar->as_string();
$agent->form_name('loginForm');
$agent->set_fields(
userId => $user,
password => $pass
);
$agent->submit();
print "\n\n=========================================================\nREDIRECT:\n";
my $res = $agent->submit();
for my $key ( $res->header_field_names() ) {
print $key, " : ", $res->header( $key ), "\n";
}
print "cookie: ".$agent->cookie_jar->as_string();
my $cUrl = '';
$cookie_jar->revert;
print "\n\n=========================================================\nGOING TO CAMPAIGN PAGE:\n";
my $res = $agent->get($cUrl);
for my $key ( $res->header_field_names() ) {
print $key, " : ", $res->header( $key ), "\n";
}
print "cookie: ".$agent->cookie_jar->as_string();
I am not sure why this worked, but I was able to resolve this by utilizing LWP::ConnCache:
$agent->conn_cache(LWP::ConnCache->new());
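For anyone copying this, the relevant pieces in context would look roughly like the sketch below; it keeps the same cookie jar setup as the question:
use strict;
use warnings;
use WWW::Mechanize;
use HTTP::Cookies;
use LWP::ConnCache;
my $cookie_jar = HTTP::Cookies->new(ignore_discard => 1);
my $agent = WWW::Mechanize->new(cookie_jar => $cookie_jar);
# Reuse the underlying connection between requests; as noted above,
# this kept the JSESSIONID stable, even if the exact reason is unclear.
$agent->conn_cache(LWP::ConnCache->new());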