I have a perl script that is supposed to use HTTP::Response to grab an XML file and then parse it. The script works great, except when it can't reach the server its trying to get the info from.
I know I have to have an error checking and if an error does exist, use a return to continue on with the loop, I have done with with NET::SNMP and SSH, but I can't seem to get it working for this scenario. I'm about to be pull my hair in frustration. Any help is greatly appreciated.
#! /usr/bin/perl
use LWP::UserAgent;
use HTTP::Request::Common;
use Net::SNMP;
use XML::Simple;
use HTTP::Status;
$ua = LWP::UserAgent->new;
if ( open ( FH, "DeviceList.txt" ) )
{
while ( defined ( my $line = <FH> ) )
{
$line =~ s/\s+$//;
$device = $line;
&checkcon;
}
close FH;
}
else
{
print "DeviceList.txt file not found\n";
}
#exit;
sub checkcon
{
my ($req, $error) = HTTP::Request->new(GET => 'https://'.$device.'/getxml?location=/HelloWorld');
$ua->ssl_opts(SSL_verify_mode => SSL_VERIFY_NONE);
$ua->timeout(10);
$req->authorization_basic('test', 'test');
$test = $ua->request($req)->content;
print $req;
if ($test =~ "parser error") {
print "No Workie\n";
return;
}
#if (!is_success ($req))
#{
#print "No Workie!";
#return;
#}
# create object
my $xml = new XML::Simple;
# read XML file
my $data = $xml->XMLin("$test");
print "Looping";
# access XML data
`echo "$device,$data->{Hello}{World}{content}" >> Test.txt`;
}
exit 0;
Related
my $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]&usehistory=y";
print "\n before url \n";
print $url;
#post the esearch URL
my $output = get($url);
print $output;
I have not used perl ever before.
If I hit this URL in browser, I do get the XML.
However, From what I see in output from script, $output is empty and
print $output;
returns
Use of uninitialized value in print at ./extractEmails.pl line 48.
Please suggest what's wrong and how to fix it
Edit:
As suggested, complete code:
#!/usr/bin/perl -w
# A perlscript written by Joseph Hughes, University of Glasgow
# use this perl script to parse the email addressed from the affiliations in PubMed
use strict;
use LWP::Simple;
my ($query,#queries);
#Query the Journal of Virology from 2014 until the present (use 3000)
$query = 'journal+of+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Journal of General Virology
$query = 'journal+of+general+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Virology
$query = 'virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Archives of Virology
$query = 'archives+of+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Virus Research
$query = 'virus+research[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Antiviral Research
$query = 'antiviral+research[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Viruses
$query = 'viruses[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
push(#queries,$query);
#Journal of Medical Virology
$query = 'journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]';
# global variables
push(#queries,$query);
my %emails;
my $emailcnt=0;
my $count=1;
#assemble the esearch URL
foreach my $query (#queries){
my $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
#my $url = $base . "esearch.fcgi?db=pubmed&term=$query&usehistory=y";
my $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=journal+of+medical+virology[journal]+AND+2014[Date+-+Publication]:3000[Date+-+Publication]&usehistory=y";
print "\n before url \n";
print $url;
#post the esearch URL
my $output = get($url);
print "\n before output \n";
print get($url);
print $output;
#parse WebEnv, QueryKey and Count (# records retrieved)
my $web = $1 if ($output =~ /<WebEnv>(\S+)<\/WebEnv>/);
my $key = $1 if ($output =~ /<QueryKey>(\d+)<\/QueryKey>/);
my $count = $1 if ($output =~ /<Count>(\d+)<\/Count>/);
#retrieve data in batches of 500
my $retmax = 500;
for (my $retstart = 0; $retstart < $count; $retstart += $retmax) {
my $efetch_url = $base ."efetch.fcgi?db=pubmed&WebEnv=$web";
$efetch_url .= "&query_key=$key&retmode=xml";
my $efetch_out = get($efetch_url);
my #matches = $efetch_out =~ m(<Affiliation>(.*)</Affiliation>)g;
#print "$_\n" for #matches;
for my $match (#matches){
if ($match=~/\s([a-zA-Z0-9\.\_\-]+\#[a-zA-Z0-9\.\_\-]+)$/){
my $email=$1;
$email=~s/\.$//;
$emails{$email}++;
}
}
}
my $cnt= keys %emails;
print "$query\n$cnt\n";
}
print "Total number of emails: ";
my $cnt= keys %emails;
print "$cnt\n";
my #email = keys %emails;
my #VAR;
push #VAR, [ splice #email, 0, 100 ] while #email;
my $batch=100;
foreach my $VAR (#VAR){
open(OUT, ">Set_$batch\.txt") || die "Can't open file!\n";
print OUT join(",",#$VAR);
close OUT;
$batch=$batch+100;
}
I recommend against using LWP::Simple for any reason because it is impossible to configure it or handle errors usefully. Using LWP::UserAgent which it wraps is nearly as simple anyway (though the error handling is a bit complicated). The below examples would replace the use LWP::Simple; and my $output = get($url); lines.
use strict;
use warnings;
use LWP::UserAgent;
my $ua = LWP::UserAgent->new(timeout => 30);
my $response = $ua->get($url);
unless ($response->is_success) {
# the Client-Warning, Client-Aborted, and X-Died headers each may be set on client/transport errors
die $response->status_line;
}
my $output = $response->decoded_content;
The core HTTP::Tiny is also simple.
use strict;
use warnings;
use HTTP::Tiny;
my $ua = HTTP::Tiny->new;
my $response = $ua->get($url);
unless ($response->{success}) {
die $response->{status} == 599 ? $response->{content} : "$response->{status} $response->{reason}";
}
my $output = $response->{content};
If you really want an LWP::Simple approach that will at least report transport errors, try ojo from Mojolicious:
perl -Mojo -E'say g(shift)->text' http://example.com
In a script rather than a oneliner, you can use Mojo::UserAgent directly, and also handle HTTP errors like above:
use strict;
use warnings;
use Mojo::UserAgent;
my $ua = Mojo::UserAgent->new;
my $response = $ua->get($url)->result;
unless ($response->is_success) {
die $response->code . ' ' . $response->message;
}
my $output = $response->text;
I created a script which access a URL with basic authentication. Once I've passed the credentials, it will download the file in my local folder. The problem is I got an incorrect filename. Here's my sample code:
#!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use HTTP::Cookies;
my $url = "http://sampleurl.com";
my $dir = 'C:\\pl';
my $mech = WWW::Mechanize->new();
$mech->cookie_jar(HTTP::Cookies->new());
$mech ->credentials("sampleurl.com:80", "sampleurl.com", "username", "password");
$mech->get($url);
my $res = $mech->res();
if($res->is_success){
my $filename = $res->filename();
print $filename;
$mech->save_content( $dir.'\\'.$filename, binmode => ':raw', decoded_by_headers => 1 );
print $mech->status;
}else{
print "Error";
}
exit 0;
Instead of downloading sample_url.DOC, it only downloaded sample with no file extension. can you help with my problem? I want to download the whole file.
There's no guarantee that $res->filename(); will produce a file extension or anything at all for that matter. The page you're currently reading doesn't have a filename extension for example.
You will have to guess a filename extension from the media type.
use MIME::Types qw(by_mediatype);
...
my $filename = $r->filename();
if(!$filename) { $filename = 'untitled'; }
if($filename !~ /\.[a-zA-Z0-9]{1,4}$/) {
my $type = $res->header('Content-Type');
my $ext = 'txt';
if($type) {
my #types = by_mediatype($type);
if($#types > -1) {
$ext = $types[0][0];
}
}
$filename .= '.' . $ext;
}
print $filename;
I am trying to automate one of my task where i have to download a last 5 releases of some softwares let say Google talk from http://www.filehippo.com/download_google_talk/.
I have never done such type of programming i mean, to interact with Web through perl .I have just read and came to know that through CGI module we can implement this thing so i tried with this module.
If some body can give me better advice then please you are welcome :)
My code :
#!/usr/bin/perl
use strict;
use warnings;
use CGI;
use CGI::Carp qw/fatalsToBrowser/;
my $path_to_files = 'http://www.filehippo.com/download_google_talk/download/298ba15362f425c3ac48ffbda96a6156';
my $q = CGI->new;
my $file = $q->param('file') or error('Error: No file selected.');
print "$file\n";
if ($file =~ /^(\w+[\w.-]+\.\w+)$/) {
$file = $1;
}
else {
error('Error: Unexpected characters in filename.');
}
if ($file) {
download($file) or error('Error: an unknown error has occured. Try again.');
}
sub download
{
open(DLFILE, '<', "$path_to_files/$file") or return(0);
print $q->header(-type => 'application/x-download',
-attachment => $file,
'Content-length' => -s "$path_to_files/$file",
);
binmode DLFILE;
print while <DLFILE>;
close (DLFILE);
return(1);
}
sub error {
print $q->header(),
$q->start_html(-title=>'Error'),
$q->h1($_[0]),
$q->end_html;
exit(0);
}
In above code i am trying to print the file name which i wan to download but it is displaying error message.I am not able to figure it out why this error "Error: No file selected." is comming.
Sorry, but you are in the wrong track. Your best bet is this module: http://metacpan.org/pod/WWW::Mechanize
This page contain a lot of example to start with: http://metacpan.org/pod/WWW::Mechanize::Examples
It could be more elegant but I think this code easier to understand.
use strict;
use warnings;
my $path_to_files = 'http://www.filehippo.com/download_google_talk/download/298ba15362f425c3ac48ffbda96a6156';
my $mech = WWW::Mechanize->new();
$mech->get( $path_to_files );
$mech->save_content( "download_google_talk.html" );#save the base to see how it looks like
foreach my $link ( $mech->links() ){ #walk all links
print "link: $link\n";
if ($link =~ m!what_you_want!i){ #if it match
my $fname = $link;
$fname =~ s!\A.*/!! if $link =~ m!/!;
$fname .= ".zip"; #add extension
print "Download $link to $fname\n";
$mech->get($link,":content_file" => "$fname" );#download the file and stoore it in a fname.
}
}
Is there any perl module like File::Remote, that works over http (read only)? Something like
$magic_module->open( SCRAPE, "http://somesite.com/");
while(<SCRAPE>)
{
#do something
}
Yes, of course. You can use LWP::Simple:
use LWP::Simple;
my $content = get $url;
Don't forget to check if the content is not empty:
die "Can't download $url" unless defined $content;
$content will be undef it some error occurred during downloading.
Also you can use File::Fetch module:
File::Fetch
->new(uri => 'http://google.com/robots.txt')
->fetch(to => \(my $file));
say($file);
With HTTP::Tiny:
use HTTP::Tiny qw();
my $response = HTTP::Tiny->new->get('http://example.com/');
if ($response->{success}) {
print $response->{content};
}
If you want unified interface to handle both local, remote (HTTP/FTP) and whatever else files, use IO::All module.
use IO::All;
# reading local
my $handle = io("file.txt");
while(defined(my $line = $handle->getline)){
print $line
}
# reading remote
$handle = io("http://google.com");
while(defined(my $line = $handle->getline)){
print $line
}
Hey I'm writing a program that uses an #INC hook to decrypt the real perl source from blowfish. I'm having a quite annoying problem that doesn't show up using warnings or any of my standard tricks... Basically when I get to creating the new cipher object the loop skips to the next object in #INC without an error or anything.... I dont know what to do!
#!/usr/bin/perl -w
use strict;
use Crypt::CBC;
use File::Spec;
sub load_crypt {
my ($self, $filename) = #_;
print "Key?\n: ";
chomp(my $key = <STDIN>);
for my $prefix (#INC) {
my $buffer = undef;
my $cipher = Crypt::CBC->new( -key => $key, -cipher => 'Blowfish');
my $derp = undef;
$cipher ->start('decrypting');
open my $fh, '<', File::Spec->($prefix, "$filename.nc") or next;
while (read($fh,$buffer,1024)) {
$derp .= $cipher->crypt($buffer);
}
$derp .= $cipher->finish;
return ($fh, $derp);
}
}
BEGIN {
unshift #INC, \&load_crypt;
}
require 'gold.pl';
Also if I put the actual key in the initializing method it still fails
You've got a bunch of problems here. First of all, you're using File::Spec wrong. Second, you're returning a filehandle that's already at end of file, and a string that isn't a valid return value. (Also, I'd put the key prompt outside of the hook.)
#!/usr/bin/perl -w
use strict;
use Crypt::CBC;
use File::Spec;
# Only read the key once:
print "Key?\n: ";
chomp(my $key = <STDIN>);
sub load_crypt {
my ($self, $filename) = #_;
return unless $filename =~ /\.pl$/;
for my $prefix (#INC) {
next if ref $prefix;
#no autodie 'open'; # VERY IMPORTANT if you use autodie!
open(my $fh, '<:raw', File::Spec->catfile($prefix, "$filename.nc"))
or next;
my $buffer;
my $cipher = Crypt::CBC->new( -key => $key, -cipher => 'Blowfish');
my $derp;
$cipher->start('decrypting');
while (read($fh,$buffer,1024)) {
$derp .= $cipher->crypt($buffer);
}
$derp .= $cipher->finish;
# Subroutine writes 1 line of code into $_ and returns 1 (false at EOF):
return sub { $derp =~ /\G(.*\n?)/g and ($_ = $1, 1) };
}
return; # Didn't find the file; try next #INC entry
} # end load_crypt
# This doesn't need a BEGIN block, because we're only using the hook
# with require, and that's a runtime operation:
unshift #INC, \&load_crypt;
require 'gold.pl';