I am trying to gather data from a website. Some anti-patterns make finding the right form objects difficult, but I have this solved. I am using a POST request to get around some JavaScript that acts as a wrapper to submit the form. My problem seems to be in getting the results from the mechanize->post method.
Here's a shortened version of my code.
use strict;
use warnings;
use HTML::Tree;
use LWP::Simple;
use WWW::Mechanize;
use HTTP::Request::Common;
use Data::Dumper;
$| = 1;    # unbuffered STDOUT so progress messages appear as they are printed
my $site_url = "http://someURL";
my $mech = WWW::Mechanize->new( autocheck => 1 );

# For every search number: load the search page, locate the SEARCH button and
# the search field, then POST the number back to the same URL and print the
# reply.
# NOTE: @numbers is not declared in this shortened listing; the full script
# has to supply it.
foreach my $number (@numbers)
{
    # Fetch the page once, through Mechanize, so the parsed markup and the
    # later POST belong to the same session (LWP::Simple::get shares no
    # cookies with $mech).
    $mech->get($site_url);
    my $tree = HTML::Tree->new();
    $tree->parse( $mech->content );
    $tree->eof;    # signal end of document so the tree is complete

    # Remember the id of the (last) SEARCH button anchor. Store the plain
    # attribute value: Dumper would yield the text "$VAR1 = '...';\n" instead.
    my $atag1 = "";
    foreach my $atag ( $tree->look_down( _tag => q{a}, 'class' => 'button', 'title' => 'SEARCH' ) )
    {
        print "Tag is ", $atag->attr('id'), "\n";
        $atag1 = $atag->attr('id');
    }

    # Enter permit number in "Number" search field
    my @forms = $mech->forms;
    my @fields = ();
    foreach my $form (@forms)
    {
        @fields = $form->param;    # ends up holding the field names of the last form
    }
    my $name = $fields[2];         # the third field is taken to be the search field
    die "No third form field found on $site_url\n" unless defined $name;
    print "field name and number is $name\n";
    $mech->field( $name, $number );
    print "field $name populated with search data $number\n" if $mech->success();

    # Submit by hand because the page's own submit goes through JavaScript.
    # The original key was the single-quoted literal '$atag1', which never
    # interpolates; this assumes the search field's own name is the key the
    # server expects -- confirm against the form.
    # Any hidden inputs the server requires must be added to this list too.
    $mech->post($site_url ,
        [
            $name                     => $number,
            'internal.wdk.wdkCommand' => $atag1,
        ]) ;
    print $mech->content; # I think this is where the problem is.
}
The data I get from my final print statement is the data from the original URL, not the page the POST command should take me to. What have I done wrong?
Many Thanks
Update
I don't have Firefox installed so I'm avoiding WWW::Mechanize::Firefox intentionally.
Turns out I was excluding some required hidden fields from my POST command.
Related
I'm trying to execute this code, but it gives me the following error:
Use of uninitialized value $site in string at C:\Users\USER\Desktop\script.pl line 35.
#!/usr/bin/perl
use LWP::UserAgent;
use File::Slurp;
use warnings;
use strict;
use HTTP::Request;
# Load the list of target URLs, one per line.
open (THETARGET, "<list.txt") || die "[-] Can't open the file";
my @TARGETS = <THETARGET>;
close THETARGET;
my $link=$#TARGETS + 1;    # number of lines read
OUTER: foreach my $site(@TARGETS){
    chomp($site);    # strips the newline in place; this $site exists only inside the loop
}
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
# NOTE: the inner "my" declares a brand-new, undefined $site, so $url is undef
# as well -- this is what triggers the "uninitialized value $site" warning.
my $url = my $site;
my $picture = "teest.png";
my %args;
my $field_name = "file[]";
my $buf ;
my $buf_ref = $args{'buf'} || \$buf ;
my $value = read_file( $picture , binmode => ':raw' , scalar_ref => 1 );
# Upload the picture as a multipart/form-data POST.
my $response = $ua->post( $url,
    Content_Type => 'form-data',
    Content => [ $field_name => ["$picture"] ]
);
print "$site";
I think what you are expecting is the value of $site to carry over from the for loop, but because it is defined in the for loop, its scope is limited to that loop. Perhaps what you intended is for all of the following code to also be inside that loop. Otherwise, that value for $site ceases to exist at the closing-brace two lines after it was created. Your line
my $url = my $site;
then creates a new variable $site (which is undef), and that undef value is used to set $url, so it is also undef. I suspect you initially had
my $url = $site;
then added the extra "my" to resolve an earlier error.
I hope this helps you.
My question is very simple: how do I get the session id from a cookie jar? I have tried the code below:
use warnings;
use HTTP::Cookies;
use HTTP::Request::Common;
use LWP::UserAgent;
# Create the user agent and give up if construction failed.
$ua = LWP::UserAgent->new;
if ( !$ua ) {
    print "Can not get the page :UserAgent fialed \n";
    return 0;    # NOTE(review): only valid if this listing lives inside a sub
}
# Persist cookies to ./cookies.dat and attach the jar to the agent.
my $cookies = HTTP::Cookies->new( file => './cookies.dat', autosave => 1 );
$ua->cookie_jar($cookies);
# push does all magic to exrtact cookies and add to header for further reqs. useragent should be newer
push @{ $ua->requests_redirectable }, 'POST';
# Log in; cookies set by the response are stored in the jar by the agent.
$result = $ua->request(
    POST "URL",
    { Username => 'admin',
      Password => 'admin',
      Submit   => 'Submit',
    }
);
# NOTE(review): extract_cookies appears to hand back the response object, not
# a session id -- confirm against the HTTP::Cookies documentation. To read a
# cookie value, walk the jar (e.g. with HTTP::Cookies->scan).
my $session_id = $cookies->extract_cookies($result);
print $session_id->content;
print "\n\n";
$resp = $result->content;
#print "Result is \n\n\n $resp \n";
# Second request; the jar supplies the session cookie automatically.
$anotherURI = URL;    # placeholder for the real page URL
$requestObject = HTTP::Request::Common::GET $anotherURI;
$result = $ua->request($requestObject);
$resp = $result->content;
#print $resp."\n";
I don't understand where the session id is stored or how to fetch it.
Note:- URL contains the URL of the page.
I wrote HTTP::CookieMonster to make this kind of thing a bit easier. If you don't know which cookie you're looking for, you can do something like this:
use strict;
use warnings;
use HTTP::CookieMonster;
use WWW::Mechanize;
# Wrap the Mechanize cookie jar so its cookies can be inspected as objects.
my $mech = WWW::Mechanize->new;
my $monster = HTTP::CookieMonster->new( $mech->cookie_jar );
my $url = 'http://www.nytimes.com';
$mech->get( $url );    # the response fills the jar

# List every cookie now held in the jar.
my @all_cookies = $monster->all_cookies;
foreach my $cookie ( @all_cookies ) {
    printf( "key: %s value: %s\n", $cookie->key, $cookie->val);
}
If you already know the cookie's key, you can do something like:
my $cookie = $monster->get_cookie('RMID');
# The lookup may find nothing; avoid calling ->val on an undefined value.
my $session_id = $cookie ? $cookie->val : undef;
Have a look at HTTP::Cookies->scan.
Something like this should do the trick (should add a constraint on the domain at least):
# Walk every cookie in the jar and keep the value of the one named
# "session_id". The callback receives the cookie's attributes positionally.
my $session_id;
$cookie_jar->scan(
    sub {
        my ($key, $val, $path, $domain, $port,
            $path_spec, $secure, $expires, $discard, $hash
        ) = @_;
        if ( $key eq "session_id" ) {
            $session_id = $val;
        }
    }
);
#!/usr/bin/perl -w
use WWW::LinkedIn;
use CGI; # load CGI routines
use CGI::Session;
$q = CGI->new; # create new CGI object
print $q->header, # create the HTTP header
    $q->start_html('hello world'), # start the HTML
    $q->h1('hello world'), # level 1 header
    $q->end_html; # end the HTML
my $consumer_key = 'xxxxxxx';
my $consumer_secret = 'xxxxxxxxx';
my $li = WWW::LinkedIn->new(
    consumer_key    => $consumer_key,
    consumer_secret => $consumer_secret,
);
# Only act when the request carries a query string (the OAuth callback).
# The "// ''" keeps the test quiet when QUERY_STRING is not set at all.
if ( length( $ENV{'QUERY_STRING'} // '' ) > 0 ) {
    # Split the query string into name/value pairs and URL-decode each value.
    $buffer = $ENV{'QUERY_STRING'};
    @pairs = split( /&/, $buffer );
    foreach $pair (@pairs) {
        ( $name, $value ) = split( /=/, $pair );
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $in{$name} = $value;
    }
    # Re-open the session that stored the request token.
    $sid = $q->cookie('CGISESSID') || $q->param('CGISESSID') || undef;
    $session = CGI::Session->new( undef, $sid, { Directory => '/tmp' } );
    # Exchange the verifier plus request token for an access token.
    my $access_token = $li->get_access_token(
        verifier             => $in{'oauth_verifier'},
        request_token        => $session->param("request_token"),
        request_token_secret => $session->param("request_token_secret"),
    );
    undef($session);
    # Fetch the profile; the reply is an XML document held in a plain string.
    my $profile_xml = $li->request(
        request_url =>
            'http://api.linkedin.com/v1/people/~:(id,first-name,last-name,positions,industry,distance)',
        access_token        => $access_token->{token},
        access_token_secret => $access_token->{secret},
    );
    print $profile_xml;
}
The output is printed on a single line. I want each value printed on a separate line.
OUTPUT
aAVGFD34 jj DD 456456 2003 6 true ara systems Technology and Services Technology and Services 0
How can I get each column value from the $profile_xml variable?
id avsdff
first name jj
lastname dd
Simply use Data::Dumper and XML::Simple.
use Data::Dumper;
use XML::Simple; #you may want to install a specific package from your distribution
{...}
# XMLin is exported by XML::Simple; it parses the XML string into a hash reference.
my $hash_ref = XMLin($profile_xml);
print Dumper($hash_ref);    # dumps the whole structure, one element per line
I don't know whether you would like prettier output.
If so, try a simple printout of your hash reference:
# Print each top-level element on its own line. Iterate over the parsed
# structure ($hash_ref from XMLin), not the raw XML string in $profile_xml.
foreach my $key (sort keys %{$hash_ref}) {
    print "$key $hash_ref->{$key}\n";
}
Here I am going to show a way to parse the data and print it on individual lines.
# Parse the profile XML into XML::Parser's nested-array "Tree" form.
my $parser = XML::Parser->new( Style => 'Tree' );
my $tree = $parser->parse( $profile_xml );
#print Dumper( $tree ); you can use this see the data displayed in the tree formatted
# $tree->[1] is the root element's content list. The positions used below
# (4, 8) are specific to this document's layout -- verify them with the
# Dumper line above. The stray ',"\n"' after each assignment was discarded
# in void context and has been removed.
my $UID = $tree->[1]->[4]->[2];
print "User ID:$UID";
print"</br>";
my $FirstName = $tree->[1]->[8]->[2];
print "First Name:$FirstName";
print"</br>";
As a sample I have shown UID and FirstName, and this works fine.
I'm using the AnyEvent::Twitter::Stream module to grab tweets. Ultimately I'm trying to print the tweets to a file but I'm unable (I think) to get the tweet as a JSON object. My code is as follows:
#!/Applications/XAMPP/xamppfiles/bin/perl
use AnyEvent::Twitter::Stream;
my $done = AnyEvent->condvar;    # waited on at the end to keep the event loop running
BEGIN {
    use Exporter;
    our @ISA = qw(Exporter);
    our @EXPORT = qw{
        &init
    };
}
# Print one tweet exactly as delivered by the stream. $tweet is a hash
# reference here, so printing it directly yields only "HASH(0x...)".
sub print_tweet {
    my $tweet = shift;
    print $tweet;
}
# receive updates from @following_ids
my $listener = AnyEvent::Twitter::Stream->new(
    username     => 'XXXXXX',
    password     => 'XXXXXX',
    method       => 'sample', # "firehose" for everything, "sample" for sample timeline
    decode_json  => 1,
    on_tweet     => sub {
        my $tweet = shift;
        print_tweet($tweet);
    },
    on_keepalive => sub {
        warn "ping\n";
    },
    on_delete    => sub {
        my ($tweet_id, $user_id) = @_; # callback executed when twitter send a delete notification
    },
    timeout      => 45,
);
$done->recv;    # blocks; nothing in this script ever calls $done->send
Yet when I print out the tweet in the print_tweet subroutine all I get is:
HASH(0x8f0ad0)HASH(0x8f0640)HASH(0x875990)HASH(0x8f0ab0)HASH(0x8e0d80)HASH(0x8f06e0)HASH(0x8f08f0)HASH(0x93ef30)HASH(0x876190)HASH(0x93ee60)HASH(0x8f0610)HASH(0x8f0b00)HASH(0x8e13e0)HASH(0x93ee20)HASH(0x8f0a20)HASH(0x8e1970)HASH(0x8f0900)
I've even tried to print out the tweet assuming it is a hash as follows:
# Attempt to print the tweet as a hash (kept as posted; it prints nothing).
sub print_tweet {
    my ($jsonref, $tweet) = @_;
    my $tweet = shift;    # redeclares $tweet and overwrites it with the first argument
    print %tweet;         # %tweet is a separate hash that is never filled, not the $tweet reference
}
Yet that produced nothing. It appears that AnyEvent::Twitter::Stream is returning $tweet as an object based on their sample code of:
on_tweet => sub {
my $tweet = shift;
warn "$tweet->{user}{screen_name}: $tweet->{text}\n";
},
And I know I can print out individual fields, but can I get the raw JSON object? I must be missing something or my 'noob'ness is greater than I thought...
UPDATE
I was able to ALMOST get it by changing print_tweet to the following:
# Serialise the decoded tweet back into JSON text and write it to STDOUT.
sub print_tweet {
    my ($tweet) = @_;
    print to_json($tweet);
}
It prints out MOST of the JSON object but complains about wide characters, which I believe is an issue with the output being utf8 format? I'm unsure how to solve this issue though....
Looks like it's returning a hashref. If you're not sure, you could try doing something like this.
use Data::Dumper;
...
print Dumper $tweet;
That should give you an idea of what's being passed, then you can grab what you want - probably something like this:
print "$tweet->{user}{screen_name}: $tweet->{text}\n";
In print_tweet, you're declaring $tweet twice. First, you assign it the second element of the @_ array, then you redeclare it and assign it the first element of @_, because shift operates on @_ by default.
Of course, if you had use warnings turned on, you would have seen
"my" variable $tweet masks earlier declaration in same scope
That's why you should always use strict; use warnings; at the top of your code.
The strings of output that you're seeing are hash references, the result of printing what's in the first argument to print_tweet (what you initially assign to $jsonref). If you want to print out the value of $tweet, get rid of the line where you clobber it with shift.
Figured it out. I need to use the JSON module and encode. When encoding you MUST use the {utf8 => 1} option to account for the UTF-8 characters you get from Twitter. The final code is here:
#!/Applications/XAMPP/xamppfiles/bin/perl
use JSON;
use utf8;
use AnyEvent::Twitter::Stream;
my $done = AnyEvent->condvar;    # waited on at the end to keep the event loop running
BEGIN {
    use Exporter;
    our @ISA = qw(Exporter);
    our @EXPORT = qw{
        &init
    };
}
# Print one tweet as a line of JSON. The utf8 option makes to_json emit
# UTF-8 encoded bytes, which avoids the "wide character" complaint.
sub print_tweet {
    my $tweet = shift;
    my $json_output = to_json($tweet, {utf8 => 1});
    print $json_output;
    print "\n";
}
# receive updates from @following_ids
my $listener = AnyEvent::Twitter::Stream->new(
    username     => 'XXXXXXXX',
    password     => 'XXXXXXXX',
    method       => 'sample', # "firehose" for everything, "sample" for sample timeline
    on_tweet     => sub {
        my $tweet = shift;
        print_tweet($tweet);
    },
    on_keepalive => sub {
        warn "ping\n";
    },
    on_delete    => sub {
        my ($tweet_id, $user_id) = @_; # callback executed when twitter send a delete notification
    },
    timeout      => 45,
);
$done->recv;    # blocks; nothing in this script ever calls $done->send
Thanks to the help you guys gave, the DataDumper at least let me verify the format, it just didn't produce the final result.
I'm trying to use the Net::Twitter::Stream Perl module from CPAN to read the stream from sample.json. I believe this is the correct module, though the way they crafted it allows one to process the filter stream. I've modified it as such, but I must be missing something as I don't get any data in return. I establish a connection but nothing comes back. I'm guessing this should be an easy fix, but I'm a touch new to this part of Perl.....
package Net::Twitter::Stream;
use strict;
use warnings;
use IO::Socket;
use MIME::Base64;
use JSON;
use IO::Socket::SSL;
use LibNewsStand qw(%cf);
use utf8;
our $VERSION = '0.27';
1;
=head1 NAME
Using Twitter streaming api.
=head1 SYNOPSIS
use Net::Twitter::Stream;
Net::Twitter::Stream->new ( user => $username, pass => $password,
callback => \&got_tweet,
track => 'perl,tinychat,emacs',
follow => '27712481,14252288,972651' );
sub got_tweet {
my ( $tweet, $json ) = @_; # a hash containing the tweet
# and the original json
print "By: $tweet->{user}{screen_name}\n";
print "Message: $tweet->{text}\n";
}
=head1 DESCRIPTION
The Streaming version of the Twitter API allows near-realtime access to
various subsets of Twitter public statuses.
The /1/status/filter.json api call can be used to track up to 200 keywords
and to follow 200 users.
HTTP Basic authentication is supported (no OAuth yet) so you will need
a twitter account to connect.
Only the JSON format is supported. Twitter may deprecate XML.
More details at: http://dev.twitter.com/pages/streaming_api
Options
user, pass: required, twitter account user/password
callback: required, a subroutine called on each received tweet
perl@redmond5.com
@martinredmond
=head1 UPDATES
https fix: iwan standley <iwan@slebog.net>
=cut
# Constructor. Connects to stream.twitter.com over TLS, sends the request and
# then reads the chunked response, handing every decoded tweet to the caller's
# callback. It returns only after the connection has ended.
#
# Named arguments:
#   user, pass            required; account credentials for HTTP Basic auth
#   callback              required; code ref called as callback($tweet, $json)
#   connection_closed_cb  optional; code ref called with the socket at the end
#   follow, track         optional; sent as form-encoded request content
#
# Dies with a usage message when a required argument is missing, and when the
# connection cannot be established.
sub new {
    my $class = shift;
    my %args  = @_;
    die "Usage: Net::Twitter::Stream->new ( user => 'user', pass => 'pass', callback => \&got_tweet_cb )" unless
        $args{user} && $args{pass} && $args{callback};
    my $self = bless {}, $class;
    $self->{user} = $args{user};
    $self->{pass} = $args{pass};
    $self->{got_tweet} = $args{callback};
    $self->{connection_closed} = $args{connection_closed_cb} if
        $args{connection_closed_cb};

    # Start from an empty string: "my $x = ... if COND" leaves $x in an
    # unspecified state when COND is false, and length(undef) would warn.
    my $content = '';
    $content = "follow=$args{follow}" if $args{follow};
    $content = "track=$args{track}" if $args{track};
    $content = "follow=$args{follow}&track=$args{track}\r\n" if $args{track} && $args{follow};
    my $auth = encode_base64 ( "$args{user}:$args{pass}" );
    chomp $auth;
    my $cl = length $content;
    my $req = <<EOF;
GET /1/statuses/sample.json HTTP/1.1\r
Authorization: Basic $auth\r
Host: stream.twitter.com\r
User-Agent: net-twitter-stream/0.1\r
Content-Type: application/x-www-form-urlencoded\r
Content-Length: $cl\r
\r
EOF

    # The https port needs a TLS socket; a plain INET socket connects but
    # never receives a usable reply.
    my $sock = IO::Socket::SSL->new ( PeerAddr => 'stream.twitter.com:https' )
        or die "Cannot connect to stream.twitter.com: " . IO::Socket::SSL::errstr() . "\n";

    # The request has to be sent, otherwise the server has nothing to answer.
    $sock->print ( "$req$content" );

    # Skip the response headers: everything up to the first blank line.
    while ( my $line = $sock->getline ) {
        last if $line =~ /^\s*$/;
    }

    # Each chunk is a hexadecimal length line followed by that many bytes.
    while ( my $line = $sock->getline ) {
        next if $line =~ /^\s*$/; # skip empty lines
        $line =~ s/[^a-fA-F0-9]//g; # stop hex from compaining about \r
        my $jsonlen = hex ( $line );
        last if $jsonlen == 0;    # a zero-length chunk ends the stream
        eval {
            my $json;
            my $len = $sock->read ( $json, $jsonlen );
            my $o = from_json ( $json );
            $self->{got_tweet} ( $o, $json );
        };
        # One bad chunk or a dying callback must not end the stream, but it
        # should not vanish silently either.
        warn "Net::Twitter::Stream: could not process tweet: $@" if $@;
    }
    $self->{connection_closed} ( $sock ) if $self->{connection_closed};
    return $self;
}
You don't need to post the source, we can pretty much figure it out. You should try one of the examples, but my advice is to use AnyEvent::Twitter::Stream which comes with a good example that you only have to modify a bit to get it running
# Open a Twitter stream for SEARCH_TERM, route every tweet to got_tweet and
# ask to be told when the connection ends.
sub parse_from_twitter_stream {
    my $user = 'XXX';
    my $password = 'YYYY';
    my $stream = Net::Twitter::Stream->new ( user => $user, pass => $password,
        callback => \&got_tweet,
        connection_closed_cb => \&connection_closed,
        track => SEARCH_TERM);
}

# The two callbacks below were written inside parse_from_twitter_stream.
# Named subs are package-level wherever they are written, so they are
# declared at top level here; behaviour is the same.

# Called once the stream has ended: wait a moment, then try to reconnect.
sub connection_closed {
    sleep 1;
    warn "Connection to Twitter closed";
    parse_from_twitter_stream();#This isn't working for me -- can't get connection to reopen after disconnect
}

# Called for every tweet received.
sub got_tweet {
    my ( $tweet, $json ) = @_; # a hash containing the tweet
    #Do stuff here
}