Here is my dilemma: I am trying to fill out a web form and get a result back from that form using LWP::UserAgent. Here is an example of my code:
#!/usr/bin/perl -w
use strict;
use LWP;
use HTTP::Request::Common;
use LWP::Debug qw(+);

# Form fields, kept in submission order, that are POSTed to the remote form.
my @form_fields = (
    firstName    => 'Me',
    lastName     => 'Testing',
    addressLine1 => '123 Main Street',
    addressLine2 => '',
    city         => 'Anyplace',
    state        => 'MN',
    zipCode      => '55555',
    card         => 'visa',
    cardNumber   => '41111111111111111',
    ccv2         => '123',
    exp_month    => '07',
    exp_year     => '2015',
    shared_key   => 'hellos',
);

# The agent is restricted to the https scheme.
my $ua  = LWP::UserAgent->new( protocols_allowed => ['https'] );
my $req = POST( 'https://their.securesite.com/index.php', \@form_fields );

my $response = $ua->request($req);

# Print the success flag, the status line and the raw body, one per line.
for my $field ( $response->is_success(), $response->status_line, $response->content ) {
    print $field . "\n";
}
When I run this, I get back a 200 OK and a "1" for success, but not the response page from the form. Just the closing tags:
</body>
</html>
Could this possibly be due to the fact that the form page and response page both have the same URL? I am new to LWP, so I am grasping at straws here. The problem may still be on the client's end, but I want to rule out any issues on my end as well.
Thanks in advance for any help you guys can give - I am Googled out.
If you can use Mojo::UserAgent (part of the Mojolicious suite of tools) the code would look like this. Note that you might need IO::Socket::SSL in order to use HTTPS.
#!/usr/bin/env perl
use strict;
use warnings;
use Mojo::UserAgent;

# POST the form fields as application/x-www-form-urlencoded and print the
# response body, or a description of what went wrong.
my $ua = Mojo::UserAgent->new;
my $tx = $ua->post(
    'https://their.securesite.com/index.php',
    form => {
        firstName    => 'Me',
        lastName     => 'Testing',
        addressLine1 => '123 Main Street',
        addressLine2 => '',
        city         => 'Anyplace',
        state        => 'MN',
        zipCode      => '55555',
        card         => 'visa',
        cardNumber   => '41111111111111111',
        ccv2         => '123',
        exp_month    => '07',
        exp_year     => '2015',
        shared_key   => 'hellos',
    }
);

# $tx->error returns undef on success, otherwise a hash reference with a
# "message" key and, for HTTP-level (4xx/5xx) failures, a "code" key.
# The older $tx->success / list-returning $tx->error interface is no longer
# available in current Mojolicious releases.
if ( my $err = $tx->error ) {
    print $err->{code}
        ? "$err->{code} response: $err->{message}\n"
        : "Connection error: $err->{message}\n";
}
else {
    print $tx->res->body;

    # or work with the resulting DOM
    # my $dom = $tx->res->dom;
}
The interface is a little different, but it has lots of nice features, including Mojo::DOM integration for parsing the response HTML.
Use $response->decoded_content to get the content without the headers. See HTTP::Message for more information.
#!/usr/bin/perl -w
use strict;
use URI;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Request::Common qw(POST);

my $url = URI->new('https://their.securesite.com/index.php');
my $ua  = LWP::UserAgent->new();

# Build the request with POST() so the field list is form-encoded into the
# body and Content-Type / Content-Length are set. HTTP::Request->new() takes
# an already-serialised string as its content argument, so handing it an
# array reference never transmitted these fields.
my $request = POST(
    $url,
    'User-Agent' => "perl ua/ v0.001",
    'Accept'     => "text/xml, multipart/*, application/soap",
    Content      => [
        firstName    => 'Me',
        lastName     => 'Testing',
        addressLine1 => '123 Main Street',
        addressLine2 => '',
        city         => 'Anyplace',
        state        => 'MN',
        zipCode      => '55555',
        card         => 'visa',
        cardNumber   => '41111111111111111',
        ccv2         => '123',
        exp_month    => '07',
        exp_year     => '2015',
        shared_key   => 'hellos',
    ],
);

my $response = $ua->request($request);

# Print the charset-decoded body on success; otherwise die with the status line.
if ( $response->is_success ) {
    print $response->decoded_content, "\n";
}
else {
    die $response->status_line;
}
Check the value of $response->as_string
It'll show you full http response with headers
Related
Writing a REST application with perl Dancer2. I set the serializer setting to the format in code.
set serializer => 'JSON';
I wrote a test file to test the application, but the POST request fails.
The REST application receives the keys, but their values are null.
DBD::Pg::st execute failed: ERROR: null value in column "email" of relation "owners" violates not-null constraint
How to set serialized format content in Plack::Test?
use strict;
use warnings;
use Test::More;
use Test::Deep;
use Plack::Test;
use Plack::Util;
use HTTP::Request::Common;
use JSON::MaybeXS qw(decode_json encode_json);
use Data::Dumper qw(Dumper);
use Storable qw(freeze thaw);
use utf8;
use MyApp;

# Registration payload sent to the application under test.
my %data = (
    password => 'A12345678',
    email    => 'test@test.com',
);

# APP Start
my $app  = MyApp->to_app;
my $test = Plack::Test->create($app);

subtest register => sub {
    note '>>> Test <<<';

    # The application is configured with the JSON serializer, so the body
    # must be JSON text sent with a matching Content-Type. A bare string
    # passed positionally after the URL is taken by POST() as a header
    # name, which leaves the request body empty.
    my $res = $test->request(
        POST '/api/v1/register',
        Content_Type => 'application/json',
        Content      => encode_json( \%data ),
    );

    # A subtest must run at least one assertion, otherwise it fails.
    ok( $res->is_success, 'POST /api/v1/register succeeds' )
        or diag( $res->status_line );
    note Dumper $res;
};

done_testing();
Dumper $res =>
$VAR1 = bless( {
'_headers' => bless( {
'content-type' => 'application/json',
'server' => 'Perl Dancer2 0.400000',
'content-length' => 454
}, 'HTTP::Headers' ),
'_request' => bless( {
'_headers' => bless( {
'content-length' => 0,
'
12345678
test1#test.comemail
a1234567password' => undef,
'content-type' => 'application/x-www-form-urlencoded',
'::std_case' => {
'
12345678
test1#test.comemail
a1234567password' => '
12345678
Test1#Test.ComEmail
A1234567Password'
}
}, 'HTTP::Headers' ),
I tested this REST API with Postman and it works fine.
I am trying to update my status on Facebook using Mechanize. I am able to log in using the script, but I am unable to post the update. I verified that the id of the status update form is "u_0_w".
But selecting the form_id method says "There is no form with ID "u_0_w"".
My script is this:
use WWW::Mechanize;
use strict;
use warnings;
use Data::Dumper;
use HTTP::Cookies::Netscape;

# Log in to Facebook with a Netscape-format cookie jar, dump the forms found
# on the resulting page, try to submit a status update and save the page
# returned by that submission.
my $cookiesfilename = '/home/xxx/xxx/cookies.txt';
my $mech = WWW::Mechanize->new(
    cookie_jar => HTTP::Cookies::Netscape->new( file => $cookiesfilename ),
);

$mech->get("https://www.facebook.com/login.php");
$mech->submit_form(
    fields => {
        email => 'xxxx@xxxx.com',
        pass  => 'xxxxx',
    }
);

# Show every form Mechanize parsed out of the post-login page.
print Dumper( $mech->forms() );

#$mech->form_id("u_0_w");

# Keep the response of the status submission itself; previously the login
# response was the one that got printed and written to disk.
my $response = $mech->submit_form(
    fields => {
        xhpc_message_text => 'Why so serious',
    }
);
print $response->status_line;

open( my $out, ">", "output_page.html" )
    or die "Can't open output_page.html: $!";
print {$out} $response->decoded_content;

# Buffered write errors only surface when the handle is closed.
close($out) or die "Can't close output_page.html: $!";
Then I tried to print all the forms on the page using Dumper the output is:
$VAR1 = bless( {
'default_charset' => 'UTF-8',
'enctype' => 'application/x-www-form-urlencoded',
'accept_charset' => 'UNKNOWN',
'action' => bless( do{\(my $o = 'https://www.facebook.com/search/web/direct_search.php')}, 'URI::https' ),
'method' => 'GET',
'attr' => {
'method' => 'get'
},
'inputs' => [
bless( {
'tabindex' => '-1',
'value' => '1',
'class' => '_42ft _42fu _4w98',
'type' => 'submit'
}, 'HTML::Form::SubmitInput' ),
bless( {
'/' => '/',
'autocomplete' => 'off',
'tabindex' => '1',
'name' => 'q',
'aria-label' => 'Search Facebook',
'value_name' => '',
'class' => 'inputtext _586f',
'type' => 'text',
'id' => 'u_0_b',
'role' => 'combobox',
'placeholder' => 'Search Facebook'
}, 'HTML::Form::TextInput' )
]
}, 'HTML::Form' );
This means Mechanize is not detecting the status update form; it only detects the Facebook search form.
What could be preventing Mechanize from detecting all the forms on the page?
The form contains <button type="submit">. Does Mechanize support it?
Why do you have to use Mechanize for this? There's already a module available for this on CPAN.
Take a look at WWW::Facebook::API.
Also see a related question: How do I use Perl's WWW::Facebook::API to publish to a user's newsfeed?
Synopsis:
use WWW::Facebook::API;

# Create the API client from the application credentials and the session
# details stored in cookies whose names are prefixed with the API key.
#
# NOTE(review): assumes $query is a CGI-style object whose cookie() method is
# context-sensitive -- confirm. Each lookup is forced into scalar context so
# that a missing cookie yields one undef value instead of an empty list,
# which would shift every following key/value pair in this argument list.
my $facebook = WWW::Facebook::API->new(
    desktop         => 0,
    api_key         => $fb_api_key,
    secret          => $fb_secret,
    session_key     => scalar( $query->cookie( $fb_api_key . '_session_key' ) ),
    session_expires => scalar( $query->cookie( $fb_api_key . '_expires' ) ),
    session_uid     => scalar( $query->cookie( $fb_api_key . '_user' ) ),
);

# Publish a status message to the stream.
my $response = $facebook->stream->publish(
    message => qq|Test status message|,
);
I've found the following code:
use strict;
use warnings;
use WWW::Mechanize;
use WWW::Mechanize::FormFiller;
use URI::URL;

# GO terms to submit, joined one per line for the goList field.
my @go_terms = qw/GO:0006612 GO:0045862 GO:0048545 GO:0007568 GO:0046326 GO:0051901 GO:0010524 GO:0006044 GO:0032024/;
my $go_string = join( "\n", @go_terms );

my $agent      = WWW::Mechanize->new( autocheck => 1 );
my $formfiller = WWW::Mechanize::FormFiller->new();
$agent->env_proxy();
$agent->get('http://revigo.irb.hr/');

# Select the first form, but only when the page actually contains one.
$agent->form_number(1) if $agent->forms and scalar @{ $agent->forms };

# Register a fixed value for each form field, then apply them to the form.
$formfiller->add_filler( 'goList'       => Fixed => $go_string );
$formfiller->add_filler( 'cutoff'       => Fixed => '0.4' );
$formfiller->add_filler( 'isPValue'     => Fixed => 'yes' );
$formfiller->add_filler( 'whatIsBetter' => Fixed => 'higher' );
$formfiller->add_filler( 'goSizes'      => Fixed => 0 );
$formfiller->add_filler( 'measure'      => Fixed => 'SIMREL' );
$formfiller->fill_form( $agent->current_form );

# Submit the form through its "startRevigo" button.
my $request = $agent->click("startRevigo");
What I am trying to do is this: once startRevigo is clicked, I want to go to the URL http://revigo.irb.hr/toR.jsp?table=1 and download the file it gives me. I have no clue how to do this, even after reading the CPAN manual.
Not tested!
use strict;
use warnings;
use WWW::Mechanize;

# GO terms to submit, joined one per line for the goList field.
my @go_terms = qw/GO:0006612 GO:0045862 GO:0048545 GO:0007568 GO:0046326 GO:0051901 GO:0010524 GO:0006044 GO:0032024/;
my $go_string = join( "\n", @go_terms );

my $agent = WWW::Mechanize->new( autocheck => 1 );
$agent->env_proxy();
$agent->get('http://revigo.irb.hr/');

# with_fields selects the form that contains all of these fields, fills
# them in and submits it.
$agent->submit_form(
    with_fields => {
        goList       => $go_string,
        cutoff       => 0.4,
        isPValue     => "yes",
        whatIsBetter => "higher",
        goSizes      => 0,
        measure      => "SIMREL",
    },
);

# Request the export URL with the same agent and write the body to disk.
$agent->get("http://revigo.irb.hr/toR.jsp?table=1");
$agent->save_content("your_file.r");
I'd use LWP::UserAgent instead
require LWP::UserAgent;

# Agent with a 10 second timeout and proxy settings taken from the environment.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;

my $response = $ua->get('http://revigo.irb.hr/toR.jsp?table=1');

# Stop with the status line unless the request succeeded.
die $response->status_line unless $response->is_success;

# The decoded body is only printed here; it could just as well be saved.
print $response->decoded_content;
http://search.cpan.org/dist/libwww-perl/lib/LWP/UserAgent.pm
I am trying to parse the following link using perl
http://www.inc.com/profile/fuhu
I am trying to get information like Rank, 2013 Revenue and 2010 Revenue, etc,
But when I fetch the data with Perl, I get the following, which is also what the page source shows.
<dl class="RankTable">
<div class="dtddwrapper">
<div class="dtdd">
<dt>Rank</dt><dd><%=rank%></dd>
</div>
</div>
<div class="dtddwrapper">
And When I check with Firebug, I get following.
<dl class="RankTable">
<div class="dtddwrapper">
<div class="dtdd">
<dt>Rank</dt><dd>1</dd>
</div>
</div>
<div class="dtddwrapper">
My Perl code is as following.
use strict;
use warnings;
use WWW::Mechanize;

# Fetch the profile page and print the HTML exactly as the server returns it.
my $url  = "http://www.inc.com/profile/fuhu";
my $mech = WWW::Mechanize->new();
$mech->get($url);
my $data = $mech->content();
print $data;
As others have said, this is not plain HTML; there is some JS wizardry involved. The data comes from a dynamic JSON request.
The following script prints the rank and dumps everything else available in $data.
First it gets the ID of the profile and then it makes the appropriate JSON request, just like a regular browser.
use strict;
use warnings;
use WWW::Mechanize;
use JSON qw/decode_json/;
use Data::Dumper;

my $url  = "http://www.inc.com/profile/fuhu";
my $mech = WWW::Mechanize->new();
$mech->get($url);

# Pull the numeric profile ID out of the page; nothing more happens when the
# page does not contain one.
my ($id) = $mech->content() =~ /profileID = (\d+)/;

if ( defined $id ) {

    # Request the JSON document for that profile ID and decode it.
    $mech->get("http://www.inc.com/rest/inc5000company/$id/full_list");
    my $data = decode_json( $mech->content() );
    my $rank = $data->{data}{rank};

    # Report the rank first, then dump the whole decoded structure.
    print "rank is $rank\n", "\ndata hash value \n", Dumper($data);
}
Output:
rank is 1
data hash value
$VAR1 = {
'time' => '2014-08-22 11:40:00',
'data' => {
'ifi_industry' => 'Consumer Products & Services',
'app_revenues_lastyear' => '195640000',
'industry_rank' => '1',
'ifc_company' => 'Fuhu',
'current_industry_rank' => '1',
'app_employ_fouryearsago' => '49',
'ifc_founded' => '2008-00-00',
'rank' => '1',
'city_display_name' => 'Los Angeles',
'metro_rank' => '1',
'ifc_business_model' => 'The creator of an Android tablet for kids and an Adobe Air application that allows children to access the Internet in a parent-controlled environment.',
'next_id' => '25747',
'industry_id' => '4',
'metro_id' => '2',
'app_employ_lastyear' => '227',
'state_rank' => '1',
'ifc_filelocation' => 'fuhu',
'ifc_url' => 'http://www.fuhu.com',
'years' => [
{
'ify_rank' => '1',
'ify_metro_rank' => '1',
'ify_industry_rank' => '1',
'ify_year' => '2014',
'ify_state_rank' => '1'
},
{
'ify_industry_rank' => undef,
'ify_year' => '2013',
'ify_rank' => '1',
'ify_metro_rank' => undef,
'ify_state_rank' => undef
}
],
'ifc_twitter_handle' => 'NabiTablet',
'id' => '22890',
'app_revenues_fouryearsago' => '123000',
'ifc_city' => 'El Segundo',
'ifc_state' => 'CA'
}
};
This thing : <%=rank%> is inside a script, it's not HTML. So when you see it in firebug, it shows after executing this part. But when you look at the HTML code, you see it this way. So HTML parsing won't work here.
Usually in this type of cases, the variables (rank for example) are passed from server using a XHR call. So you need to check the XHR calls in firebug and see the responses.
there is a bit of code I'm trying to replicate in Perl using either LWP::UserAgent or WWW::Mechanize from an existing script.
The original script actually does more than I'm looking to do. I'd just like to log into the Nest website (the part I need help with) and then parse out some data for historical logging (I'm good there).
I would expect my current script to work, but I'm not sure whether the authResult/access_token from the Ruby example is actually understood or used by either Perl module.
My code in Perl:
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Mechanize;
#use HTTP::Request::Common qw(POST);
use HTTP::Cookies;
use LWP::UserAgent;
use Data::Dumper;
use CGI;

# Credentials. Single quotes are required here: in a double-quoted string
# "@email" would be interpolated as an (empty) array and silently removed
# from the address.
my $email      = 'email@email';    #stores our mail
my $password   = 'mypassword';     #stores our password
my $user_agent = 'Nest/1.1.0.10 CFNetwork/548.0.4';

# NOTE(review): these headers (and $user_agent) are declared but never
# attached to any request in this script -- confirm whether they should be.
my @headers = (
    'User-Agent'            => 'Nest/1.1.0.10 CFNetwork/548.0.4',
    'X-nl-user-id'          => $email,
    'X-nl-protocol-version' => '1',
    'Accept-Language'       => 'en-us',
    'Connection'            => 'keep-alive',
    'Accept'                => '*/*'
);

# print "Content-type: text/html\n\n";
my $cookie  = HTTP::Cookies->new( file => 'cookie', autosave => 1, );
my $browser = WWW::Mechanize->new( cookie_jar => $cookie, autocheck => 1, );

# tell it to get the login page
$browser->get("https://home.nest.com/user/login");
print Dumper( $browser->forms );

# Fill in the first form on the page with the credentials and submit it.
# Hidden inputs already present in the parsed form are sent along unchanged.
$browser->form_number(1);
$browser->field( "username", $email );
$browser->field( "password", $password );
$browser->submit();
print $browser->content();
When I submit the form, I just get the same page returned back to me, and I don't know what exactly is causing Nest to not like what I'm submitting. There are two additional fields in the form on their log-in page:
'inputs' => [
bless( {
'maxlength' => '75',
'/' => '/',
'value_name' => 'E-mail address',
'name' => 'username',
'id' => 'id_username',
'type' => 'text'
}, 'HTML::Form::TextInput' ),
bless( {
'/' => '/',
'value_name' => 'Password',
'name' => 'password',
'id' => 'id_password',
'type' => 'password',
'minlength' => '6'
}, 'HTML::Form::TextInput' ),
bless( {
'readonly' => 1,
'/' => '/',
'value_name' => '',
'value' => '',
'name' => 'next',
'type' => 'hidden'
}, 'HTML::Form::TextInput' ),
bless( {
'readonly' => 1,
'/' => '/',
'value_name' => '',
'value' => 'dbbadca7910c5290a13d30785ac7fb79',
'name' => 'csrfmiddlewaretoken',
'type' => 'hidden'
}, 'HTML::Form::TextInput' )
Do I need to use the csrfmiddlewaretoken value in each submission? It appears to change. I thought getting a cookie upon a successful login would be enough.
Any suggestions on what I'm doing wrong?
Shot in the blue:
perl -E'use warnings; $email = "email@email"; say "<$email>"'
Possible unintended interpolation of @email in string at -e line 1.
<email>
I suspect it fails because the form gets the wrong user name, print it out to confirm. Always enable the pragmas strict and warnings to make many common mistakes visible.