python 3.10 soap.find(id='productTitle').get_text(strip=True) NoneType Error - gettext

soap.find(id='productTitle').get_text(strip=True)
Output: 'NoneType' Object has no attribute 'get_text'.

There's not a lot to go off since you didn't provide a lot of information, but from the information I got, you've put soap.find instead of soup.find
You could try something like this to fix it:
import requests
from bs4 import BeautifulSoup
URL = "Your url"
headers = {
"User-Agent": '(search My user agent)'}
def product_title():
req = requests.Session()
page = req.get(URL, headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')
productTitle = soup.find(id='productTitle').get_text(strip=True)
print(product)
productTitle()

Related

Linkedin API request dateRange end today or now

I'm using Python 3 to request share statistics from Linkedin API for the last 14 months.
It works fine, when I hardcoded the epoch values in the request link:
https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3AXXXXX&timeIntervals=(timeRange:(start:1596206929000,end:1632938933000),timeGranularityType:DAY)
Obviously I don't want to be changing the end values each time I make a request, so I thought I'd declare a variable:
from datetime import datetime
epochcdt = datetime.now().timestamp()
And then just use it in the link instead of the hardcoded value:
https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3AXXXXX&timeIntervals=(timeRange:(start:1596206929000,end:epochcdt),timeGranularityType:DAY)
But, that doesn't work:
{'message': 'Internal Server Error', 'status': 500}
Can you please help me with this hopefully, easy to solve problem?
Whole code:
import requests
import json
from liapiauth import auth, headers
from datetime import datetime
epochcdt = (datetime.now().timestamp())*1000
def organization_info(headers):
response = requests.get('https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3AXXXXX&timeIntervals=(timeRange:(start:1596206929000,end:{epochcdt}),timeGranularityType:DAY)', headers = headers)
organization_info = response.json()
return organization_info
if __name__ == '__main__':
credentials = 'credentials.json'
access_token = auth(credentials)
headers = headers(access_token)
organization_info = organization_info(headers)
with open('lishare14m.json', 'w') as outfile:
json.dump(organization_info, outfile)
print(organization_info)

Why i am getting authentication error in MATLAB while i try to use HTTP

I am trying to query SiriDB via Matlab. I am getting the positive response from my log-in attemps. However when i try to query, i get the error: "not authenticated".
Please see my code below:
clear;clc
import matlab.net.http.*
import matlab.net.http.field.*
import matlab.net.URI
import matlab.net.http.Credentials
strct1 = jsondecode('{"username": "my_user_name", "password": "my_password"}');
strct2 = jsondecode('{"query": "select last() from ''example_query''"}');
uri = 'http://example_adress/auth/login';
request = RequestMessage( 'POST',ContentTypeField( 'application/json'),strct1);
complete(request,uri);
response = send(request,uri);
complete(response);
response.Body.string
uri = 'http://example_api';
request = RequestMessage( 'POST',ContentTypeField( 'application/json'),strct2 );
complete(request,uri)
response = request.send( 'http://example_api' );
complete(response);
response.Body.string

clients = self.AVAILABLE_CLIENTS[name] KeyError: 'requests' flask authlib client

good day everybody,
having some issues with flask and authlib. Bellow snip of my flash code
from flask import Flask, render_template
from authlib.integrations.flask_client import OAuth
import os
app = Flask(__name__)
app._static_folder = os.path.abspath("static")
app.config.from_object('config')
oauth = OAuth(app)
webex = oauth.register(name='WEBEX', redirect_uri='http://webapp.dcloud.cisco.com:5000/AuthWebex', client_kwargs={
'scope': 'spark:all'
} )
config.py
import os
WEBEX_CLIENT_ID='C3a256be511cdf07e19f272960c44a214aec14b727b108e4f10bd124d31d2112c'
WEBEX_CLIENT_SECRET='secret'
WEBEX_ACCESS_TOKEN_URL='https://api.ciscospark.com/v1/access_token'
WEBEX_REDIRECT_URI='http://localhost:5000/AuthWebex'
WEBEX_SCOPE='spark:all'
when running above code I get the following error:
File "/Users/tneumann/PycharmProjects/untitled/venv/lib/python3.7/site-packages/authlib/integrations/flask_client/oauth_registry.py", line 61, in register
self.use_oauth_clients()
File "/Users/tneumann/PycharmProjects/untitled/venv/lib/python3.7/site-packages/authlib/integrations/_client/oauth_registry.py", line 49, in use_oauth_clients
clients = self.AVAILABLE_CLIENTS[name]
KeyError: 'requests'
looked at examples and did some research, no luck. Can't find any solution...
thanks in adv.
Tobi
UPDATE:
per comment bellow here the latest code:
from flask import Flask, render_template, url_for, request
from authlib.integrations.flask_client import OAuth
import os
import requests
app = Flask(__name__)
app._static_folder = os.path.abspath("static")
app.config.from_object('config')
app.secret_key = os.urandom(24)
oauth = OAuth(app)
oauth.register(
'webex',
api_base_url='https://api.ciscospark.com/v1',
authorize_url='https://api.ciscospark.com/v1/authorize',
access_token_url='https://api.ciscospark.com/v1/access_token',
redirect_uri='http://webapp.dcloud.cisco.com:5000/AuthWebex',
scope='spark:all')
#app.route('/')
def main():
"""Entry point; the view for the main page"""
return render_template('main.html')
#app.route('/authorize')
def authorize():
return render_template('authorize.html')
#app.route('/login')
def login():
#redirect_uri = url_for('AuthWebex', _external=True)
redirect_uri = 'http://webapp.dcloud.cisco.com:5000/AuthWebex'
print(redirect_uri)
return oauth.webex.authorize_redirect(redirect_uri)
#app.route('/AuthWebex')
def AuthWebex():
#print(request.__dict__)
token = oauth.webex.authorize_access_token( authorization_response=request.url,
redirect_uri='http://webapp.dcloud.cisco.com:5000/AuthWebex',
client_id='C3a256be511cdf07e19f272960c44a214aec14b727b108e4f10bd124d31d2112c',
client_secret='secret',
)
print("Token: ", token)
resp = oauth.webex.get('https://api.ciscospark.com/v1/people/me')
profile = resp.json()
print(profile)
# do something with the token and profile
return '<html><body><h1>Authenticated</h1></body></html>'
if __name__ == '__main__':
app.run()
oauth.webex.authorize_access_token function throws and error when called without the parameters. which is strange as most examples I found exactly do that.
client_id and client_secret are set via the config.py file. This works for the oauth.register function but not for the authorize_access_token.
Additional problem is that even with the parameters, it produces a valid token. When I call the get function I get the following error:
File "/Users/tneumann/PycharmProjects/untitled/venv/lib/python3.7/site-packages/requests/models.py", line 317, in prepare
self.prepare_auth(auth, url)
File "/Users/tneumann/PycharmProjects/untitled/venv/lib/python3.7/site-packages/requests/models.py", line 548, in prepare_auth
r = auth(self)
File "/Users/tneumann/PycharmProjects/untitled/venv/lib/python3.7/site-packages/authlib/integrations/requests_client/oauth2_session.py", line 41, in __call__
raise UnsupportedTokenTypeError(description=description)
authlib.integrations._client.errors.UnsupportedTokenTypeError: unsupported_token_type: Unsupported token_type: 'token_type'
here is the format of the token returned from authorize_access_token function
Token: {'access_token': 'YWIzNGU3<secret>tNDQ5_PF84_7cc07dbd-<secret>-5877334424fd', 'expires_in': 1209599, 'refresh_token': 'MjU2ZDM4N2Et<secret>ZmItMTg5_PF84_7cc07dbd-<secret>877334424fd', 'refresh_token_expires_in': 7722014, 'expires_at': 1574863645}
went through the docs, the code on github and debugging in pycharm with no luck, help would be much appreciated!
The problem here is that this AuthWebex is not a standard OAuth service. The response has no token_type. We can fix it with Authlib compliance fix:
Check the example here:
https://docs.authlib.org/en/latest/client/frameworks.html#compliance-fix-for-oauth-2-0
The slack example has the same issue.

Webscraper not giving the right results with bs4

I'm trying to scrape the live billionaire networth table here > https://www.bloomberg.com/billionaires/
This is my code so far. All I get is [] as result on the python shell.
Something has to be wrong with the "findAll", I don't think I'm using the correct tag lines.
Tried to use just "find"
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import csv
#Open page and grab html
my_url = ('https://www.bloomberg.com/billionaires/')
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
#HTML Parser.
page_soup = soup(page_html, 'html.parser')
table = []
#Find table.
ele_table = page_soup.findAll('div',{'class':'dvz-content'})
print(ele_table)
I'm expecting for the table to be printed out so I can get it into a CSV file.
Data is dynamically loaded. You can pull from script tag provided you supply the right headers. Regex out the required info and parse with json library. Hand this off to pandas to write to csv
from bs4 import BeautifulSoup as bs
import requests, re, json
import pandas as pd
headers = {
'user-agent': 'Mozilla/5.0',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'if-none-match': 'W/^\\^5dbb59e6-91b10^\\^',
'if-modified-since': 'Thu, 31 Oct 2019 22:02:14 GMT' # this may be safeguard for caching. Consider if add dynamically.
}
p = re.compile(r'window.top500 = (.*);')
r = requests.get('https://www.bloomberg.com/billionaires/', headers = headers)
data = json.loads(p.findall(r.text)[0])
df = pd.DataFrame(data)
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8-sig',index = False)
Example output:

How to log in to a website with urllib?

I am trying to log on this website: http://www.broadinstitute.org/cmap/index.jsp. I am using python 3.3 on Windows. I followed this answer https://stackoverflow.com/a/2910487/651779. My code:
import http.cookiejar
import urllib
url = 'http://www.broadinstitute.org/cmap/index.jsp'
values = {'j_username' : 'username',
'j_password' : 'password'}
data = urllib.parse.urlencode(values)
binary_data = data.encode('ascii')
cookies = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
urllib.request.HTTPRedirectHandler(),
urllib.request.HTTPHandler(debuglevel=0),
urllib.request.HTTPSHandler(debuglevel=0),
urllib.request.HTTPCookieProcessor(cookies))
response = opener.open(url, binary_data)
the_page = response.read()
http_headers = response.info()
It runs without erros, however the html in the_page is just the log in page. How can I log onto this page?
The site is using a JSESSIONID cookie to create session since HTTP requests are stateless. When you're making your request, you're not getting that session id first.
I sniffed a session to log into that site using Fiddler and found that the POST is made to a different URL, but it has that JSESSIONID cookie set. So you need to make a get to the URL first, capture that cookie using the cookiehandler, then POST to this URL:
post_url = 'http://www.broadinstitute.org/cmap/j_security_check'
You don't need to save the HTTP GET request at all, you can simply call opener.open(url), then in your code change the response line to this:
response = opener.open(post_url, binary_data)
Also the payload was missing the submit method. Here's the whole thing with the changes I suggest:
import http.cookiejar
import urllib
get_url = 'http://www.broadinstitute.org/cmap/index.jsp'
post_url = 'http://www.broadinstitute.org/cmap/j_security_check'
values = urllib.parse.urlencode({'j_username': <MYCOOLUSERNAME>,
'j_password': <MYCOOLPASSSWORD>,
'submit': 'sign in'})
payload = bytes(values, 'ascii')
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
urllib.request.HTTPRedirectHandler(),
urllib.request.HTTPHandler(debuglevel=0),
urllib.request.HTTPSHandler(debuglevel=0),
urllib.request.HTTPCookieProcessor(cj))
opener.open(get_url) #First call to capture the JSESSIONID
resp = opener.open(post_url, payload)
resp_html = resp.read()
resp_headers = resp.info()
Any other requests using the opener you created will re-use that cookie and you should be able to freely navigate the site.