ValueError: No engine for filetype: '' - valueerror

I have this error:
ValueError: No engine for filetype: ''
This is what I wrote:
for i in df['participant_num'].unique():
df.loc[df['participant_num'] == i].to_excel('/home/raz', sheet_name = i, index=False)

Related

Open binary file data with Spark - ValueError: The truth value of a Series is ambiguous

I have the following binary file (mp3) whose audio I send to a service in Azure to be transcribed.
The following code works in Databricks.
import os
import requests
url = "https://endpoint_service"
headers = {
'Ocp-Apim-Subscription-Key': 'MyKey',
'Content-Type': 'audio/mpeg'
}
def send_audio_transcript(url, payload, header):
    """Send audio content to the Azure service and return its JSON reply.

    Args:
        url: Endpoint of the transcription service.
        payload: Audio content used as the body of the POST request.
        header: HTTP headers sent with the request.

    Returns:
        The response body parsed from JSON.
    """
    # Use the ``header`` argument: the original ignored it and silently
    # read the module-level ``headers`` dict instead.
    response = requests.request("POST", url, headers=header, data=payload)
    return response.json()
full_path = <my_path>file.mp3
with open(full_path, mode='rb') as file: # b is important -> binary
fileContent = file.read()
send_audio_transcript(url, fileContent, headers) # a POST request its works
But my audio files are in sensitive storage in a data lake, and the only way to access them is through a Spark read.
According to the documentation, the way to read a binary file is:
df = spark.read.format("binaryFile").load(full_path)
display(df)
path || modificationTime || length || content
path || sometime || some_lenght || 2dnUwAC
first try:
content = df.content
test_service = send_audio_transcript(url, content , headers)
ValueError: Cannot convert column into bool: please use '&' for 'and', '|' for 'or', '~' for 'not' when building DataFrame boolean expressions.
Second try(convert spark to pandas):
pandas_df = df.toPandas()
content = pandas_df["content"]
test_service = send_audio_transcript(url, content , headers)
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
What is the exact PySpark equivalent of:
with open(full_path, mode='rb') as file: # b is important -> binary
fileContent = file.read()
The content data coming from Spark is not the same as the content data coming from opening the file.
From Spark (and later pandas) you get a pandas Series, but from opening the file you get a bytes object:
with open(full_path, mode='rb') as file: # b is important -> binary
fileContent = file.read()
print(type(fileContent)) # will return <class 'bytes'>
but from Spark
input_df = spark.read.format("binaryFile").load(full_path)
pandas_df = input_df.toPandas()
content = pandas_df['content']
print(type(content)) # return <class 'pandas.core.series.Series'>
In your case to fix your problem you need to take just the first element of the series.
content_good = content[0]
print(content_good) # you have your <class 'bytes'> wich is what you need

MongoDB not connecting to the server

Every time I try to connect to my mongodb atlas via my app, I keep getting this error.
ERROR 'f len([h for h in host if "/" in h]) > 1: TypeError: 'Flask' object is not iterable'
from turtle import turtlesize
from flask import Flask, redirect, render_template, request, url_for
from pymongo import MongoClient
app = Flask(__name__)
#app.config['SECRET KEY'] = ''
app.config['MONGO_URI'] = 'mongodb+srv:// admin:admin#sunbeam.cb2bg.mongodb.net/?retryWrites=true&w=majority'
mongodb_client = MongoClient(app)
db = mongodb_client.db
cluster = db['Studentsdb']
collection = cluster['students']
@app.route('/', methods=['GET', 'POST'])
def login():
    """Render the login page and check credentials submitted via POST.

    On POST the form fields ``username`` and ``password`` must both equal
    ``'admin'``; a match redirects to the ``student_forms`` view, anything
    else re-renders ``login.html`` with an error message. A GET request
    renders ``login.html`` with ``error=None``.
    """
    # The decorator above must start with "@"; as "#app.route(...)" it was
    # only a comment and this view was never registered with Flask.
    error = None
    if request.method == 'POST':
        if request.form['username'] != 'admin' or request.form['password'] != 'admin':
            error = 'Invalid Credentials. Please try again.'
        else:
            return redirect(url_for('student_forms'))
    return render_template('login.html', error=error)
@app.route("/")
def products():
    """Render ``index.html``."""
    # The decorator above must start with "@"; as "#app.route(...)" it was
    # only a comment and this view was never registered with Flask.
    return render_template('index.html')
@app.route("/student_forms", methods=['GET', 'POST'])
def student_forms():
    """Show the student form and store a submitted student record.

    On POST the form fields ``name``, ``gender``, ``class`` and
    ``birthdate`` are inserted as one document into ``collection`` and the
    client is redirected back to this view. A GET request renders
    ``studentform.html``.
    """
    # The decorator above must start with "@"; as "#app.route(...)" it was
    # only a comment and this view was never registered with Flask.
    if request.method == 'POST':
        name = request.form['name']
        gender = request.form['gender']
        class_admit = request.form['class']
        dob = request.form['birthdate']
        collection.insert_one({'name': name, 'gender': gender, 'class': class_admit, 'dob': dob})
        return redirect(url_for('student_forms'))
    return render_template('studentform.html', form=student_forms)
if __name__ == '__main__':
app.run(debug=True, port=8000)
You may have a number of issues, but the first is that your connection string should not contain a space:
app.config['MONGO_URI'] = 'mongodb+srv:// admin:admin#sunbeam.cb2bg.mongodb.net/?retryWrites=true&w=majority'
->
app.config['MONGO_URI'] = 'mongodb+srv://admin:admin#sunbeam.cb2bg.mongodb.net/?retryWrites=true&w=majority'

Pyspark AssertionError: on should be Column or list of Column

Hi I have the below dataframes and when I join them I get AssertionError: on should be Column or list of Column. How do I get around this please as I cannot find any solution on google related to this?
Pages = sc.read.json("/Users/me/desktop/clickstream/Clicks/Page*.json.gz")
Pages_Dataset = Pages.select("SessionNumber", "PageLocation", "PageInstanceID")\
.withColumnRenamed("PageLocation", "URL")\
.withColumnRenamed("PageInstanceID", "CsaNumber")\
.withColumn("URL2", expr("CASE WHEN INSTR(URL, '=') > 0 THEN SUBSTR(URL,0,INSTR(URL, '=') -1) ELSE URL END"))\
.withColumn("URL2", expr("CASE WHEN INSTR(URL2, '?') > 0 THEN SUBSTR(URL2,0,INSTR(URL2, '?') -1) ELSE URL2 END"))\
.withColumn("URL2", expr("CASE WHEN INSTR(URL2, '#') > 0 THEN SUBSTR(URL2,0,INSTR(URL2, '#') -1) ELSE URL2 END"))\
.withColumn("URL3", expr("CASE WHEN INSTR(URL, 'prdcls=') > 0 THEN SUBSTR(URL,INSTR(URL, 'prdcls=')+7,2) ELSE '' END"))\
.withColumn("URL", concat("URL2", "URL3"))\
.select("SessionNumber", "URL", "CsaNumber").alias("a")\
.join(ConfiguredUrls.alias("b"), lower("a.URL") == lower("b.URL"), "left")\
.select("a.SessionNumber", "b.Product", "a.CsaNumber", "b.EndQuote", "a.URL")\
.withColumnRenamed("Product", "Session")\
.withColumn("Session", expr("CASE WHEN lower(URL) like 'https://mobilephones.com/deals/%' THEN 'Mobile Phones' ELSE Session END"))\
.withColumn("EndQuote", expr("CASE WHEN lower(URL) like 'https://mobilephones.com/deals/%' THEN 'Mobile Phones' ELSE EndQuote END"))\
.distinct()
Goals_Dataset = Goals.select("SessionNumber", "GoalName", "PageInstanceID", "EventTimestamp")\
.withColumnRenamed("EventTimestamp", "GoalDate")\
.withColumnRenamed("PageInstanceID", "CsaNumber")\
.select("SessionNumber", "GoalName", "CsaNumber", "GoalDate").alias("a")\
.join(ConfiguredGoals.alias("b"), lower("a.GoalName") == lower("b.GoalNameValue"), "left")\
.select("a.SessionNumber", coalesce("b.StartQuote", "b.EndQuote", "b.Switch").alias("Session"), "a.CsaNumber", "b.EndQuote")\
.distinct()
Session_Dataset = Pages_Dataset.select("SessionNumber", "Session", "CsaNumber", "EndQuote").alias("a")\
.join(Goals_Dataset.alias("b"), "a.SessionNumber" == "b.SessionNumber", "fullouter")\
.select(coalesce("a.SessionNumber", "b.SessionNumber").alias("SessionNumber"), coalesce("a.Session", "b.Session").alias("Session"), coalesce("a.CsaNumber", "b.CsaNumber").alias("CsaNumber"), coalesce("a.EndQuote", "b.EndQuote").alias("EndQuote"))\
.distinct()
#Error:
Session_Dataset = Pages_Dataset.select("SessionNumber", "Session", "CsaNumber", "EndQuote").alias("a")\
File "/usr/local/Cellar/apache-spark/3.2.1/libexec/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 1343, in join
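AssertionError: on should be Column or list of Column
Comparing two Python strings with == ("a.SessionNumber" == "b.SessionNumber") evaluates to the plain bool False, not a Column, which is why join() rejects it. It should be col("a.SessionNumber") == col("b.SessionNumber"), or just "SessionNumber".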

Traceback after looping through all available news article

I am making a python CLI utility that will answer questions like "15 + 15" or "How many letters are in the alphabet".
I then decided to add the ability to search up the latest news using the newspaper module.
All of it works, except that when the for loop finishes, after printing a string literal, it gives me an error that I do not understand.
Can someone decipher the error for me and, if possible, help me fix it? Thanks.
import requests
import wolframalpha
import wikipedia
import time
import sys
from threading import Thread
from newspaper import Article
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen
version = 2.1
build = '19w12a6'
ready = 0
loadingAnimationStop = 0
appId = 'CENSORED STUFF BECAUSE I DON\'T WANT ANYONE TO TOUCH MY KEY'
client = wolframalpha.Client(appId)
exitNow = 0
def loadingAnimation():
    """Draw a four-frame "Loading" spinner until ``exitNow`` is non-zero.

    Reads the module-level flags ``exitNow`` and ``ready``. Each frame is
    shown for 0.2 seconds; after every frame the function spins while
    ``ready == 1``, so nothing is drawn for as long as that flag is set.
    ``exitNow`` is only checked once per full cycle of four frames.
    """
    # The last frame keeps the trailing space the original wrote. The
    # backslash is escaped because "\ " is an invalid escape sequence.
    frames = ("|", "/", "-", "\\ ")
    while exitNow == 0:
        for frame in frames:
            print("Loading: " + frame, end='\r')
            time.sleep(0.2)
            while ready == 1:
                time.sleep(0)
hui = Thread(target = loadingAnimation, args=())
hui.start()
def search_wiki(keyword=''):
    """Print the title and summary of the first Wikipedia hit for *keyword*.

    Prints "No result from Wikipedia" and returns when the search yields
    nothing. If the first hit is a disambiguation page, the first option
    offered by that page is loaded instead.

    Args:
        keyword: Search term passed to ``wikipedia.search``.
    """
    searchResults = wikipedia.search(keyword)
    if not searchResults:
        print("No result from Wikipedia")
        return
    try:
        page = wikipedia.page(searchResults[0])
    except wikipedia.DisambiguationError as err:
        # Bind the exception: the original used ``err`` without "as err",
        # so every ambiguous title ended in a NameError.
        page = wikipedia.page(err.options[0])
    # NOTE(review): on Python 3, str() of the encoded bytes yields the
    # b'...' repr; kept as in the original output.
    wikiTitle = str(page.title.encode('utf-8'))
    wikiSummary = str(page.summary.encode('utf-8'))
    print(' ', end='\r')
    print(wikiTitle)
    print(wikiSummary)
def search(text=''):
    """Query Wolfram|Alpha for *text* and print the answer.

    If the query fails, a message is printed. Otherwise the second pod is
    inspected: when it is a definition/result pod or is flagged primary,
    its plaintext is printed; if not, the interpreted question (the first
    pod's plaintext, cut at its first bracket) is looked up on Wikipedia
    via ``search_wiki``.

    Args:
        text: The question to send to Wolfram|Alpha.
    """
    # Without this declaration ``ready = 1`` below only created a local
    # variable and the module-level flag read by loadingAnimation() never
    # changed.
    global ready
    res = client.query(text)
    # XML attributes are exposed under "@"-prefixed keys. Comparing the
    # lower-cased text accepts both string and boolean attribute values.
    if str(res['@success']).lower() == 'false':
        ready = 1
        time.sleep(1)
        print('Query cannot be resolved')
        return

    pod0 = res['pod'][0]
    pod1 = res['pod'][1]
    title = pod1['@title'].lower()
    is_primary = str(pod1.get('@primary', 'false')).lower() == 'true'
    if 'definition' in title or 'result' in title or is_primary:
        result = resolveListOrDict(pod1['subpod'])
        ready = 1
        time.sleep(0.75)
        print(' ', end='\r')
        print(result)
    else:
        question = resolveListOrDict(pod0['subpod'])
        question = removeBrackets(question)
        search_wiki(question)
def removeBrackets(variable):
    """Return the part of *variable* that precedes its first ``(``.

    Whitespace left dangling in front of the bracket is removed, so
    ``"Albert Einstein (physicist)"`` becomes ``"Albert Einstein"`` rather
    than ``"Albert Einstein "``. A string without a bracket is returned
    with trailing whitespace stripped.
    """
    return variable.partition('(')[0].rstrip()
def resolveListOrDict(variable):
    """Return the ``plaintext`` of a subpod, or of the first subpod in a list.

    Args:
        variable: Either one mapping with a ``plaintext`` entry or a list
            of such mappings.

    Returns:
        The plaintext string, or ``''`` when the list is empty or the
        plaintext entry is missing or None, so callers can always treat
        the result as a string.
    """
    if isinstance(variable, list):
        if not variable:
            return ''
        variable = variable[0]
    return variable.get('plaintext') or ''
#def primaryImage(title=''):
# url = 'http://en.wikipedia.org/w/api.php'
# data = {'action':'query', 'prop':'pageimages','format':'json','piprop':'original','titles':title}
# try:
# res = requests.get(url, params=data)
# key = res.json()['query']['pages'].keys()[0]
# imageUrl = res.json()['query']['pages'][key]['original']['source']
# print(imageUrl)
# except Exception:
# print('Exception while finding image:= '+str(err))
page = requests.get('https://www.wolframalpha.com/')
s = page.status_code
if (s != 200):
ready = 1
time.sleep(1)
print('It looks like https://www.wolframalpha.com/ is not online.')
print('Please check your connection to the internet and https://www.wolframalpha.com/')
print('Stopping Python Information Engine')
while True:
time.sleep(1)
page = requests.get('https://www.wikipedia.org/')
s = page.status_code
if (s != 200):
ready = 1
time.sleep(1)
print('It looks like https://www.wikipedia.org/ is not online.')
print('Please check your connection to the internet and https://www.wikipedia.org/')
print('Stopping Python Information Engine')
while True:
time.sleep(1)
ready = 1
while exitNow == 0:
print('================================================================================================')
print('Python Information Engine CLI Version', end=' ')
print(version)
print('Create by Unsigned_Py')
print('================================================================================================')
ready = 1
time.sleep(1)
print(' ', end='\r')
print(' ', end='\r')
q = input('Search: ')
print('================================================================================================')
if (q == 'Credits()'):
print('Credits')
print('================================================================================================')
print('PIE is made by Unsigned_Py')
print('Unsigned_Py on the Python fourms: https://python-forum.io/User-Unsigned-Py')
print('Contact Unsigned_Py: Ckyiu#outlook.com')
if (q == 'Latest News'):
print('DISCLAIMER: The Python Information Engine News port is still in DEVELOPMENT!')
print('Getting latest news links from Google News...')
ready = 0
news_url = "https://news.google.com/news/rss"
Client = urlopen(news_url)
xml_page = Client.read()
Client.close()
soup_page = soup(xml_page,"xml")
news_list = soup_page.findAll("item")
ready = 1
print('================================================================================================')
article_number = 1
for news in news_list:
print(article_number, end=': ')
print(news.title.text)
print(news.pubDate.text)
if (input('Read (Y or N)? ') == 'y'):
ready = 0
url = news.link.text
article = Article(url)
article.download()
article.parse()
article.nlp()
ready = 1
print('================================================================================================')
print(article.summary)
print('================================================================================================')
article_number = article_number + 1
print("That's all for today!")
if (q == 'Version()'):
print('Python Information Engine CLI Version', end=' ')
print(version)
print('Running Build', end=' ')
print(build)
print('Upon finding a bug, please report to Unsigned_Py and I will try to fix it!')
print('Looking for Python Information Engine CLI Version 1.0 - 1.9?')
print("It's called Wolfram|Alpha and Wikipedia Engine Search!")
if (q != 'Exit()'):
if (q != 'Credits()'):
if (q != 'News'):
if (q != 'Version()'):
ready = 0
search(q)
else:
exitNow = 1
print('Thank you for using Python Information Engine')
print('================================================================================================')
time.sleep(2)
ready = 0
Here's the error:
Traceback (most recent call last):
File "C:\Users\ckyiu\OneDrive\Desktop\Python Information Engine 2.1.py", line 210, in <module>
search(q)
File "C:\Users\ckyiu\OneDrive\Desktop\Python Information Engine 2.1.py", line 62, in search
res = client.query(text)
File "C:\Users\ckyiu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\wolframalpha\__init__.py", line 56, in query
return Result(resp)
File "C:\Users\ckyiu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\wolframalpha\__init__.py", line 178, in __init__
super(Result, self).__init__(doc)
File "C:\Users\ckyiu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\wolframalpha\__init__.py", line 62, in __init__
self._handle_error()
File "C:\Users\ckyiu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\wolframalpha\__init__.py", line 69, in _handle_error
raise Exception(template.format(**self))
Exception: Error 0: Unknown error
Well, I got it to work now. For some reason I had written if (q != 'News'): where I wanted if (q != 'Latest News'):.
Because of that, 'Latest News' was also passed on to search(), and that is where Python threw the error. At least it works now.

server doesn't send data to clients

I have this piece of code for a server that handles clients. It receives data properly, but when I want to send the received data to the clients, nothing happens.
server
import socket
from _thread import *
class GameServer:
    """Threaded TCP echo server.

    The constructor binds a listening socket to port 9999 on this host's
    name. ``thread_run`` accepts clients forever and starts one thread per
    connection; each thread echoes whatever it receives back to the client
    that sent it.
    """

    def __init__(self):
        """Create, bind and start listening on the server socket.

        Raises:
            OSError: If the socket cannot be bound to the address.
        """
        # Game parameters, kept on the instance (the original bound them
        # to locals that were discarded when __init__ returned).
        self.board = [None] * 9
        self.turn = 1
        # TCP parameters
        self.tcp_ip = socket.gethostname()
        self.tcp_port = 9999
        self.buffer_size = 2048
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.s.bind((self.tcp_ip, self.tcp_port))
        except OSError:
            # Report and stop: carrying on to listen() after a failed
            # bind would not serve the intended address.
            print("socket error, Please try again! ")
            self.s.close()
            raise
        self.s.listen(5)
        print('Waiting for a connection...')

    def messaging(self, conn):
        """Echo data received on *conn* back to it until the peer closes.

        Args:
            conn: Connected socket returned by ``accept``.
        """
        try:
            while True:
                data = conn.recv(self.buffer_size)
                if not data:
                    break
                print("This data from client:", data)
                # sendall() keeps writing until the whole chunk is sent;
                # send() may transmit only part of it.
                conn.sendall(data)
        finally:
            conn.close()

    def thread_run(self):
        """Accept clients forever, serving each one on its own thread."""
        while True:
            conn, addr = self.s.accept()
            print('connected to: ' + addr[0] + " : " + str(addr[1]))
            start_new_thread(self.messaging, (conn,))
def main():
gameserver = GameServer()
gameserver.thread_run()
if __name__ == '__main__':
main()
'
Once the data has been received completely, I want to retrieve the sender's address and forward the data to the other clients using conn.send(), but there seems to be no way to do this with the send() method.
The piece of client side code
'
def receive_parser(self):
    """Read one message from ``self.s`` and act on it.

    Messages that start with ``'c2'`` are dispatched on the next three
    characters:

    * ``'trn'`` sets the global ``turn`` to -1 when followed by ``'2'``
      and to 1 when followed by ``'1'``;
    * ``'num'`` passes the following character to ``self.set_text``;
    * ``'txt'`` appends the remaining text to ``self.plainTextEdit_4``.

    Any other message is printed.
    """
    global turn
    # decode() returns a new str. The original discarded that result, so
    # the raw bytes were compared with str prefixes and never matched.
    rcv_data = self.s.recv(4096).decode()
    if rcv_data[:2] == 'c2':
        message = rcv_data[2:]
        kind = message[:3]
        if kind == 'trn':
            # A one-character slice is '' (not an IndexError) when the
            # payload is missing. It is text, so compare with '2'/'1',
            # not with the ints 2/1.
            temp = message[3:4]
            if temp == '2':
                turn = -1
            elif temp == '1':
                turn = 1
        elif kind == 'num':
            self.set_text(message[3])
        elif kind == 'txt':
            self.plainTextEdit_4.appendPlainText('client1: ' + message[3:])
    else:
        print(rcv_data)
'
The receiver method does not receive any data.
I modified your code a little (as I have Python 2.7), and conn.send() seems to work fine. You can also try conn.sendall(). Here is the code I ran:
Server code:
import socket
from thread import *
class GameServer:
    """Threaded TCP echo server listening on 127.0.0.1:9999.

    ``thread_run`` accepts clients forever and starts one thread per
    connection; each thread echoes whatever it receives back to the client
    that sent it.
    """

    def __init__(self):
        """Create, bind and start listening on the server socket.

        Raises:
            socket.error: If the socket cannot be bound to the address.
        """
        # Game parameters, kept on the instance (the original bound them
        # to locals that were discarded when __init__ returned).
        self.board = [None] * 9
        self.turn = 1
        # TCP parameters
        self.tcp_ip = "127.0.0.1"  # socket.gethostname()
        self.tcp_port = 9999
        self.buffer_size = 2048
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.s.bind((self.tcp_ip, self.tcp_port))
        except socket.error:
            # socket.error exists on both Python 2 and 3. Report and
            # stop: carrying on to listen() after a failed bind would not
            # serve the intended address.
            print("socket error, Please try again! ")
            self.s.close()
            raise
        self.s.listen(5)
        print('Waiting for a connection...')

    def messaging(self, conn):
        """Echo data received on *conn* back to it until the peer closes.

        Args:
            conn: Connected socket returned by ``accept``.
        """
        try:
            while True:
                data = conn.recv(self.buffer_size)
                if not data:
                    break
                print("This data from client:", data)
                # sendall() keeps writing until the whole chunk is sent;
                # send() may transmit only part of it.
                conn.sendall(data)
        finally:
            conn.close()

    def thread_run(self):
        """Accept clients forever, serving each one on its own thread."""
        while True:
            conn, addr = self.s.accept()
            print('connected to: ' + addr[0] + " : " + str(addr[1]))
            start_new_thread(self.messaging, (conn,))
def main():
gameserver = GameServer()
gameserver.thread_run()
main()
Client code:
import socket
s=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("127.0.0.1", 9999))
def receive_parser():
#global turn
s.sendall("hello world")
rcv_data = s.recv(4096)
# rcv_data.decode()
# if rcv_data[:2] == 'c2':
# message = rcv_data[2:]
# if message[:3] == 'trn':
# temp = message[3]
# if temp == 2:
# turn = -1
# elif temp ==1:
# turn = 1
# elif message[:3] == 'num':
# self.set_text(message[3])
# elif message[:3] == 'txt':
# self.plainTextEdit_4.appendPlainText('client1: ' + message[3:])
# else:
print(rcv_data)
receive_parser()