Python Faust await agent.ask() doesn't return reply and hangs function calling it - apache-kafka

I am new to Python, playing around with stuff, trying to communicate python services via Kafka using Faust.
So I have small PoC project.
Faust app definition:
# app.py
import faust as f
from models import ReadRequest, ReadResponse
app = f.App("faust-app", broker="kafka://localhost:9092", store="rocksdb://")
topics = {"read-request": app.topic("read-request", value_type=ReadRequest)}
def get_app() -> f.types.AppT:
return app
def get_topic(name: str) -> f.types.TopicT:
return topics[name]
My DB reader agent:
# reader.py
import pandas as pd
from pymongo import MongoClient
from app import get_app, get_topic
client = MongoClient()
app = get_app()
req_topic = get_topic("read-request")
#app.agent(req_topic)
async def read_request(requests):
async for request in requests:
db = client.test
coll = db[request["collection"]]
result = coll.find(request["query"])
df = pd.DataFrame(result)
response = {
"id": request["id"],
"data": list(df.loc[:, df.columns != "_id"].to_dict(orient="records")),
}
print(response) # debug <1>
yield response
Model definitions:
# models.py
import faust as f
class ReadRequest(f.Record):
id: int
collection: str
query: dict
Test agent.ask()
# test.py
import asyncio
from reader import read_request
from models import ReadRequest
async def run():
result = await read_request.ask(ReadRequest(id=1, collection="test", query={}))
print(result) # debug <2>
if __name__ == "__main__":
asyncio.run(run())
So I have zookeeper, kafka server, mongodb and faust worker reader running. Everything is using out of the box configs.
When I run python3 test.py I see debug <1> print output as expected, but debug <2> never goes off and execution hangs there.
Faust docs say
So I assume that I am doing everything right.
Anyone has any clues?

Related

FastAPI StreamingResponse with picamera2 - browser refresh problem

I'm trying to make FastAPI server which streams MJPEG from Raspberry Pi via picamera2 library. It works, but when I reload browser on /mjpeg multiple times, it will stuck. But with this example it works perfectly - you can reload browsers as many times as you want. Can you spot the problem?
For server I'm using uvicorn with default settings.
Thanks!
import io
import os
from threading import Condition
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from picamera2 import Picamera2
from picamera2.encoders import JpegEncoder
from picamera2.outputs import FileOutput
from fastapi.responses import HTMLResponse, StreamingResponse
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins="http://localhost:8000",
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class StreamingOutput(io.BufferedIOBase):
def __init__(self):
self.frame = None
self.condition = Condition()
def write(self, buf):
with self.condition:
self.frame = buf
self.condition.notify_all()
picam2 = Picamera2()
picam2.configure(picam2.create_video_configuration(main={"size": (640, 480)}))
output = StreamingOutput()
picam2.start_recording(JpegEncoder(), FileOutput(output))
def get_frame():
try:
while True:
with output.condition:
output.condition.wait()
frame = output.frame
yield (
b"--frame\r\n" b"Content-Type: image/jpeg\r\n\r\n" + frame + b"\r\n"
)
except Exception as e:
print("Error! Frames")
#app.get("/mjpeg", response_class=StreamingResponse)
def mjpeg(request: Request):
try:
frames = get_frame()
response = StreamingResponse(
frames,
headers={
"Cache-Control": "no-cache, private",
"Pragma": "no-cache",
},
media_type="multipart/x-mixed-replace; boundary=frame",
)
return response
except Exception as e:
print("Error! Route")

ChannelsLiveServerTestCase equivalent for pytest

In pytest-django there is a builtin fixture live_server though it seems like this server (that is actually based on LiveServerTestCase) can't handle web-sockets or at least won't interact with my asgi.py module.
How can one mimic that fixture in order to use ChannelsLiveServerTestCase instead? Or anything else that will run a test-database and will be able to serve an ASGI application?
My goal eventually is to have as close to production environment as possible, for testing and being able to test interaction between different Consumers.
P.S: I know I can run manage.py testserver <Fixture> on another thread / process by overriding django_db_setup though I seek for a better solution.
You can implement a channels_live_server fixture based on the implementations of:
live_server fixture, which instantiates
LiveServer helper, which starts LiveServerThread, and
ChannelsLiveServerTestCase, which starts DaphneProcess.
#medihack implemented it at https://github.com/pytest-dev/pytest-django/issues/1027:
from functools import partial
from channels.routing import get_default_application
from daphne.testing import DaphneProcess
from django.contrib.staticfiles.handlers import ASGIStaticFilesHandler
from django.core.exceptions import ImproperlyConfigured
from django.db import connections
from django.test.utils import modify_settings
def make_application(*, static_wrapper):
# Module-level function for pickle-ability
application = get_default_application()
if static_wrapper is not None:
application = static_wrapper(application)
return application
class ChannelsLiveServer:
host = "localhost"
ProtocolServerProcess = DaphneProcess
static_wrapper = ASGIStaticFilesHandler
serve_static = True
def __init__(self) -> None:
for connection in connections.all():
if connection.vendor == "sqlite" and connection.is_in_memory_db():
raise ImproperlyConfigured(
"ChannelsLiveServer can not be used with in memory databases"
)
self._live_server_modified_settings = modify_settings(ALLOWED_HOSTS={"append": self.host})
self._live_server_modified_settings.enable()
get_application = partial(
make_application,
static_wrapper=self.static_wrapper if self.serve_static else None,
)
self._server_process = self.ProtocolServerProcess(self.host, get_application)
self._server_process.start()
self._server_process.ready.wait()
self._port = self._server_process.port.value
def stop(self) -> None:
self._server_process.terminate()
self._server_process.join()
self._live_server_modified_settings.disable()
#property
def url(self) -> str:
return f"http://{self.host}:{self._port}"
#pytest.fixture
def channels_live_server(request):
server = ChannelsLiveServer()
request.addfinalizer(server.stop)
return server
#aaron's solution can't work, due to pytest-django conservative approach for database access.
another proccess wouldn't be aware that your test has database access permissions therefore you won't have database access. (here is a POC)
Using a scoped fixture of daphne Server suffice for now.
import threading
import time
from functools import partial
from django.contrib.staticfiles.handlers import ASGIStaticFilesHandler
from django.core.exceptions import ImproperlyConfigured
from django.db import connections
from django.test.utils import modify_settings
from daphne.server import Server as DaphneServer
from daphne.endpoints import build_endpoint_description_strings
def get_open_port() -> int:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("", 0))
s.listen(1)
port = s.getsockname()[1]
s.close()
return port
def make_application(*, static_wrapper):
# Module-level function for pickle-ability
if static_wrapper is not None:
application = static_wrapper(your_asgi_app)
return application
class ChannelsLiveServer:
port = get_open_port()
host = "localhost"
static_wrapper = ASGIStaticFilesHandler
serve_static = True
def __init__(self) -> None:
for connection in connections.all():
if connection.vendor == "sqlite" and connection.is_in_memory_db():
raise ImproperlyConfigured(
"ChannelsLiveServer can not be used with in memory databases"
)
self._live_server_modified_settings = modify_settings(ALLOWED_HOSTS={"append": self.host})
self._live_server_modified_settings.enable()
get_application = partial(
make_application,
static_wrapper=self.static_wrapper if self.serve_static else None,
)
endpoints = build_endpoint_description_strings(
host=self.host, port=self.port
)
self._server = DaphneServer(
application=get_application(),
endpoints=endpoints
)
t = threading.Thread(target=self._server.run)
t.start()
for i in range(10):
time.sleep(0.10)
if self._server.listening_addresses:
break
assert self._server.listening_addresses[0]
def stop(self) -> None:
self._server.stop()
self._live_server_modified_settings.disable()
#property
def url(self) -> str:
return f"ws://{self.host}:{self.port}"
#property
def http_url(self):
return f"http://{self.host}:{self.port}"
#pytest.fixture(scope='session')
def channels_live_server(request, live_server):
server = ChannelsLiveServer()
request.addfinalizer(server.stop)
return server

Parameterize the find method in python using mongo

Files to upload will be like WFSIV0101202001.318.tar.gz,WFSIV0101202001.2624.tar.gz etc.
INPUT_FILE_PATH = 'C:\Files to upload'
try:
import os
from google.cloud import storage
import sys
import pymongo
import pymongo.errors
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
except:
print("missing modules")
try:
mongo_client = MongoClient(host="xyz.com", port=27017)
Db = mongo_client['abcd']
coll = Db['shopper_journey_sitedata']
except ConnectionFailure:
print("Connection failed")
date=[]
# Thirdpartyid=[]
input_files = os.listdir(INPUT_FILE_PATH)
# looping through input files
for input_file in input_files:
x = input_file.split(".")
date.append(x[0][5:13])
tp_site_id = x[1]
# print(tp_site_id)
cur = coll.find({"third_party_site_id":tp_site_id})
for doc in cur:
print(doc)
Now i want to parameterize the find() method for every id, so that on each iteration i should get st_site_id ?
above code i tried but ist giving error as "Datas:name error"
You can do one thing
coll.find({"third_party_site_id": { $in :
[318,2624,2621,2622,102,078]}})
If Tid is an array, then you could replace 318 in your query to Tid[I]

flask/MongoDB error on local server using raspberry pi3 - raspbian os

i've made a local server using flask and mongoDB which works great on windows, but when i moved my code to the raspberry pi, i've got an error which i couldn't figure out why it occurs.
the code im using:
1) for the flask server
from flask import Flask
from flask import jsonify
from flask import request
import pymongo
import time
import datetime
import json
app = Flask(__name__)
client = pymongo.MongoClient("localhost", 27017)
db = client['mqtt-db']
obs_collection = db['mqtt-collection']
#app.route("/obs")
def obs():
data_str = request.args.get("data")
print data_str
data = json.loads(data_str)
print data
data["date"] = datetime.datetime.now()
obs_collection.save(data)
return "success"
#app.route("/get_obs")
def get_obs():
res = []
for row in obs_collection.find():
del row['_id']
res.append(row)
return jsonify(res)
#app.route("/delete_all")
def delete_all():
res = obs_collection.delete_many({})
return jsonify({"deleted": res.deleted_count})
if __name__ == "__main__":
app.run(host="0.0.0.0", debug=True)
2) script for inserting messages into db , using mqtt protocol:
import paho.mqtt.client as mqtt
import pymongo
import json
import datetime
topic = "sensor"
host = "10.0.0.6"
client = pymongo.MongoClient("localhost", 27017)
db = client['mqtt-db']
mqtt_collection = db['mqtt-collection']
# The callback for when the client receives a CONNACK response from the server.
def on_connect(client, userdata, flags, rc):
print("Connected with result code "+str(rc))
# Subscribing in on_connect() means that if we lose the connection and
# reconnect then subscriptions will be renewed.
client.subscribe(topic)
# The callback for when a PUBLISH message is received from the server.
def on_message(client, userdata, msg):
data_str = str(msg.payload)
data = json.loads(data_str)
print data_str
print data
data["date"] = datetime.datetime.now()
mqtt_collection.save(data)
print(msg.topic+" "+str(msg.payload))
client = mqtt.Client()
client.on_connect = on_connect
client.on_message = on_message
client.connect(host, 1883, 60)
# Blocking call that processes network traffic, dispatches callbacks and
# handles reconnecting.
# Other loop*() functions are available that give a threaded interface and a
# manual interface.
client.loop_forever()
the error occurs when i try to retrieve data from the server using "get_obs" function.
the error is: "Value Error: dictionary update sequence element #0 has length 4; 2 is required"
appreciate your help.
as #davidism suggested, the solution was to update to the latest version of Flask

falcon python example with celery

import falcon
import json
from tasks import add
from waitress import serve
class tasksresource:
def on_get(self, req, resp):
"""Handles GET requests"""
self.result = add.delay(1, 2)
self.context = {'ID': self.result.id, 'final result': self.result.ready()}
resp.body = json.dumps(self.context)
api = falcon.API()
api.add_route('/result', tasksresource())
# api.add_route('/result/task', taskresult())
if __name__ == '__main__':
serve(api, host='127.1.0.1', port=5555)
how do i get the Get the task id from json payload ( post data)
and add a route to it
Here a small example. Structure of files:
/project
__init__.py
app.py # routes, falcon etc.
tasks.py # celery
example.py # script for demonstration how it works
app.py:
import json
import falcon
from tasks import add
from celery.result import AsyncResult
class StartTask(object):
def on_get(self, req, resp):
# start task
task = add.delay(4, 4)
resp.status = falcon.HTTP_200
# return task_id to client
result = {'task_id': task.id}
resp.body = json.dumps(result)
class TaskStatus(object):
def on_get(self, req, resp, task_id):
# get result of task by task_id and generate content to client
task_result = AsyncResult(task_id)
result = {'status': task_result.status, 'result': task_result.result}
resp.status = falcon.HTTP_200
resp.body = json.dumps(result)
app = falcon.API()
# registration of routes
app.add_route('/start_task', StartTask())
app.add_route('/task_status/{task_id}', TaskStatus())
tasks.py:
from time import sleep
import celery
app = celery.Celery('tasks', broker='redis://localhost:6379/0', backend='redis://localhost:6379/0')
#app.task
def add(x, y):
"""
:param int x:
:param int y:
:return: int
"""
# sleep just for demonstration
sleep(5)
return x + y
Now we need to start celery application. Go to project folder and run:
celery -A tasks worker --loglevel=info
After this we need to start Falcon application. Go to project folder and run:
gunicorn app:app
Ok. Everything is ready.
example.py is small client side which can help to understand:
from time import sleep
import requests
# start new task
task_info = requests.get('http://127.0.0.1:8000/start_task')
task_info = task_info.json()
while True:
# check status of task by task_id while task is working
result = requests.get('http://127.0.0.1:8000/task_status/' + task_info['task_id'])
task_status = result.json()
print task_status
if task_status['status'] == 'SUCCESS' and task_status['result']:
print 'Task with id = %s is finished' % task_info['task_id']
print 'Result: %s' % task_status['result']
break
# sleep and check status one more time
sleep(1)
Just call python ./example.py and you should see something like this:
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'SUCCESS', u'result': 8}
Task with id = 76542904-6c22-4536-99d9-87efd66d9fe7 is finished
Result: 8
Hope this helps you.
The above example by Danila Ganchar is great and very helpful. I'm using celery version 4.3.0 with Python 3, and one of the errors I received from using the example above is on this line:
task_result = AsyncResult(task_id)
The error I would receive is:
AttributeError: 'DisabledBackend' object has no attribute '_get_task_meta_for'
This may be a recent change, but result.AsyncResult (or just AsyncResult in this example because he imported it from celery.result) doesn't know the backend you are using. There are 2 solutions to solving this problem:
1) You can take the AsyncResult of the actual task itself add.AsyncResult(task_id) because the add task already has the backend defined through the #app.task decorator. The downside to this in this example is you want to be able to get the result for any task by just passing in the task_id via the Falcon endpoint, so this is limited
2) The preferred method is to just pass in the app parameter to the AsyncResult function:
task = result.AsyncResult(id, app=app)
Hope this helps!