If I launch a group of tasks to be executed in an asyncio event loop, can I react to the return of each one separately? - subprocess

I want to launch 10 OS subprocesses with asyncio. I can do that with gather, for example, and then find out the status of each task at the end of the event loop. But I have to wait for the whole thing to finish, even though the tasks run concurrently.
Is there a way to know that subprocess 1 has already finished and react to that event, even before the other 9 tasks have completed?
I am working with Python >3.7 (3.8.6 and 3.9.1).
Maybe my question should be: once the event loop is running, is there a way to find out the status of the tasks currently running?
Or is the expectation that the task itself does any follow-up work after its await statement completes, but before returning and leaving the event loop?
I'll try that approach. In the meantime, this is the code I am using for my basic testing:
Example of what I want:
import asyncio  # needed for create_subprocess_shell and the task helpers below
import time

async def osrunner(cmd):
    proc = await asyncio.create_subprocess_shell(
        cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE)
    stdout, stderr = await proc.communicate()
    if stdout:
        print(f'[stdout]\n{stdout.decode()}')
    if stderr:
        print(f'[stderr]\n{stderr.decode()}')
    return True
async def main():
    cmd00 = 'sleep 35'
    cmd01 = 'sleep 15'
    cmd02 = 'sleep 25'
    cmd03 = 'sleep 5'
    task0 = asyncio.create_task(osrunner(cmd00))
    task1 = asyncio.create_task(osrunner(cmd01))
    task2 = asyncio.create_task(osrunner(cmd02))
    task3 = asyncio.create_task(osrunner(cmd03))
    await task0
    await task1
    await task2
    await task3

print(f"started main at {time.strftime('%X')}")
asyncio.run(main())  # <---- I want to poll the status of the tasks and do something while the others are still unfinished
print(f"finished main at {time.strftime('%X')}")

Related

Pytest asyncio - run multiple tests gives error regarding event loop

I am using pytest with pytest-asyncio to run async tests. What is strange is that these tests only work if there is exactly one of them. For example:
@pytest.mark.asyncio
async def test_foo():
    client = await get_db_connection()
    user = await client.DO_QUERY
    response = await FunctionUnderTest(
        db=client, ...args
    )
    assert response.id is not None

@pytest.mark.asyncio
async def test_error_foo():
    client = await get_db_connection()
    with pytest.raises(MissingRequiredError):
        await FunctionUnderTest(
            db=client, ...args
        )
If I comment out either of those tests, the remaining one will pass, but running both together gives:
RuntimeError: Task <Task pending name='Task-5' coro=<test_error_foo() running at /tests/test_function_under_test.py:44> cb=[_run_until_complete_cb() at /usr/lib/python3.10/asyncio/base_events.py:184]> got Future <Future pending> attached to a different loop
I would have expected pytest-asyncio to create a single event loop and run all the tests sequentially, but this does not seem to work.
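By default pytest-asyncio runs every test in a fresh event loop, so this error usually means something in get_db_connection caches a client or connection bound to the loop of the first test. One hedged workaround, assuming a session-wide loop is acceptable and a pytest-asyncio version that still lets you override the event_loop fixture, is to share one loop for the whole session from conftest.py:

# conftest.py (sketch; the exact fixture shape depends on the pytest-asyncio version)
import asyncio
import pytest

@pytest.fixture(scope="session")
def event_loop():
    # Replace the default function-scoped loop so all tests
    # (and the cached DB client) share the same loop.
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()

Alternatively, avoid caching the connection across tests and create it per test.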

How to create a websocket manager in Solana-py

I am using Python v3.7 + solana v0.21.0 and trying to create a websocket manager handling several different subscriptions, but the code seems to block whenever websocket.recv()
or asyncstdlib.enumerate(websocket) is called. Examples from the docs:
import asyncio
from asyncstdlib import enumerate
from solana.rpc.websocket_api import connect

async def main():
    # First example using websocket.recv()
    async with connect("wss://api.devnet.solana.com") as websocket:
        await websocket.logs_subscribe()
        first_resp = await websocket.recv()
        subscription_id = first_resp.result
        next_resp = await websocket.recv()  # <--- Blocks until msg received.
        print(next_resp)
        await websocket.logs_unsubscribe(subscription_id)

    # Second example using the client as an infinite asynchronous iterator:
    async with connect("wss://api.devnet.solana.com") as websocket:
        await websocket.logs_subscribe()
        first_resp = await websocket.recv()
        subscription_id = first_resp.result
        async for idx, msg in enumerate(websocket):  # <--- Blocks until msg received.
            if idx == 3:
                break
            print(msg)
        await websocket.logs_unsubscribe(subscription_id)

asyncio.run(main())
The idea is to be able to iterate in an infinite loop so that new subscriptions can be added to the websocket, for instance:
from solana.rpc.request_builder import LogsSubscribeFilter
from solana.rpc.websocket_api import connect
from asgiref.sync import sync_to_async
from solana.publickey import PublicKey
from time import sleep

ws = {}  # maps each pubkey to its subscription ID

async def websocket_manager(rpc: str):
    async with connect(rpc) as websocket:
        while True:
            active_pubkeys = await sync_to_async(get_my_active_pubkeys)()
            if active_pubkeys:
                # Add missing pubkeys
                for pubkey in active_pubkeys:
                    if ws.get(pubkey) in websocket.subscriptions:
                        continue
                    print(f"Subscribe to {pubkey}")
                    await websocket.logs_subscribe(LogsSubscribeFilter.mentions(PublicKey(pubkey)))
                    first_resp = await websocket.recv()
                    ws[pubkey] = first_resp.result  # Maps the pubkey to the subscription ID
                # Delete unused subscriptions:
                for non_used_pubkey in set(active_pubkeys) ^ set(ws.keys()):
                    if non_used_pubkey in ws:
                        print(f"Delete subscription for pubkey #{non_used_pubkey}")
                        await websocket.account_unsubscribe(ws[non_used_pubkey])
                        ws.pop(non_used_pubkey)
            # <-- HERE: HOW TO ITERATE SUBSCRIPTIONS WITHOUT BLOCKING THE MANAGER????
            sleep(30)  # Sleep for 30 seconds
Would it be safe to use a new thread or a subprocess for reading the websocket messages so they don't block the main function?
I figured it out: I just needed to use task = asyncio.create_task(my_msg_listener_function(websocket, ws)) to have another task running concurrently and taking care of the websocket messages.
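A minimal sketch of that approach; my_msg_listener_function is the placeholder name from the answer above, and the message handling here is just a print:

import asyncio

async def my_msg_listener_function(websocket, ws):
    # Only this task blocks on incoming messages; the manager loop keeps running.
    async for msg in websocket:
        print(f"received: {msg}")  # placeholder: dispatch by subscription ID here

async def websocket_manager(rpc: str):
    async with connect(rpc) as websocket:
        listener = asyncio.create_task(my_msg_listener_function(websocket, ws))
        while True:
            # ... subscribe/unsubscribe bookkeeping as above ...
            await asyncio.sleep(30)  # non-blocking sleep keeps the listener responsive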

Is it possible to prevent execution of further tasks in locust TaskSequence if some task has failed?

For example, I have the following class. How can I prevent execution of the get_entity task if the create_entity task was not executed?
class MyTaskSequence(TaskSequence):
    @seq_task(1)
    def create_entity(self):
        self.round += 1
        with self.client.post('/entities', json={}, catch_response=True) as resp:
            if resp.status_code != HTTPStatus.CREATED:
                resp.failure()
                # how to stop the other tasks for this run?
            self.entity_id = resp.json()['data']['entity_id']

    @seq_task(2)
    def get_entity(self):
        # This is always executed,
        # but it should not run if the create_entity task failed
        resp = self.client.get(f'/entities/{self.entity_id}')
        ...
I found the TaskSet.interrupt method in the documentation, but it does not allow cancelling the root TaskSet. I tried making a parent TaskSet for my task sequence so that TaskSet.interrupt works:
class MyTaskSet(TaskSet):
    tasks = {MyTaskSequence: 10}
But now I see that all results in the UI are cleared after I call interrupt!
I just need to skip the dependent tasks in this sequence; I need the results.
The easiest way to solve this is to use a single @task with multiple requests inside it. Then, if a request fails, just return after resp.failure(), as in the sketch below.
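A minimal sketch of that idea, reusing the endpoints and HTTPStatus check from the question (the failure message is a placeholder):

class MyTaskSet(TaskSet):
    @task
    def create_and_get_entity(self):
        with self.client.post('/entities', json={}, catch_response=True) as resp:
            if resp.status_code != HTTPStatus.CREATED:
                resp.failure("entity was not created")
                return  # skip the dependent request for this run
            entity_id = resp.json()['data']['entity_id']
        self.client.get(f'/entities/{entity_id}')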
Might self.interrupt() be what you are looking for?
See https://docs.locust.io/en/latest/writing-a-locustfile.html#interrupting-a-taskset for reference.
Why not use on_start(self), which runs once whenever a locust is created? It can set a flag that is checked before the other tasks execute:
class MyTaskSequence(TaskSequence):
    entity_created = False

    def on_start(self):
        self.round += 1
        with self.client.post('/entities', json={}, catch_response=True) as resp:
            if resp.status_code != HTTPStatus.CREATED:
                resp.failure()
            else:
                self.entity_created = True
                self.entity_id = resp.json()['data']['entity_id']

    @seq_task(2)
    def get_entity(self):
        if self.entity_created:
            resp = self.client.get(f'/entities/{self.entity_id}')
            ...

How to check status of a non-registered task?

My application A calls a celery task longtask in application B. However, longtask is registered in B but not in A, so A calls it using send_task. I want a mechanism in A to check periodically whether longtask is complete. How do I do it?
send_task returns an AsyncResult that contains the task id. You can use this id to periodically check on the result of longtask:
result = my_app.send_task('longtask', kwargs={})
task_id = result.id

# anywhere else in your code you can reuse the
# task_id to check the status
from celery.result import AsyncResult
import time

done = False
while not done:
    result = AsyncResult(task_id)
    current_status = result.status
    if current_status == 'SUCCESS':
        print('yay! we are done')
        done = True
    time.sleep(10)
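If the Celery app instance is importable where you poll, the same check can use the AsyncResult helpers instead of comparing status strings; a small variant, assuming my_app is available there:

from celery.result import AsyncResult
import time

result = AsyncResult(task_id, app=my_app)
while not result.ready():      # becomes True once the task reaches a final state
    time.sleep(10)
if result.successful():
    print('yay! we are done')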

@task_postrun.connect signal in Celery and executing another task results in some infinite loop of executions

I need the following workflow for my celery tasks:
when taskA finishes with success, I want to execute taskB.
I know there is the task_success signal, but it returns only the task's result, and I need access to the previous task's arguments. So I decided on code like this:
@app.task
def taskA(arg):
    # not cool, but... https://github.com/celery/celery/issues/3797
    from shopify.tasks import taskA
    taskA(arg)

@task_postrun.connect
def fetch_taskA_success_handler(sender=None, **kwargs):
    from gcp.tasks import taskB
    if kwargs.get('state') == 'SUCCESS':
        taskB.apply_async((kwargs.get('args')[0], ))
The problem is that taskB seems to be executed in an endless loop, many, many times instead of only once. The handler is connected to task_postrun, which fires for every task, including taskB itself, so each completed taskB schedules yet another taskB.
This way it works correctly, because the handler checks sender.name and only reacts to taskA:
@app.task
def taskA(arg):
    # not cool, but... https://github.com/celery/celery/issues/3797
    # otherwise it won't be added to the periodic tasks
    from shopify.tasks import taskA
    return taskA(arg)

@task_postrun.connect
def taskA_success_handler(sender=None, state=None, **kwargs):
    resource_name = kwargs.get('kwargs', {}).get('resource_name')
    if resource_name and state == 'SUCCESS':
        if sender.name == 'shopify.tasks.taskA':
            from gcp.tasks import taskB
            taskB.apply_async(kwargs={
                'resource_name': resource_name
            })
just for reference:
celery==4.1.0
Django==2.0
django-celery-beat==1.1.0
django-celery-results==1.0.1
flower==0.9.2
amqp==2.2.2
Python 3.6