I have an application with Blueprints and Celery
the code is here:
config.py
import os
from celery.schedules import crontab
basedir = os.path.abspath(os.path.dirname(__file__))
class Config:
SECRET_KEY = os.environ.get('SECRET_KEY') or ''
SQLALCHEMY_COMMIT_ON_TEARDOWN = True
RECORDS_PER_PAGE = 40
SQLALCHEMY_DATABASE_URI = ''
CELERY_BROKER_URL = ''
CELERY_RESULT_BACKEND = ''
CELERY_RESULT_DBURI = ''
CELERY_TIMEZONE = 'Europe/Kiev'
CELERY_ENABLE_UTC = False
CELERYBEAT_SCHEDULE = {}
#staticmethod
def init_app(app):
pass
class DevelopmentConfig(Config):
DEBUG = True
WTF_CSRF_ENABLED = True
APP_HOME = ''
SQLALCHEMY_DATABASE_URI = 'mysql+mysqldb://...'
CELERY_BROKER_URL = 'sqla+mysql://...'
CELERY_RESULT_BACKEND = "database"
CELERY_RESULT_DBURI = 'mysql://...'
CELERY_TIMEZONE = 'Europe/Kiev'
CELERY_ENABLE_UTC = False
CELERYBEAT_SCHEDULE = {
'send-email-every-morning': {
'task': 'app.workers.tasks.send_email_task',
'schedule': crontab(hour=6, minute=15),
},
}
class TestConfig(Config):
DEBUG = True
WTF_CSRF_ENABLED = False
TESTING = True
SQLALCHEMY_DATABASE_URI = 'mysql+mysqldb://...'
class ProdConfig(Config):
DEBUG = False
WTF_CSRF_ENABLED = True
SQLALCHEMY_DATABASE_URI = 'mysql+mysqldb://...'
CELERY_BROKER_URL = 'sqla+mysql://...celery'
CELERY_RESULT_BACKEND = "database"
CELERY_RESULT_DBURI = 'mysql://.../celery'
CELERY_TIMEZONE = 'Europe/Kiev'
CELERY_ENABLE_UTC = False
CELERYBEAT_SCHEDULE = {
'send-email-every-morning': {
'task': 'app.workers.tasks.send_email_task',
'schedule': crontab(hour=6, minute=15),
},
}
config = {
'development': DevelopmentConfig,
'default': ProdConfig,
'production': ProdConfig,
'testing': TestConfig,
}
class AppConf:
"""
Class to store current config even out of context
"""
def __init__(self):
self.app = None
self.config = {}
def init_app(self, app):
if hasattr(app, 'config'):
self.app = app
self.config = app.config.copy()
else:
raise TypeError
init.py:
import os
from flask import Flask
from celery import Celery
from config import config, AppConf
def create_app(config_name):
app = Flask(__name__)
app.config.from_object(config[config_name])
config[config_name].init_app(app)
app_conf.init_app(app)
# Connect to Staging view
from staging.views import staging as staging_blueprint
app.register_blueprint(staging_blueprint)
return app
def make_celery(app=None):
app = app or create_app(os.getenv('FLASK_CONFIG') or 'default')
celery = Celery(__name__, broker=app.config.CELERY_BROKER_URL)
celery.conf.update(app.conf)
TaskBase = celery.Task
class ContextTask(TaskBase):
abstract = True
def __call__(self, *args, **kwargs):
with app.app_context():
return TaskBase.__call__(self, *args, **kwargs)
celery.Task = ContextTask
return celery
tasks.py:
from app import make_celery, app_conf
cel = make_celery(app_conf.app)
#cel.task
def send_realm_to_fabricdb(realm, form):
some actions...
and here is the problem:
The Blueprint "staging" uses task send_realm_to_fabricdb, so it makes: from tasks import send_realm_to_fabricdb
than, when I just run application, everything goes ok
BUT, when I'm trying to run celery: celery -A app.tasks worker -l info --beat, it goes to cel = make_celery(app_conf.app) in tasks.py, got app=None and trying to create application again: registering a blueprint... so I've got cycle import here.
Could you tell me how to break this cycle?
Thanks in advance.
I don't have the code to try this out, but I think things would work better if you move the creation of the Celery instance out of tasks.py and into the create_app function, so that it happens at the same time the app instance is created.
The argument you give to the Celery worker in the -A option does not need to have the tasks, Celery just needs the celery object, so for example, you could create a separate starter script, say celery_worker.py that calls create_app to create app and cel and then give it to the worker as -A celery_worker.cel, without involving the blueprint at all.
Hope this helps.
What I do to solve this error is that I create two Flask instance which one is for Web app, and another is for initial Celery instance.
Like #Miguel said, I have
celery_app.py for celery instance
manager.py for Flask instance
And in these two files, each module has it's own Flask instance.
So I can use celery.task in Views. And I can start celery worker separately.
Thanks Bob Jordan, you can find the answer from https://stackoverflow.com/a/50665633/2794539,
Key points:
1. make_celery do two things at the same time: create celery app and run celery with flask content, so you can create two functions to do make_celery job
2. celery app must init before blueprint register
Having the same problem, I ended up solving it very easily using shared_task (docs), keeping a single app.py file and not having to instantiate the flask app multiple times.
The original situation that led to the circular import:
from src.app import celery # src.app is ALSO importing the blueprints which are importing this file which causes the circular import.
#celery.task(bind=True)
def celery_test(self):
sleep(5)
logger.info("Task processed by Celery.")
The current code that works fine and avoids the circular import:
# from src.app import celery <- not needed anymore!
#shared_task(bind=True)
def celery_test(self):
sleep(5)
logger.info("Task processed by Celery.")
Please mind that I'm pretty new to Celery so I might be overseeing important stuff, it would be great if someone more experienced can give their opinion.
Related
So I have a simple celery app code(app.py) which looks something like this:
``from celery import Celery
from functools import lru_cache`
#lru_cache def get_celery() -> Celery:
celery_app = Celery(
'worker',
broker='redis://XXXXXXXXX/4',
backend='redis://XXXXXXXX/5',
)
celery_app.autodiscover_tasks(
['api.v1.worker.worker'],
force=True
)
return celery_app`
Now when I run this app.py file through the VSCode terminal I get a error like this:
ModuleNotFoundError: No module named 'api'
My directory looks something like this:
app
|__api
| |__v1
| |__endpoints
| |__utils
| |__worker
| |__worker.py
|__celery_app
|__app.py
The API and celery_app folders are on the same level. Can anyone please help me understand and fix this issue.
I'm running this entire project on a poetry shell and my pyproject.toml file looks something like this:
[tool.poetry.dependencies]
python = "3.10.9" requests = "^2.28.2" numpy = "^1.24.1" pandas = "^1.5.3" fastapi = "^0.89.1" datetime = "^5.0" mimir = "^0.6.3" jsonify = "^0.5" uvicorn = {extras = ["standard"], version = "^0.20.0"} appdirs = "^1.4.4" black = "^22.12.0" click = "^8.1.3" pydantic = "^1.10.4" pymongo = "^4.3.3" regex = "^2022.10.31" motor = "^3.1.1" json-utils = "^0.2" celery = "^5.2.7" redis = "^4.4.2"
To try resolving this issue I commented out this part of the code:
celery_app.autodiscover_tasks( ['api.v1.worker.worker'], force=True )
and the celery app worked fine. But i want the code to work along with the api.v1.worker.worker statement.
In pytest-django there is a builtin fixture live_server though it seems like this server (that is actually based on LiveServerTestCase) can't handle web-sockets or at least won't interact with my asgi.py module.
How can one mimic that fixture in order to use ChannelsLiveServerTestCase instead? Or anything else that will run a test-database and will be able to serve an ASGI application?
My goal eventually is to have as close to production environment as possible, for testing and being able to test interaction between different Consumers.
P.S: I know I can run manage.py testserver <Fixture> on another thread / process by overriding django_db_setup though I seek for a better solution.
You can implement a channels_live_server fixture based on the implementations of:
live_server fixture, which instantiates
LiveServer helper, which starts LiveServerThread, and
ChannelsLiveServerTestCase, which starts DaphneProcess.
#medihack implemented it at https://github.com/pytest-dev/pytest-django/issues/1027:
from functools import partial
from channels.routing import get_default_application
from daphne.testing import DaphneProcess
from django.contrib.staticfiles.handlers import ASGIStaticFilesHandler
from django.core.exceptions import ImproperlyConfigured
from django.db import connections
from django.test.utils import modify_settings
def make_application(*, static_wrapper):
# Module-level function for pickle-ability
application = get_default_application()
if static_wrapper is not None:
application = static_wrapper(application)
return application
class ChannelsLiveServer:
host = "localhost"
ProtocolServerProcess = DaphneProcess
static_wrapper = ASGIStaticFilesHandler
serve_static = True
def __init__(self) -> None:
for connection in connections.all():
if connection.vendor == "sqlite" and connection.is_in_memory_db():
raise ImproperlyConfigured(
"ChannelsLiveServer can not be used with in memory databases"
)
self._live_server_modified_settings = modify_settings(ALLOWED_HOSTS={"append": self.host})
self._live_server_modified_settings.enable()
get_application = partial(
make_application,
static_wrapper=self.static_wrapper if self.serve_static else None,
)
self._server_process = self.ProtocolServerProcess(self.host, get_application)
self._server_process.start()
self._server_process.ready.wait()
self._port = self._server_process.port.value
def stop(self) -> None:
self._server_process.terminate()
self._server_process.join()
self._live_server_modified_settings.disable()
#property
def url(self) -> str:
return f"http://{self.host}:{self._port}"
#pytest.fixture
def channels_live_server(request):
server = ChannelsLiveServer()
request.addfinalizer(server.stop)
return server
#aaron's solution can't work, due to pytest-django conservative approach for database access.
another proccess wouldn't be aware that your test has database access permissions therefore you won't have database access. (here is a POC)
Using a scoped fixture of daphne Server suffice for now.
import threading
import time
from functools import partial
from django.contrib.staticfiles.handlers import ASGIStaticFilesHandler
from django.core.exceptions import ImproperlyConfigured
from django.db import connections
from django.test.utils import modify_settings
from daphne.server import Server as DaphneServer
from daphne.endpoints import build_endpoint_description_strings
def get_open_port() -> int:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("", 0))
s.listen(1)
port = s.getsockname()[1]
s.close()
return port
def make_application(*, static_wrapper):
# Module-level function for pickle-ability
if static_wrapper is not None:
application = static_wrapper(your_asgi_app)
return application
class ChannelsLiveServer:
port = get_open_port()
host = "localhost"
static_wrapper = ASGIStaticFilesHandler
serve_static = True
def __init__(self) -> None:
for connection in connections.all():
if connection.vendor == "sqlite" and connection.is_in_memory_db():
raise ImproperlyConfigured(
"ChannelsLiveServer can not be used with in memory databases"
)
self._live_server_modified_settings = modify_settings(ALLOWED_HOSTS={"append": self.host})
self._live_server_modified_settings.enable()
get_application = partial(
make_application,
static_wrapper=self.static_wrapper if self.serve_static else None,
)
endpoints = build_endpoint_description_strings(
host=self.host, port=self.port
)
self._server = DaphneServer(
application=get_application(),
endpoints=endpoints
)
t = threading.Thread(target=self._server.run)
t.start()
for i in range(10):
time.sleep(0.10)
if self._server.listening_addresses:
break
assert self._server.listening_addresses[0]
def stop(self) -> None:
self._server.stop()
self._live_server_modified_settings.disable()
#property
def url(self) -> str:
return f"ws://{self.host}:{self.port}"
#property
def http_url(self):
return f"http://{self.host}:{self.port}"
#pytest.fixture(scope='session')
def channels_live_server(request, live_server):
server = ChannelsLiveServer()
request.addfinalizer(server.stop)
return server
import falcon
import json
from tasks import add
from waitress import serve
class tasksresource:
def on_get(self, req, resp):
"""Handles GET requests"""
self.result = add.delay(1, 2)
self.context = {'ID': self.result.id, 'final result': self.result.ready()}
resp.body = json.dumps(self.context)
api = falcon.API()
api.add_route('/result', tasksresource())
# api.add_route('/result/task', taskresult())
if __name__ == '__main__':
serve(api, host='127.1.0.1', port=5555)
how do i get the Get the task id from json payload ( post data)
and add a route to it
Here a small example. Structure of files:
/project
__init__.py
app.py # routes, falcon etc.
tasks.py # celery
example.py # script for demonstration how it works
app.py:
import json
import falcon
from tasks import add
from celery.result import AsyncResult
class StartTask(object):
def on_get(self, req, resp):
# start task
task = add.delay(4, 4)
resp.status = falcon.HTTP_200
# return task_id to client
result = {'task_id': task.id}
resp.body = json.dumps(result)
class TaskStatus(object):
def on_get(self, req, resp, task_id):
# get result of task by task_id and generate content to client
task_result = AsyncResult(task_id)
result = {'status': task_result.status, 'result': task_result.result}
resp.status = falcon.HTTP_200
resp.body = json.dumps(result)
app = falcon.API()
# registration of routes
app.add_route('/start_task', StartTask())
app.add_route('/task_status/{task_id}', TaskStatus())
tasks.py:
from time import sleep
import celery
app = celery.Celery('tasks', broker='redis://localhost:6379/0', backend='redis://localhost:6379/0')
#app.task
def add(x, y):
"""
:param int x:
:param int y:
:return: int
"""
# sleep just for demonstration
sleep(5)
return x + y
Now we need to start celery application. Go to project folder and run:
celery -A tasks worker --loglevel=info
After this we need to start Falcon application. Go to project folder and run:
gunicorn app:app
Ok. Everything is ready.
example.py is small client side which can help to understand:
from time import sleep
import requests
# start new task
task_info = requests.get('http://127.0.0.1:8000/start_task')
task_info = task_info.json()
while True:
# check status of task by task_id while task is working
result = requests.get('http://127.0.0.1:8000/task_status/' + task_info['task_id'])
task_status = result.json()
print task_status
if task_status['status'] == 'SUCCESS' and task_status['result']:
print 'Task with id = %s is finished' % task_info['task_id']
print 'Result: %s' % task_status['result']
break
# sleep and check status one more time
sleep(1)
Just call python ./example.py and you should see something like this:
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'PENDING', u'result': None}
{u'status': u'SUCCESS', u'result': 8}
Task with id = 76542904-6c22-4536-99d9-87efd66d9fe7 is finished
Result: 8
Hope this helps you.
The above example by Danila Ganchar is great and very helpful. I'm using celery version 4.3.0 with Python 3, and one of the errors I received from using the example above is on this line:
task_result = AsyncResult(task_id)
The error I would receive is:
AttributeError: 'DisabledBackend' object has no attribute '_get_task_meta_for'
This may be a recent change, but result.AsyncResult (or just AsyncResult in this example because he imported it from celery.result) doesn't know the backend you are using. There are 2 solutions to solving this problem:
1) You can take the AsyncResult of the actual task itself add.AsyncResult(task_id) because the add task already has the backend defined through the #app.task decorator. The downside to this in this example is you want to be able to get the result for any task by just passing in the task_id via the Falcon endpoint, so this is limited
2) The preferred method is to just pass in the app parameter to the AsyncResult function:
task = result.AsyncResult(id, app=app)
Hope this helps!
I've tried to make a broadcast task but only one of my workers recieve it per each call. Would you please help me? (I'm using rabbitmq and node-celery)
default_exchange = Exchange('celery', type='direct')
celery.conf.update(
CELERY_RESULT_BACKEND = "amqp",
CELERY_RESULT_SERIALIZER='json',
CELERY_QUEUES = (
Queue('celery', default_exchange, routing_key='celery'),
Broadcast('broadcast_tasks'),
),
CELERY_ROUTES = (
{'my_tasks.sample_broadcast_task': {
'queue': 'broadcast_tasks',
}},
{'my_tasks.sample_normal_task': {
'queue': 'celery',
'exchange': 'celery',
'exchange_type': 'direct',
'routing_key': 'celery',
}}
),
)
I've also test following configurtion but not working.
celery.conf.update(
CELERY_RESULT_BACKEND = "amqp",
CELERY_RESULT_SERIALIZER='json',
CELERY_QUEUES=(
Queue('celery', Exchange('celery'), routing_key='celery'),
Broadcast('broadcast'),
),
)
#celery.task(ignore_result=True, queue='broadcast',
options=dict(queue='broadcast'))
def sample_broadcast_task():
print "test"
EDIT
after changing how to run worker by adding -Q broadcast, now i face to this error:
PreconditionFailed: Exchange.declare: (406) PRECONDITION_FAILED - inequivalent arg 'type' for exchange 'broadcast' in vhost '/': received 'direct' but current is 'fanout'
After trying many many many things, i finally find a solution. This work for me.
( celery 3.1.24 (Cipater) and Python 2.7.12 )
WORKER - tasks.py :
from celery import Celery
import celery_config
from kombu.common import Broadcast, Queue, Exchange
app = Celery()
app.config_from_object(sysadmin_celery_config)
#app.task
def print_prout(x):
print x
return x
WORKER - celery_config.py :
# coding=utf-8
from kombu.common import Broadcast, Queue, Exchange
BROKER_URL = 'amqp://login:pass#172.17.0.1//'
CELERY_RESULT_BACKEND = 'redis://:login#172.17.0.1'
CELERY_TIMEZONE = 'Europe/Paris'
CELERY_ENABLE_UTC = True
CELERY_TASK_SERIALIZER = 'pickle'
CELERY_RESULT_SERIALIZER = 'pickle'
CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml']
CELERY_DISABLE_RATE_LIMITS = True
CELERY_ALWAYS_EAGER = False
CELERY_QUEUES = (Broadcast('broadcast_tasks'), )
worker lauched with :
celery -A celery_worker.tasks worker --loglevel=info --concurrency=1 -n worker_name_1
On the client (another docker container for me).
from celery import Celery
from celery_worker import tasks
result = tasks.print_prout.apply_async(['prout'], queue='broadcast_tasks')
print result.get()
The next step for me is how to retrieve and display results returned by all the workers. The "print result.get()" seems to return only the result of the last worker.
It does not seem obvious ( Have Celery broadcast return results from all workers )
according to your description:
I've tried to make a broadcast task but only one of my workers recieve it per each call
you may be using direct type exchange.
Try this
from celery import Celery
from kombu.common import Broadcast
BROKER_URL = 'amqp://guest:guest#localhost:5672//'
class CeleryConf:
# List of modules to import when celery starts.
CELERY_ACCEPT_CONTENT = ['json']
CELERY_IMPORTS = ('main.tasks')
CELERY_QUEUES = (Broadcast('q1'),)
CELERY_ROUTES = {
'tasks.sampletask': {'queue': 'q1'}
}
celeryapp = Celery('celeryapp', broker=BROKER_URL)
celeryapp.config_from_object(CeleryConf())
#celeryapp.task
def sampletask(form):
print form
To send the message, do
d= sampletask.apply_async(['4c5b678350fc643'],serializer="json", queue='q1')
I'm trying to daemonize my tasks in celery, i've tested without daemonizing and it's working very well.
But i can't daemonize like the tutorial said (http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html#daemonizing)
I have my files:
solr_desa.py
from celery import Celery
celery = Celery('solrdesa')
celery.config_from_object('celeryconfig')
#celery.task(name = "doSomething")
def doSomething():
return 6
celeryconfig.py
from celery.schedules import crontab
BROKER_URL = '127.0.0.1'
BROKER_PORT = 5673
CELERY_RESULT_BACKEND = 'redis://127.0.0.1:6969/0'
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = 'America/Santiago'
CELERY_IMPORTS = ('solr_desa', )
CELERYBEAT_SCHEDULE = {
'solr_schedule': {
'task': 'doSomething',
'schedule': crontab(minute=9, hour=12)
},
}
And also
/etc/default/celeryd
CELERY_NODES="w1"
CELERYD_CHDIR="/opt/latam/script/solr"
CELERYD_OPTS="--time-limit=300 --concurrency=8"
CELERY_CONFIG_MODULE="celeryconfig"
CELERYD_LOG_FILE="/var/log/celery/%n.log"
CELERYD_PID_FILE="/var/run/celery/%n.pid"
I execute with the default celeryd in https://github.com/celery/celery/blob/3.0/extra/generic-init.d/celeryd, but the tasks are just enqueued but it looks like there are not workers :(
Where is my mistake in my configuration? :(
your broker url is wrong it should be something like this
BROKER_URL : transport://userid:password#hostname:port/virtual_host
#example
BROKER_URL = "amqp://{username}:{password}#{host}:{port}//"
read here for more details