I am looking for a way to set the Celery beat schedule options for a task so that it includes a link_error callback.
My intention is to log exceptions raised in my scheduled tasks, and adding a link_error option seems to be the way forward.
However, I am not sure how to get the signature of the error task into my settings file.
Here is my error handler definition, straight from the docs:
@app.task(bind=True)
def error_handler(self, uuid):
    result = self.app.AsyncResult(uuid)
    logger.error('Task {0} raised exception: {1!r}\n{2!r}'.format(
        uuid, result.result, result.traceback)
    )
and my error-throwing task:
@app.task
def failme():
    raise Exception()
Here are two things I tried in my schedule that didn't work:
'failme-every-10-se': {
    'task': 'ep.tasks.failme',
    'schedule': timedelta(seconds=5),
    'options': {'link_error': 'ep.tasks.error_handler.s()'}
},
and
import error_handler

CELERYBEAT_SCHEDULE = {
    'error-task-every-5-minutes': {
        'task': 'failme',
        'schedule': timedelta(minutes=5),
        'options': {'link_error': error_handler.s()}
    },
}
Thanks
Because I did not get a solution to this, I resorted to inheriting from a different Task class:
class ErrorLoggingTask(Task):
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        logger.error("Task failed: %s" % einfo)
e.g.
@shared_task(base=ErrorLoggingTask)
def my_task(arg):
    logger.info("working")
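As for the original link_error question: a later answer in this thread builds signatures by name with celery.signature(), and the same approach should in principle work for link_error in the beat options dict. A minimal, untested sketch:

from datetime import timedelta
from celery import signature

CELERYBEAT_SCHEDULE = {
    'failme-every-10-se': {
        'task': 'ep.tasks.failme',
        'schedule': timedelta(seconds=5),
        # signature() resolves the task by name at send time, so the
        # settings file never needs to import ep.tasks.error_handler
        'options': {'link_error': signature('ep.tasks.error_handler')},
    },
}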
Related
I want to run a complex task scheduled by beat. Let us assume the default add/mul tasks are defined.
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    sender.add_periodic_task(
        crontab(),
        add.s(2, 3) | mul.s(2)
    )
But this will return an error in the worker:
NotImplementedError: chain is not a real task
How can I schedule a non-trivial task with Celery beat?
One way to do this is to schedule your task chain in beat_schedule in your celeryconfig, using the link option. Here, celery_tasks is the name of the module where your tasks are defined:
from celery.schedules import crontab
from celery import signature

beat_schedule = {
    'chained': {
        'task': 'celery_tasks.add',
        'schedule': crontab(),
        'options': {
            'queue': 'default',
            'link': signature('celery_tasks.mul',
                              args=(),
                              kwargs={},
                              options={
                                  'link': signature('celery_tasks.another_task',
                                                    args=(),
                                                    kwargs={},
                                                    queue='default')
                              },
                              queue='default')
        },
        'args': ()
    }
}
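For context, a hypothetical celery_tasks.py matching this schedule could look like the sketch below; the task bodies and broker URL are illustrative assumptions, and only the task names come from the schedule above:

from celery import Celery

app = Celery('celery_tasks', broker='redis://localhost:6379/0')

@app.task
def add(x=2, y=3):
    return x + y

@app.task
def mul(value, factor=2):
    # called with add()'s result as its first argument via 'link'
    return value * factor

@app.task
def another_task(value):
    # called with mul()'s result via the nested 'link'
    return value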
To add chained periodic tasks, you can declare your chain inside a task decorated with @app.task and then register that new task with the add_periodic_task() method. Example:
@app.on_after_finalize.connect  # I use this because it's declared in tasks.py
def setup_periodic_tasks(sender, **kwargs):
    sender.add_periodic_task(timedelta(minutes=10), chian_st22.s(), name='test')

@app.task
def chian_st22():  # I create the task with the chain here
    cadena = chain(st22.s(), mailer.s()).apply_async()

@app.task
def mailer(data):
    clase = CheckAlert()
    mail = clase.envio_mail(data)
    return mail

@app.task
def st22():
    clase = CheckAlert()
    st = clase.check_st22_dumps()
    return st
import falcon
import json
from tasks import add
from waitress import serve

class tasksresource:
    def on_get(self, req, resp):
        """Handles GET requests"""
        self.result = add.delay(1, 2)
        self.context = {'ID': self.result.id, 'final result': self.result.ready()}
        resp.body = json.dumps(self.context)

api = falcon.API()
api.add_route('/result', tasksresource())
# api.add_route('/result/task', taskresult())

if __name__ == '__main__':
    serve(api, host='127.1.0.1', port=5555)
How do I get the task id from the JSON payload (POST data) and add a route for it?
Here is a small example. File structure:
/project
    __init__.py
    app.py      # routes, falcon etc.
    tasks.py    # celery
    example.py  # script demonstrating how it works
app.py:
import json
import falcon
from tasks import add
from celery.result import AsyncResult

class StartTask(object):
    def on_get(self, req, resp):
        # start task
        task = add.delay(4, 4)
        resp.status = falcon.HTTP_200
        # return task_id to client
        result = {'task_id': task.id}
        resp.body = json.dumps(result)

class TaskStatus(object):
    def on_get(self, req, resp, task_id):
        # get result of task by task_id and generate content for client
        task_result = AsyncResult(task_id)
        result = {'status': task_result.status, 'result': task_result.result}
        resp.status = falcon.HTTP_200
        resp.body = json.dumps(result)

app = falcon.API()

# registration of routes
app.add_route('/start_task', StartTask())
app.add_route('/task_status/{task_id}', TaskStatus())
tasks.py:
from time import sleep
import celery

app = celery.Celery('tasks', broker='redis://localhost:6379/0', backend='redis://localhost:6379/0')

@app.task
def add(x, y):
    """
    :param int x:
    :param int y:
    :return: int
    """
    # sleep just for demonstration
    sleep(5)
    return x + y
Now we need to start the Celery application. Go to the project folder and run:
celery -A tasks worker --loglevel=info
After this we need to start the Falcon application. Go to the project folder and run:
gunicorn app:app
Ok. Everything is ready.
example.py is a small client-side script that helps show how it works:
from time import sleep
import requests

# start a new task
task_info = requests.get('http://127.0.0.1:8000/start_task')
task_info = task_info.json()

while True:
    # check the status of the task by task_id while the task is working
    result = requests.get('http://127.0.0.1:8000/task_status/' + task_info['task_id'])
    task_status = result.json()
    print(task_status)
    if task_status['status'] == 'SUCCESS' and task_status['result']:
        print('Task with id = %s is finished' % task_info['task_id'])
        print('Result: %s' % task_status['result'])
        break
    # sleep and check the status one more time
    sleep(1)
Just call python ./example.py and you should see something like this:
{'status': 'PENDING', 'result': None}
{'status': 'PENDING', 'result': None}
{'status': 'PENDING', 'result': None}
{'status': 'PENDING', 'result': None}
{'status': 'PENDING', 'result': None}
{'status': 'SUCCESS', 'result': 8}
Task with id = 76542904-6c22-4536-99d9-87efd66d9fe7 is finished
Result: 8
Hope this helps you.
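To address the POST part of the original question, a hypothetical on_post handler could parse the JSON body, start the task, and return the task id; the route name and payload shape here are assumptions for illustration:

class StartTaskFromJson(object):
    def on_post(self, req, resp):
        # parse the JSON payload, e.g. {"x": 4, "y": 4}
        payload = json.loads(req.stream.read().decode('utf-8'))
        task = add.delay(payload['x'], payload['y'])
        resp.status = falcon.HTTP_200
        # return the task_id so the client can poll /task_status/{task_id}
        resp.body = json.dumps({'task_id': task.id})

app.add_route('/start_task_json', StartTaskFromJson())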
The above example by Danila Ganchar is great and very helpful. I'm using celery version 4.3.0 with Python 3, and one of the errors I received from using the example above is on this line:
task_result = AsyncResult(task_id)
The error I would receive is:
AttributeError: 'DisabledBackend' object has no attribute '_get_task_meta_for'
This may be a recent change, but result.AsyncResult (or just AsyncResult in this example, because he imported it from celery.result) doesn't know which backend you are using. There are two ways to solve this problem:
1) You can take the AsyncResult of the actual task itself, add.AsyncResult(task_id), because the add task already has the backend defined through the @app.task decorator. The downside in this example is that you want to be able to get the result for any task by just passing the task_id via the Falcon endpoint, so this approach is limited.
2) The preferred method is to just pass in the app parameter to the AsyncResult function:
task_result = AsyncResult(task_id, app=app)  # here app must be the Celery application, not the Falcon API
Hope this helps!
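Applied to the app.py above, the fix could look like the following sketch; note that app.py already uses the name app for the Falcon API, so the Celery instance is imported under an alias here (an assumption for illustration):

import json
import falcon
from celery.result import AsyncResult
from tasks import app as celery_app  # the Celery() instance from tasks.py

class TaskStatus(object):
    def on_get(self, req, resp, task_id):
        # passing the Celery app tells AsyncResult which result backend to query
        task_result = AsyncResult(task_id, app=celery_app)
        result = {'status': task_result.status, 'result': task_result.result}
        resp.status = falcon.HTTP_200
        resp.body = json.dumps(result)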
I don't know how to reproduce my problem in a simple way.
I have an actor that executes an external command via the 'sys.process' package.
object FileHelper {
  def downloadFile(url: String, filename: String): Either[String, Unit] = {
    println(s"MyThread: ${Thread.currentThread().getName}")
    util.Try {
      import scala.language.postfixOps
      new URL(url) #> new File(filename) !
    } match {
      case util.Failure(err) => Left(s"Download error: $err")
      case util.Success(code) => if (code != 0) Left("Can't download file") else Right({})
    }
  }
}
So when I call downloadFile within the actor, the Try statement doesn't work!
router MyThread: app-akka.actor.default-dispatcher-3
router[ERROR] Exception in thread "Thread-10" java.io.FileNotFoundException: /home/alex/dumpss/456.tar.bz2 (No such file or directory)
router[ERROR] at java.io.FileOutputStream.open0(Native Method)
router[ERROR] at java.io.FileOutputStream.open(FileOutputStream.java:270)
router[ERROR] at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
router[ERROR] at scala.sys.process.ProcessBuilderImpl$FileOutput$$anonfun$$lessinit$greater$3.apply(ProcessBuilderImpl.scala:33)
router[ERROR] at scala.sys.process.ProcessBuilderImpl$FileOutput$$anonfun$$lessinit$greater$3.apply(ProcessBuilderImpl.scala:33)
router[ERROR] at scala.sys.process.ProcessBuilderImpl$OStreamBuilder$$anonfun$$lessinit$greater$4.apply(ProcessBuilderImpl.scala:38)
router[ERROR] at scala.sys.process.ProcessBuilderImpl$OStreamBuilder$$anonfun$$lessinit$greater$4.apply(ProcessBuilderImpl.scala:38)
router[ERROR] at scala.sys.process.ProcessBuilderImpl$ThreadBuilder$$anonfun$1.apply$mcV$sp(ProcessBuilderImpl.scala:58)
router[ERROR] at scala.sys.process.ProcessImpl$Spawn$$anon$1.run(ProcessImpl.scala:23)
As you can see, the external command was executed in thread 'Thread-10', but Try is catching exceptions in 'app-akka.actor.default-dispatcher-3'.
With the Scala process API, URL downloading and file redirection are implemented with threads instead of real processes: https://github.com/scala/scala/blob/2.12.x/src/library/scala/sys/process/ProcessBuilderImpl.scala#L31-L64
So, when this line gets executed,
new URL(url) #> new File(filename) !
two more threads are spawned: one downloads the URL and writes the result to the pipe, and the other reads from the pipe and writes whatever it reads to the file. The parent thread (in which the actor is running) waits for their exit values and returns one of them accordingly: https://github.com/scala/scala/blob/2.12.x/src/library/scala/sys/process/ProcessImpl.scala#L151
Unfortunately, the exit value for the file redirection is always ignored, so you cannot tell whether the operation succeeded by checking the return code of the pipe: https://github.com/scala/scala/blob/2.12.x/src/library/scala/sys/process/ProcessBuilderImpl.scala#L39
Instead of using the Scala process API, you can do the work with the help of the commons-io library:
Try {
  IOUtils.copy(url.openStream, new FileOutputStream(file))
} match {
  case Success(_) => ...
  case Failure(ex) => ...
}
I am trying to write a simple spec like this:
"saves the record on create" in {
val request = FakeRequest(POST, "/countries").withJsonBody(Json.parse("""{ "country": {"title":"Germany", "abbreviation":"GER"} }"""))
val create = route(app, request).get
status(create) mustBe OK
contentType(create) mustBe Some("application/json")
contentAsString(create) must include("country")
}
But on execution it throws this error:
java.util.concurrent.RejectedExecutionException: Task slick.backend.DatabaseComponent$DatabaseDef$$anon$2@f456097 rejected from java.util.concurrent.ThreadPoolExecutor@6265d40c[Terminated, pool size = 0, active threads = 0, queued tasks = 0, completed tasks = 1]
It works fine for the GET request test for the index page. Any ideas how to work around this?
The problem was OneAppPerTest, which caused problems with DB connections: replacing it with OneAppPerSuite solves the problem.
I want to write a task that is only executable from within a given queue - if somebody tries to pass a different queue into the routing_key parameter of apply_async I want to raise an exception. How do I do this?
You could write your own task class that checks that a valid routing key is being passed in when apply_async is called. You can also apply this to queues. Set up routes and queues in your config:
import celery
from kombu import Queue, Exchange

app = celery.Celery('app')

app.conf.CELERY_QUEUES = (
    Queue('add', Exchange('default'), routing_key='good'),
)

app.conf.CELERY_ROUTES = {
    'app.add': {
        'queue': 'add',
        'routing_key': 'good'
    }
}
Now, create your own Task class that will perform the check on the routing key. You'll need to override apply_async:
from celery.utils import uuid

class RouteCheckerTask(celery.Task):
    abstract = True

    def apply_async(self, args=None, kwargs=None, task_id=None, producer=None,
                    link=None, link_error=None, **options):
        app = self._get_app()
        routing_key = options.get('routing_key', None)
        if routing_key:
            valid_routes = [v['routing_key'] for k, v in app.conf.CELERY_ROUTES.items()]
            is_valid = routing_key in valid_routes
            if not is_valid:
                raise NotImplementedError('{} is not a valid routing key. Options are: {}'.format(
                    routing_key, valid_routes))

        if app.conf.CELERY_ALWAYS_EAGER:
            return self.apply(args, kwargs, task_id=task_id or uuid(),
                              link=link, link_error=link_error, **options)

        # add 'self' if this is a "task_method".
        if self.__self__ is not None:
            args = args if isinstance(args, tuple) else tuple(args or ())
            args = (self.__self__,) + args

        return app.send_task(
            self.name, args, kwargs, task_id=task_id, producer=producer,
            link=link, link_error=link_error, result_cls=self.AsyncResult,
            **dict(self._get_exec_options(), **options)
        )
Base your tasks on this one and call apply_async normally:
@app.task(base=RouteCheckerTask)
def add(x, y):
    return x + y

# Fails
add.apply_async([1, 2], routing_key='bad')

# Passes
add.apply_async([1, 2], routing_key='good')
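The answer above mentions that the same check can also be applied to queues. A hypothetical extension that validates the queue option against CELERY_QUEUES might look like this (an illustrative sketch, not part of the original answer):

class QueueCheckerTask(RouteCheckerTask):
    abstract = True

    def apply_async(self, args=None, kwargs=None, **options):
        queue = options.get('queue', None)
        if queue:
            app = self._get_app()
            # CELERY_QUEUES holds kombu.Queue objects, so compare by name
            valid_queues = [q.name for q in app.conf.CELERY_QUEUES]
            if queue not in valid_queues:
                raise NotImplementedError('{} is not a valid queue. Options are: {}'.format(
                    queue, valid_queues))
        return super(QueueCheckerTask, self).apply_async(args, kwargs, **options)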