diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..fbef003
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,60 @@
+name: CI
+
+on: [pull_request]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        django: ["4.2", "5.0", "5.1"]
+        exclude:
+          - python: "3.9"
+            django: "5.0"
+          - python: "3.9"
+            django: "5.1"
+          - python: "3.13"
+            django: "4.2"
+          - python: "3.13"
+            django: "5.0"
+        database_url:
+          - postgres://runner:password@localhost/project
+          - mysql://root:root@127.0.0.1/project
+          - 'sqlite:///:memory:'
+
+    services:
+      postgres:
+        image: postgres
+        ports:
+          - 5432:5432
+        env:
+          POSTGRES_DB: project
+          POSTGRES_USER: runner
+          POSTGRES_PASSWORD: password
+
+    env:
+      DATABASE_URL: ${{ matrix.database_url }}
+
+    steps:
+      - name: Start MySQL
+        run: sudo systemctl start mysql.service
+      - uses: actions/checkout@v4
+      - name: Install system Python build deps for psycopg2
+        run: sudo apt-get install python3-dev
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}.*
+      - name: Upgrade pip
+        run: pip install --upgrade pip
+      - name: Install dependencies
+        run: pip install -r test-requirements.txt
+      - name: Install Django
+        run: pip install -U django~=${{ matrix.django }}.0
+      - name: Run tests
+        run: python manage.py test
+      - name: Run black
+        run: black --check django_dbq
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..fd2d1c3
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,26 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload dist/*
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 7142c9b..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-language: python
-sudo: false
-python:
-- '2.7'
-- '3.4'
-env:
-- DJANGO_VERSION=1.7
-- DJANGO_VERSION=1.8
-install:
-- pip install -r test-requirements.txt
-- pip install -U django==$DJANGO_VERSION
-script: python manage.py test
-deploy:
-  provider: pypi
-  user: dabapps
-  password:
-    secure: YkRDJO+QK2Rr3AJwmxoghTWCCTZFLfXUlqky/my6g7oeMI5Q/F2WzNBmvr84v069fckobHhlN4hhH/JFEaRnCNuYmPhFdiNQh5M5cP/qhUqqh7LsMiX5mJSfM6yCp+rAL6F+yb5r59t3IQKmXKiFzRm/AuS4nHINDFHXwaPjWTw=
-  on:
-    tags: true
-    repo: dabapps/django-db-queue
diff --git a/README.md b/README.md
index 55dad27..fb01036 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,44 @@
django-db-queue
==========
-[![Build Status](https://travis-ci.org/dabapps/django-db-queue.svg)](https://travis-ci.org/dabapps/django-db-queue)
[![pypi release](https://img.shields.io/pypi/v/django-db-queue.svg)](https://pypi.python.org/pypi/django-db-queue)
-Simple databased-backed job queue. Jobs are defined in your settings, and are processed by management commands.
+Simple database-backed job queue. Jobs are defined in your settings, and are processed by management commands.

Asynchronous tasks are run via a *job queue*.
This system is designed to support multi-step job workflows.

-**NOTE**: This module uses differing implementations of UUIDField on Django 1.7 and 1.8 - a Python 3 shimmed django-uuidfield version on 1.7, and the built-in implementation on Django 1.8 and above. The simplest way to upgrade it is to drop the existing
-`django_dbq_job` table, delete the migration from `django_migrations`, and then
-re-run `manage.py migrate`.
+Supported and tested against:
+- Django 4.2, 5.0, 5.1
+- Python 3.9, 3.10, 3.11, 3.12, 3.13

## Getting Started

+### Installation
+
+Install from PyPI
+
+```
+pip install django-db-queue
+```
+
+Add `django_dbq` to your installed apps
+
+```python
+INSTALLED_APPS = [
+    ...,
+    "django_dbq",
+]
+```
+
+Run migrations
+
+```
+manage.py migrate
+```
+
+### Upgrading from 1.x to 2.x
+
+Note that version 2.x only supports Django 3.1 or newer. If you need support for Django 2.2, please stick with the latest 1.x release.
+
### Describe your job

In e.g. project.common.jobs:

@@ -20,6 +46,7 @@ In e.g. project.common.jobs:
```python
import time

+
def my_task(job):
    logger.info("Working hard...")
    time.sleep(10)

@@ -32,24 +59,133 @@ In project.settings:
```python
JOBS = {
-    'my_job': ['project.common.jobs.my_task'],
+    "my_job": {
+        "tasks": ["project.common.jobs.my_task"],
+    },
+}
+```
+
+### Hooks
+
+
+#### Failure Hooks
+When an unhandled exception is raised by a job, a failure hook will be called if one exists, enabling
+you to clean up any state left behind by your failed job. Failure hooks are run in your worker process (if your job fails).
+
+A failure hook receives the failed `Job` instance along with the unhandled exception raised by your failed job as its arguments. Here's an example:
+
+```python
+def my_task_failure_hook(job, e):
+    ... # clean up after failed job
+```
+
+To ensure this hook gets run, simply add a `failure_hook` key to your job config like so:
+
+```python
+JOBS = {
+    "my_job": {
+        "tasks": ["project.common.jobs.my_task"],
+        "failure_hook": "project.common.jobs.my_task_failure_hook",
+    },
}
```

+#### Creation Hooks
+You can also run creation hooks, which happen just after the creation of your `Job` instances and are executed in the process
+in which the job was created, _not the worker process_.
+
+A creation hook receives your `Job` instance as its only argument. Here's an example:
+
+```python
+def my_task_creation_hook(job):
+    ... # configure something before running your job
+```
+
+To ensure this hook gets run, simply add a `creation_hook` key to your job config like so:
+
+```python
+JOBS = {
+    "my_job": {
+        "tasks": ["project.common.jobs.my_task"],
+        "creation_hook": "project.common.jobs.my_task_creation_hook",
+    },
+}
+```
+
+#### Pre & Post Task Hooks
+You can also run pre task or post task hooks, which happen in the normal processing of your `Job` instances and are executed inside the worker process.
+
+Both pre and post task hooks receive your `Job` instance as their only argument. Here's an example:
+
+```python
+def my_pre_task_hook(job):
+    ...
# configure something before running your task
+```
+
+To ensure these hooks are run, simply add a `pre_task_hook` or `post_task_hook` key (or both, if needed) to your job config like so:
+
+```python
+JOBS = {
+    "my_job": {
+        "tasks": ["project.common.jobs.my_task"],
+        "pre_task_hook": "project.common.jobs.my_pre_task_hook",
+        "post_task_hook": "project.common.jobs.my_post_task_hook",
+    },
+}
+```
+
+Notes:
+
+* If the `pre_task_hook` fails (raises an exception), the task function is not run, and django-db-queue behaves as if the task function itself had failed: the failure hook is called, and the job goes into the `FAILED` state.
+* The `post_task_hook` is always run, even if the job fails. In this case, it runs after the `failure_hook`.
+* If the `post_task_hook` raises an exception, this is logged but the job is **not marked as failed** and the failure hook does not run. This is because the `post_task_hook` might need to perform cleanup that always happens after the task, no matter whether it succeeds or fails.
+
+
### Start the worker

In another terminal:

-`python manage.py worker`
+```
+python manage.py worker
+```

### Create a job

Using the name you configured for your job in your settings, create an instance of Job.

```python
-Job.objects.create(name='my_job')
+Job.objects.create(name="my_job")
+```
+
+### Prioritising jobs
+Sometimes it is necessary for certain jobs to take precedence over others. For example: you may have a worker whose primary purpose is dispatching somewhat
+important emails to users. However, once an hour, you may need to run a _really_ important job which needs to be done on time and cannot wait in the queue for dozens
+of emails to be dispatched before it can begin.
+
+In order to make sure that an important job is run before others, you can set the `priority` field to an integer higher than `0` (the default). For example:
+
+```python
+Job.objects.create(name="normal_job")
+Job.objects.create(name="important_job", priority=1)
+Job.objects.create(name="critical_job", priority=2)
+```
+
+Jobs will be ordered by their `priority` (highest to lowest), then by the time at which they were created (oldest to newest), and processed in that order.
+
+### Scheduling jobs
+If you'd like to create a job but have it run at some time in the future, you can use the `run_after` field on the Job model:
+
+```python
+Job.objects.create(
+    name="scheduled_job",
+    run_after=(timezone.now() + timedelta(minutes=10)),
+)
```
+Of course, the scheduled job will only be run if your `python manage.py worker` process is running at the time when the job is scheduled to run. Otherwise, it will run the next time you start your worker process after that time has passed.
+
+It's also worth noting that, by default, scheduled jobs run as part of the same queue as all other jobs, and so if a job is already being processed at the time when your scheduled job is due to run, it won't run until that job has finished. If increased precision is important, you might consider using the `queue_name` feature to run a separate worker dedicated to only running scheduled jobs.
+
## Terminology

### Job

@@ -60,24 +196,49 @@
The top-level abstraction of a standalone piece of work. Jobs are stored in the database.

### Task

Jobs are processed to completion by *tasks*. These are simply Python functions, which must take a single argument - the `Job` instance being processed. A single job will often require processing by more than one task to be completed fully.
Creating the task functions is the responsibility of the developer. For example:

-    def my_task(job):
-        logger.info("Doing some hard work")
-        do_some_hard_work()
+```python
+def my_task(job):
+    logger.info("Doing some hard work")
+    do_some_hard_work()
+```

### Workspace

-The *workspace* is an area that tasks within a single job can use to communicate with each other. It is implemented as a Python dictionary, available on the `job` instance passed to tasks as `job.workspace`. The initial workspace of a job can be empty, or can contain some parameters that the tasks require (for example, API access tokens, account IDs etc). A single task can edit the workspace, and the modified workspace will be passed on to the next task in the sequence. For example:
+The *workspace* is an area that can be used 1) to provide additional arguments to task functions, and 2) to categorize jobs with additional metadata. It is implemented as a Python dictionary, available on the `job` instance passed to tasks as `job.workspace`. The initial workspace of a job can be empty, or can contain some parameters that the tasks require (for example, API access tokens, account IDs etc).

-    def my_first_task(job):
-        job.workspace['message'] = 'Hello, task 2!'
+When creating a Job, the workspace is passed as a keyword argument:

-    def my_second_task(job):
-        logger.info("Task 1 says: %s" % job.workspace['message'])
+```python
+Job.objects.create(name="my_job", workspace={"key": value})
+```

-When creating a Job, the workspace is passed as a keyword argument:
+Then, the task function can access the workspace to get the data it needs to perform its task:
+
+```python
+def my_task(job):
+    cats_import = CatsImport.objects.get(pk=job.workspace["cats_import_id"])
+```
+
+Tasks within a single job can use the workspace to communicate with each other. A single task can edit the workspace, and the modified workspace will be passed on to the next task in the sequence. For example:
+
+```python
+def my_first_task(job):
+    job.workspace['message'] = 'Hello, task 2!'
+
+def my_second_task(job):
+    logger.info("Task 1 says: %s" % job.workspace['message'])
+```
+
+The workspace can be queried like any [JSONField](https://docs.djangoproject.com/en/3.2/topics/db/queries/#querying-jsonfield). For instance, if you wanted to display a list of jobs that a certain user had initiated, add `user_id` to the workspace when creating the job:
+
+```python
+Job.objects.create(name="foo", workspace={"user_id": request.user.id})
+```
+
+Then filter the query with it in the view that renders the list:

```python
-Job.objects.create(name='my_job', workspace={'key': value})
+user_jobs = Job.objects.filter(workspace__user_id=request.user.id)
```

### Worker process

@@ -88,9 +249,11 @@ A *worker process* is a long-running process, implemented as a Django management
Jobs are configured in the Django `settings.py` file. The `JOBS` setting is a dictionary mapping a *job name* (e.g. `import_cats`) to a dictionary of configuration options for that job, which must include a `tasks` key giving a *list* of one or more task function paths.
For example:

-    JOBS = {
-        'import_cats': ['apps.cat_importer.import_cats.step_one', 'apps.cat_importer.import_cats.step_two'],
-    }
+```python
+JOBS = {
+    "import_cats": {
+        "tasks": [
+            "apps.cat_importer.import_cats.step_one",
+            "apps.cat_importer.import_cats.step_two",
+        ],
+    },
+}
+```

### Job states

Jobs have a `state` field which can have one of the following values:

@@ -99,32 +262,79 @@ Jobs have a `state` field which can have one of the following values:
* `NEW` (has been created, waiting for a worker process to run the next task)
* `READY` (has run a task before, awaiting a worker process to run the next task)
* `PROCESSING` (a task is currently being processed by a worker)
+* `STOPPING` (the worker process has received a signal from the OS requesting it to exit)
* `COMPLETE` (all job tasks have completed successfully)
* `FAILED` (a job task failed)

+#### State diagram
+
+![state diagram](states.png)
+
### API

+#### Model methods
+
+##### Job.get_queue_depths
+If you need to programmatically get the depth of any queue you can run the following:
+```python
+from django_dbq.models import Job
+
+...
+
+Job.objects.create(name="do_work", workspace={})
+Job.objects.create(name="do_other_work", queue_name="other_queue", workspace={})
+
+queue_depths = Job.get_queue_depths()
+print(queue_depths)  # {"default": 1, "other_queue": 1}
+```
+
+You can also exclude jobs which are scheduled to run in the future (i.e. those whose `run_after` is set to a time which has not yet passed) from the queue depths. To do this, set the `exclude_future_jobs` kwarg like so:
+```python
+queue_depths = Job.get_queue_depths(exclude_future_jobs=True)
+```
+
+**Important:** When checking queue depths, do not assume that the key for your queue will always be available. Queue depths of zero won't be included
+in the dict returned by this method.
+
#### Management commands

+##### manage.py delete_old_jobs
There is a management command, `manage.py delete_old_jobs`, which deletes any
jobs from the database which are in state `COMPLETE`, `FAILED` or `STOPPING` and were
-created more than 24 hours ago. This could be run, for example, as a cron task,
-to ensure the jobs table remains at a reasonable size.
+created more than 24 hours ago (by default). This could be run, for example, as a cron task, to ensure the jobs table remains at a reasonable size. Use the `--hours` argument to control the age of jobs that will be deleted.
+
+##### manage.py worker
+To start a worker:
+
+```
+manage.py worker [queue_name] [--rate_limit]
+```

-For debugging/development purposes, a simple management command is supplied to create jobs:
+- `queue_name` is optional, and will default to `default`
+- The `--rate_limit` flag is optional, and will default to `1`. It is the minimum number of seconds that must have elapsed before a subsequent job can be run.

-    manage.py create_job --queue_name 'my_queue_name' --workspace '{"key": "value"}'

+##### manage.py queue_depth
+If you'd like to check your queue depth from the command line, you can run `manage.py queue_depth [queue_name [queue_name ...]]`, and the number of
+jobs in the "NEW" or "READY" states will be printed for each queue, as in the example below.

-The `workspace` flag is optional. If supplied, it must be a valid JSON string.
+If you wish to exclude jobs which are scheduled to be run in the future you can add `--exclude_future_jobs` to the command.

-`queue_name` is optional and defaults to `default`
+**Important:** If you misspell or provide a queue name which does not have any jobs, a depth of 0 will always be returned.
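+For example, the command prints one `name=depth` pair per queue requested, in the `event=queue_depths` format used by the command's logging output. The queue names and depths below are hypothetical:
+
+```
+$ python manage.py queue_depth default emails
+event=queue_depths default=2 emails=0
+```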
-To start a worker:
+### Gotcha: `bulk_create`

-    manage.py worker [queue_name]
+Because the `Job` model has logic in its `save` method, and because `save` doesn't get called when using `bulk_create`, you can't easily use `bulk_create` to create multiple `Job` instances at the same time.

-`queue_name` is optional, and will default to `default`
+If you really need to do this, you should be able to get it to work by using `django_dbq.tasks.get_next_task_name` to compute the next task name from the `name` of the job, and then use that value to populate the `next_task` field on each of the unsaved `Job` instances before calling `bulk_create`. Note that if you use this approach, the job's `creation_hook` will not be called.

## Testing

It may be necessary to supply a DATABASE_PORT environment variable.
+
+## Windows support
+
+Windows is supported on a best-effort basis only, and is not covered by automated or manual testing.
+
+## Code of conduct
+
+For guidelines regarding the code of conduct when contributing to this repository please review [https://www.dabapps.com/open-source/code-of-conduct/](https://www.dabapps.com/open-source/code-of-conduct/)
diff --git a/django_dbq/__init__.py b/django_dbq/__init__.py
index cd7ca49..88c513e 100644
--- a/django_dbq/__init__.py
+++ b/django_dbq/__init__.py
@@ -1 +1 @@
-__version__ = '1.0.1'
+__version__ = "3.3.0"
diff --git a/django_dbq/fields.py b/django_dbq/fields.py
deleted file mode 100644
index 6991a8d..0000000
--- a/django_dbq/fields.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from uuidfield import UUIDField
-from django.db.models import SubfieldBase
-from django.utils import six
-
-
-class UUIDField(six.with_metaclass(SubfieldBase, UUIDField)):
-    pass
diff --git a/django_dbq/management/commands/create_job.py b/django_dbq/management/commands/create_job.py
deleted file mode 100644
index 93dbb01..0000000
--- a/django_dbq/management/commands/create_job.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
-from django_dbq.models import Job
-from optparse import make_option
-import json
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-class Command(BaseCommand):
-
-    help = "Create a job"
-    args = ''
-
-    option_list = BaseCommand.option_list + (
-        make_option('--workspace',
-            help='JSON-formatted initial command workspace'),
-        make_option('--queue_name',
-            help='A specific queue to add this job to'),
-    )
-
-    def handle(self, *args, **options):
-        if len(args) != 1:
-            raise CommandError("Please supply a single job name")
-
-        name = args[0]
-        if name not in settings.JOBS:
-            raise CommandError('"%s" is not a valid job name' % name)
-
-        workspace = options['workspace']
-        if workspace:
-            workspace = json.loads(workspace)
-
-        queue_name = options['queue_name']
-
-        kwargs = {
-            'name': name,
-            'workspace': workspace,
-        }
-
-        if queue_name:
-            kwargs['queue_name'] = queue_name
-
-        job = Job.objects.create(**kwargs)
-        self.stdout.write('Created job: "%s", id=%s for queue "%s"' % (job.name, job.pk, queue_name if queue_name else 'default'))
diff --git a/django_dbq/management/commands/delete_old_jobs.py b/django_dbq/management/commands/delete_old_jobs.py
index 4c512d9..1bdc072 100644
--- a/django_dbq/management/commands/delete_old_jobs.py
+++ b/django_dbq/management/commands/delete_old_jobs.py
@@ -6,6 +6,15 @@ class Command(BaseCommand):

    help = "Delete old jobs"

+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--hours",
+            help="Delete jobs older than this many hours",
+
default=None, + required=False, + type=int, + ) + def handle(self, *args, **options): - Job.objects.delete_old() - self.stdout.write('Deleted old jobs') + Job.objects.delete_old(hours=options["hours"]) + self.stdout.write("Deleted old jobs") diff --git a/django_dbq/management/commands/queue_depth.py b/django_dbq/management/commands/queue_depth.py new file mode 100644 index 0000000..cb8b6fd --- /dev/null +++ b/django_dbq/management/commands/queue_depth.py @@ -0,0 +1,31 @@ +from django.core.management.base import BaseCommand +from django_dbq.models import Job + + +class Command(BaseCommand): + + help = "Print the current depth of the given queue" + + def add_arguments(self, parser): + parser.add_argument("queue_name", nargs="*", default=["default"], type=str) + parser.add_argument("--exclude_future_jobs", default=False, type=bool) + + def handle(self, *args, **options): + queue_names = options["queue_name"] + queue_depths = Job.get_queue_depths( + exclude_future_jobs=options["exclude_future_jobs"] + ) + + queue_depths_string = " ".join( + [ + "{queue_name}={queue_depth}".format( + queue_name=queue_name, + queue_depth=queue_depths.get(queue_name, 0), + ) + for queue_name in queue_names + ] + ) + + self.stdout.write( + "event=queue_depths {queue_depths}".format(queue_depths=queue_depths_string) + ) diff --git a/django_dbq/management/commands/worker.py b/django_dbq/management/commands/worker.py index 326a030..d166b8d 100644 --- a/django_dbq/management/commands/worker.py +++ b/django_dbq/management/commands/worker.py @@ -1,83 +1,133 @@ from django.db import transaction from django.core.management.base import BaseCommand, CommandError -from django.utils.module_loading import import_by_path +from django.utils import timezone +from django.utils.module_loading import import_string from django_dbq.models import Job -from optparse import make_option -from simplesignals.process import WorkerProcessBase from time import sleep import logging +import signal logger = logging.getLogger(__name__) -DEFAULT_QUEUE_NAME = 'default' +DEFAULT_QUEUE_NAME = "default" -def process_job(queue_name): - """This function grabs the next available job for a given queue, and runs its next task.""" - - with transaction.atomic(): - job = Job.objects.get_ready_or_none(queue_name) - if not job: - return - - logger.info('Processing job: name="%s" queue="%s" id=%s state=%s next_task=%s', job.name, queue_name, job.pk, job.state, job.next_task) - job.state = Job.STATES.PROCESSING - job.save() +class Worker: + def __init__(self, name, rate_limit_in_seconds): + self.queue_name = name + self.rate_limit_in_seconds = rate_limit_in_seconds + self.alive = True + self.last_job_finished = None + self.current_job = None + self.init_signals() - try: - task_function = import_by_path(job.next_task) - task_function(job) - job.update_next_task() - if not job.next_task: - job.state = Job.STATES.COMPLETE - else: - job.state = Job.STATES.READY - except Exception as exception: - logger.exception("Job id=%s failed", job.pk) - job.state = Job.STATES.FAILED + def init_signals(self): + signal.signal(signal.SIGINT, self.shutdown) - failure_hook_name = job.get_failure_hook_name() - if failure_hook_name: - logger.info("Running failure hook %s for job id=%s", failure_hook_name, job.pk) - failure_hook_function = import_by_path(failure_hook_name) - failure_hook_function(job, exception) - else: - logger.info("No failure hook for job id=%s", job.pk) + # for Windows, which doesn't support the SIGQUIT signal + if hasattr(signal, "SIGQUIT"): + 
signal.signal(signal.SIGQUIT, self.shutdown) - logger.info('Updating job: name="%s" id=%s state=%s next_task=%s', job.name, job.pk, job.state, job.next_task or 'none') + signal.signal(signal.SIGTERM, self.shutdown) - try: - job.save() - except: - logger.error('Failed to save job: id=%s org=%s', job.pk, job.workspace.get('organisation_id')) - raise + def shutdown(self, signum, frame): + self.alive = False + if self.current_job: + self.current_job.state = Job.STATES.STOPPING + self.current_job.save(update_fields=["state"]) + def run(self): + while self.alive: + self.process_job() -class Worker(WorkerProcessBase): + def process_job(self): + sleep(1) + if ( + self.last_job_finished + and (timezone.now() - self.last_job_finished).total_seconds() + < self.rate_limit_in_seconds + ): + return - process_title = "jobworker" + self._process_job() + + self.last_job_finished = timezone.now() + + def _process_job(self): + with transaction.atomic(): + job = Job.objects.get_ready_or_none(self.queue_name) + if not job: + return + + logger.info( + 'Processing job: name="%s" queue="%s" id=%s state=%s next_task=%s', + job.name, + self.queue_name, + job.pk, + job.state, + job.next_task, + ) + job.state = Job.STATES.PROCESSING + job.save() + self.current_job = job + + try: + job.run_pre_task_hook() + job.run_next_task() + job.update_next_task() + + if not job.next_task: + job.state = Job.STATES.COMPLETE + else: + job.state = Job.STATES.READY + except Exception as exception: + logger.exception("Job id=%s failed", job.pk) + job.state = Job.STATES.FAILED + job.run_failure_hook(exception) + finally: + try: + job.run_post_task_hook() + except: + logger.exception("Job id=%s post_task_hook failed", job.pk) + + logger.info( + 'Updating job: name="%s" id=%s state=%s next_task=%s', + job.name, + job.pk, + job.state, + job.next_task or "none", + ) - def __init__(self, name): - self.queue_name = name - super(Worker, self).__init__() + try: + job.save() + except: + logger.exception("Failed to save job: id=%s", job.pk) + raise - def do_work(self): - sleep(1) - process_job(self.queue_name) + self.current_job = None class Command(BaseCommand): help = "Run a queue worker process" - option_list = BaseCommand.option_list + ( - make_option('--dry-run', - action='store_true', - dest='dry_run', + def add_arguments(self, parser): + parser.add_argument("queue_name", nargs="?", default="default", type=str) + parser.add_argument( + "--rate_limit", + help="The rate limit in seconds. The default rate limit is 1 job per second.", + nargs="?", + default=1, + type=int, + ) + parser.add_argument( + "--dry-run", + action="store_true", + dest="dry_run", default=False, - help="Don't actually start the worker. Used for testing."), + help="Don't actually start the worker. 
Used for testing.", ) def handle(self, *args, **options): @@ -87,13 +137,17 @@ def handle(self, *args, **options): if len(args) != 1: raise CommandError("Please supply a single queue job name") - queue_name = args[0] + queue_name = options["queue_name"] + rate_limit_in_seconds = options["rate_limit"] - self.stdout.write("Starting job worker for queue \"%s\"" % queue_name) + self.stdout.write( + 'Starting job worker for queue "%s" with rate limit of one job per %s second(s)' + % (queue_name, rate_limit_in_seconds) + ) - worker = Worker(queue_name) + worker = Worker(queue_name, rate_limit_in_seconds) - if options['dry_run']: + if options["dry_run"]: return worker.run() diff --git a/django_dbq/migrations/0001_initial.py b/django_dbq/migrations/0001_initial.py index 4075219..4d63fb3 100644 --- a/django_dbq/migrations/0001_initial.py +++ b/django_dbq/migrations/0001_initial.py @@ -2,35 +2,55 @@ from __future__ import unicode_literals from django.db import models, migrations -import jsonfield.fields import uuid -try: - from django.db.models import UUIDField -except ImportError: - from django_dbq.fields import UUIDField +from django.db.models import UUIDField class Migration(migrations.Migration): - dependencies = [ - ] + dependencies = [] operations = [ migrations.CreateModel( - name='Job', + name="Job", fields=[ - ('id', UUIDField(serialize=False, editable=False, default=uuid.uuid4, primary_key=True)), - ('created', models.DateTimeField(db_index=True, auto_now_add=True)), - ('modified', models.DateTimeField(auto_now=True)), - ('name', models.CharField(max_length=100)), - ('state', models.CharField(db_index=True, max_length=20, default='NEW', choices=[('NEW', 'NEW'), ('READY', 'READY'), ('PROCESSING', 'PROCESSING'), ('FAILED', 'FAILED'), ('COMPLETE', 'COMPLETE')])), - ('next_task', models.CharField(max_length=100, blank=True)), - ('workspace', jsonfield.fields.JSONField(null=True)), - ('queue_name', models.CharField(db_index=True, max_length=20, default='default')), + ( + "id", + UUIDField( + serialize=False, + editable=False, + default=uuid.uuid4, + primary_key=True, + ), + ), + ("created", models.DateTimeField(db_index=True, auto_now_add=True)), + ("modified", models.DateTimeField(auto_now=True)), + ("name", models.CharField(max_length=100)), + ( + "state", + models.CharField( + db_index=True, + max_length=20, + default="NEW", + choices=[ + ("NEW", "NEW"), + ("READY", "READY"), + ("PROCESSING", "PROCESSING"), + ("FAILED", "FAILED"), + ("COMPLETE", "COMPLETE"), + ], + ), + ), + ("next_task", models.CharField(max_length=100, blank=True)), + ("workspace", models.TextField(null=True)), + ( + "queue_name", + models.CharField(db_index=True, max_length=20, default="default"), + ), ], options={ - 'ordering': ['-created'], + "ordering": ["-created"], }, ), ] diff --git a/django_dbq/migrations/0002_auto_20151016_1027.py b/django_dbq/migrations/0002_auto_20151016_1027.py index a9cf0b4..9769061 100644 --- a/django_dbq/migrations/0002_auto_20151016_1027.py +++ b/django_dbq/migrations/0002_auto_20151016_1027.py @@ -7,12 +7,12 @@ class Migration(migrations.Migration): dependencies = [ - ('django_dbq', '0001_initial'), + ("django_dbq", "0001_initial"), ] operations = [ migrations.AlterModelOptions( - name='job', - options={'ordering': ['created']}, + name="job", + options={"ordering": ["created"]}, ), ] diff --git a/django_dbq/migrations/0003_auto_20180713_1000.py b/django_dbq/migrations/0003_auto_20180713_1000.py new file mode 100644 index 0000000..78a09ed --- /dev/null +++ 
b/django_dbq/migrations/0003_auto_20180713_1000.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2018-07-13 10:00 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("django_dbq", "0002_auto_20151016_1027"), + ] + + operations = [ + migrations.AlterModelOptions( + name="job", + options={"ordering": ["-priority", "created"]}, + ), + migrations.AddField( + model_name="job", + name="priority", + field=models.SmallIntegerField(db_index=True, default=0), + ), + ] diff --git a/django_dbq/migrations/0004_auto_20210818_0247.py b/django_dbq/migrations/0004_auto_20210818_0247.py new file mode 100644 index 0000000..b62ab02 --- /dev/null +++ b/django_dbq/migrations/0004_auto_20210818_0247.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2rc1 on 2021-08-18 02:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("django_dbq", "0003_auto_20180713_1000"), + ] + + operations = [ + migrations.AlterField( + model_name="job", + name="state", + field=models.CharField( + choices=[ + ("NEW", "New"), + ("READY", "Ready"), + ("PROCESSING", "Processing"), + ("FAILED", "Failed"), + ("COMPLETE", "Complete"), + ], + db_index=True, + default="NEW", + max_length=20, + ), + ), + migrations.AlterField( + model_name="job", + name="workspace", + field=models.JSONField(null=True), + ), + ] diff --git a/django_dbq/migrations/0005_job_run_after.py b/django_dbq/migrations/0005_job_run_after.py new file mode 100644 index 0000000..67a2c0d --- /dev/null +++ b/django_dbq/migrations/0005_job_run_after.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2rc1 on 2021-11-04 03:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("django_dbq", "0004_auto_20210818_0247"), + ] + + operations = [ + migrations.AddField( + model_name="job", + name="run_after", + field=models.DateTimeField(db_index=True, null=True), + ), + ] diff --git a/django_dbq/migrations/0006_alter_job_state.py b/django_dbq/migrations/0006_alter_job_state.py new file mode 100644 index 0000000..e7c51cb --- /dev/null +++ b/django_dbq/migrations/0006_alter_job_state.py @@ -0,0 +1,30 @@ +# Generated by Django 3.2rc1 on 2021-11-29 04:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("django_dbq", "0005_job_run_after"), + ] + + operations = [ + migrations.AlterField( + model_name="job", + name="state", + field=models.CharField( + choices=[ + ("NEW", "New"), + ("READY", "Ready"), + ("PROCESSING", "Processing"), + ("STOPPING", "Stopping"), + ("FAILED", "Failed"), + ("COMPLETE", "Complete"), + ], + db_index=True, + default="NEW", + max_length=20, + ), + ), + ] diff --git a/django_dbq/models.py b/django_dbq/models.py index f9778a1..a90354f 100644 --- a/django_dbq/models.py +++ b/django_dbq/models.py @@ -1,26 +1,26 @@ from django.db import models -from django.utils.module_loading import import_by_path -from django_dbq.tasks import get_next_task_name, get_failure_hook_name, get_creation_hook_name -from jsonfield import JSONField -from model_utils import Choices +from django.utils import timezone +from django.utils.module_loading import import_string +from django_dbq.tasks import ( + get_next_task_name, + get_pre_task_hook_name, + get_post_task_hook_name, + get_failure_hook_name, + get_creation_hook_name, +) +from django.db.models import JSONField, UUIDField, Count, TextChoices, Q 
import datetime
import logging
import uuid

-try:
-    from django.db.models import UUIDField
-except ImportError:
-    from django_dbq.fields import UUIDField
-

logger = logging.getLogger(__name__)

-DELETE_JOBS_AFTER_HOURS = 24
+DEFAULT_DELETE_JOBS_AFTER_HOURS = 24


class JobManager(models.Manager):
-
    def get_ready_or_none(self, queue_name, max_retries=3):
        """
        Get a job in state READY or NEW for a given queue. Supports retrying in case of database deadlock

@@ -40,58 +40,103 @@ def get_ready_or_none(self, queue_name, max_retries=3):
        retries_left = max_retries
        while True:
            try:
-                return self.select_for_update().filter(queue_name=queue_name, state__in=(Job.STATES.READY, Job.STATES.NEW)).first()
+                return self.to_process(queue_name).first()
            except Exception as e:
                if retries_left == 0:
                    raise
                retries_left -= 1
-                logger.warn("Caught %s when looking for a READY job, retrying %s more times", str(e), retries_left)
+                logger.warning(
+                    "Caught %s when looking for a READY job, retrying %s more times",
+                    str(e),
+                    retries_left,
+                )

-    def delete_old(self):
+    def delete_old(self, hours=None):
        """
-        Delete all jobs older than DELETE_JOBS_AFTER_HOURS
+        Delete all jobs older than hours, or DEFAULT_DELETE_JOBS_AFTER_HOURS
        """
-        delete_jobs_in_states = [Job.STATES.FAILED, Job.STATES.COMPLETE]
-        delete_jobs_created_before = datetime.datetime.utcnow() - datetime.timedelta(hours=DELETE_JOBS_AFTER_HOURS)
-        logger.info("Deleting all job in states %s created before %s", ", ".join(delete_jobs_in_states), delete_jobs_created_before.isoformat())
-        Job.objects.filter(state__in=delete_jobs_in_states, created__lte=delete_jobs_created_before).delete()
+        delete_jobs_in_states = [
+            Job.STATES.FAILED,
+            Job.STATES.COMPLETE,
+            Job.STATES.STOPPING,
+        ]
+        delete_jobs_created_before = timezone.now() - datetime.timedelta(
+            hours=hours or DEFAULT_DELETE_JOBS_AFTER_HOURS
+        )
+        logger.info(
+            "Deleting all jobs in states %s created before %s",
+            ", ".join(delete_jobs_in_states),
+            delete_jobs_created_before.isoformat(),
+        )
+        Job.objects.filter(
+            state__in=delete_jobs_in_states, created__lte=delete_jobs_created_before
+        ).delete()
+
+    def to_process(self, queue_name):
+        return self.select_for_update().filter(
+            models.Q(queue_name=queue_name)
+            & models.Q(state__in=(Job.STATES.READY, Job.STATES.NEW))
+            & models.Q(
+                models.Q(run_after__isnull=True)
+                | models.Q(run_after__lte=timezone.now())
+            )
+        )


class Job(models.Model):
-
-    STATES = Choices("NEW", "READY", "PROCESSING", "FAILED", "COMPLETE")
+    class STATES(TextChoices):
+        NEW = "NEW"
+        READY = "READY"
+        PROCESSING = "PROCESSING"
+        STOPPING = "STOPPING"
+        FAILED = "FAILED"
+        COMPLETE = "COMPLETE"

    id = UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    created = models.DateTimeField(auto_now_add=True, db_index=True)
    modified = models.DateTimeField(auto_now=True)
    name = models.CharField(max_length=100)
-    state = models.CharField(max_length=20, choices=STATES, default=STATES.NEW, db_index=True)
+    state = models.CharField(
+        max_length=20, choices=STATES.choices, default=STATES.NEW, db_index=True
+    )
    next_task = models.CharField(max_length=100, blank=True)
    workspace = JSONField(null=True)
-    queue_name = models.CharField(max_length=20, default='default', db_index=True)
+    queue_name = models.CharField(max_length=20, default="default", db_index=True)
+    priority = models.SmallIntegerField(default=0, db_index=True)
+    run_after = models.DateTimeField(null=True, db_index=True)

    class Meta:
-        ordering = ['created']
+        ordering = ["-priority", "created"]

    objects = JobManager()

    def save(self,
*args, **kwargs): - is_new = not Job.objects.filter(pk=self.pk).exists() - - if is_new: + if self._state.adding: self.next_task = get_next_task_name(self.name) self.workspace = self.workspace or {} try: self.run_creation_hook() except Exception as exception: # noqa - logger.exception("Failed to create new job, creation hook raised an exception") + logger.exception( + "Failed to create new job, creation hook raised an exception" + ) return # cancel the save - return super(Job, self).save(*args, **kwargs) + return super().save(*args, **kwargs) def update_next_task(self): - self.next_task = get_next_task_name(self.name, self.next_task) or '' + self.next_task = get_next_task_name(self.name, self.next_task) or "" + + def run_next_task(self): + next_task_function = import_string(self.next_task) + next_task_function(self) + + def get_pre_task_hook_name(self): + return get_pre_task_hook_name(self.name) + + def get_post_task_hook_name(self): + return get_post_task_hook_name(self.name) def get_failure_hook_name(self): return get_failure_hook_name(self.name) @@ -99,10 +144,51 @@ def get_failure_hook_name(self): def get_creation_hook_name(self): return get_creation_hook_name(self.name) + def run_pre_task_hook(self): + pre_task_hook_name = self.get_pre_task_hook_name() + if pre_task_hook_name: + logger.info("Running pre_task hook %s for job", pre_task_hook_name) + pre_task_hook_function = import_string(pre_task_hook_name) + pre_task_hook_function(self) + + def run_post_task_hook(self): + post_task_hook_name = self.get_post_task_hook_name() + if post_task_hook_name: + logger.info("Running post_task hook %s for job", post_task_hook_name) + post_task_hook_function = import_string(post_task_hook_name) + post_task_hook_function(self) + + def run_failure_hook(self, exception): + failure_hook_name = self.get_failure_hook_name() + if failure_hook_name: + logger.info("Running failure hook %s for job", failure_hook_name) + failure_hook_function = import_string(failure_hook_name) + failure_hook_function(self, exception) + def run_creation_hook(self): creation_hook_name = self.get_creation_hook_name() if creation_hook_name: - logger.info("Running creation hook %s for new job", creation_hook_name) - creation_hook_function = import_by_path(creation_hook_name) + logger.info("Running creation hook %s for job", creation_hook_name) + creation_hook_function = import_string(creation_hook_name) creation_hook_function(self) + @staticmethod + def get_queue_depths(*, exclude_future_jobs=False): + jobs_waiting_in_queue = Job.objects.filter( + state__in=(Job.STATES.READY, Job.STATES.NEW) + ) + if exclude_future_jobs: + jobs_waiting_in_queue = jobs_waiting_in_queue.filter( + Q(run_after__isnull=True) | Q(run_after__lte=timezone.now()) + ) + + annotation_dicts = ( + jobs_waiting_in_queue.values("queue_name") + .order_by("queue_name") + .annotate(Count("queue_name")) + ) + + return { + annotation_dict["queue_name"]: annotation_dict["queue_name__count"] + for annotation_dict in annotation_dicts + } diff --git a/django_dbq/serializers.py b/django_dbq/serializers.py deleted file mode 100644 index 1a8a7e7..0000000 --- a/django_dbq/serializers.py +++ /dev/null @@ -1,29 +0,0 @@ -from django.conf import settings -from django_dbq.models import Job -from rest_framework import serializers -import json - - -class JobSerializer(serializers.Serializer): - name = serializers.ChoiceField() - created = serializers.DateTimeField(read_only=True) - modified = serializers.DateTimeField(read_only=True) - state = serializers.CharField(read_only=True) - 
workspace = serializers.WritableField(required=False) - url = serializers.HyperlinkedIdentityField(view_name='job_detail') - - def __init__(self, *args, **kwargs): - super(JobSerializer, self).__init__(*args, **kwargs) - self.fields['name'].choices = ((key, key) for key in settings.JOBS) - - def validate_workspace(self, attrs, source): - workspace = attrs.get('workspace') - if workspace and isinstance(workspace, basestring): - try: - attrs['workspace'] = json.loads(workspace) - except ValueError: - raise serializers.ValidationError("Invalid JSON") - return attrs - - def restore_object(self, attrs, instance=None): - return Job(**attrs) diff --git a/django_dbq/tasks.py b/django_dbq/tasks.py index b7a7ad2..a95b4a5 100644 --- a/django_dbq/tasks.py +++ b/django_dbq/tasks.py @@ -1,9 +1,11 @@ from django.conf import settings -TASK_LIST_KEY = 'tasks' -FAILURE_HOOK_KEY = 'failure_hook' -CREATION_HOOK_KEY = 'creation_hook' +TASK_LIST_KEY = "tasks" +PRE_TASK_HOOK_KEY = "pre_task_hook" +POST_TASK_HOOK_KEY = "post_task_hook" +FAILURE_HOOK_KEY = "failure_hook" +CREATION_HOOK_KEY = "creation_hook" def get_next_task_name(job_name, current_task=None): @@ -24,6 +26,16 @@ def get_next_task_name(job_name, current_task=None): return None +def get_pre_task_hook_name(job_name): + """Return the name of the pre task hook for the given job (as a string) or None""" + return settings.JOBS[job_name].get(PRE_TASK_HOOK_KEY) + + +def get_post_task_hook_name(job_name): + """Return the name of the post_task hook for the given job (as a string) or None""" + return settings.JOBS[job_name].get(POST_TASK_HOOK_KEY) + + def get_failure_hook_name(job_name): """Return the name of the failure hook for the given job (as a string) or None""" return settings.JOBS[job_name].get(FAILURE_HOOK_KEY) diff --git a/django_dbq/tests.py b/django_dbq/tests.py index 654e6ea..200623b 100644 --- a/django_dbq/tests.py +++ b/django_dbq/tests.py @@ -1,14 +1,16 @@ -from datetime import datetime, timedelta -from django.core.management import call_command, CommandError -from django.core.urlresolvers import reverse +from datetime import datetime, timedelta, timezone as datetime_timezone +from unittest import mock + +import freezegun +from django.core.management import call_command from django.test import TestCase from django.test.utils import override_settings -from django_dbq.management.commands.worker import process_job +from django.utils import timezone + +from django_dbq.management.commands.worker import Worker from django_dbq.models import Job -try: - from StringIO import StringIO -except ImportError: - from io import StringIO + +from io import StringIO def test_task(job=None): @@ -16,90 +18,295 @@ def test_task(job=None): def workspace_test_task(job): - input = job.workspace['input'] - job.workspace['output'] = input + '-output' + input = job.workspace["input"] + job.workspace["output"] = input + "-output" def failing_task(job): raise Exception("uh oh") -def failure_hook(job, exception): - job.workspace['output'] = 'failure hook ran' - +def pre_task_hook(job): + job.workspace["output"] = "pre task hook ran" + job.workspace["job_id"] = str(job.id) -def creation_hook(job): - job.workspace['output'] = 'creation hook ran' +def post_task_hook(job): + job.workspace["output"] = "post task hook ran" + job.workspace["job_id"] = str(job.id) -@override_settings(JOBS={'testjob': {'tasks': ['a']}}) -class JobManagementCommandTestCase(TestCase): - - def test_create_job(self): - call_command('create_job', 'testjob', stdout=StringIO()) - job = Job.objects.get() - 
self.assertEqual(job.name, 'testjob') - self.assertEqual(job.queue_name, 'default') - - def test_create_job_with_workspace(self): - workspace = '{"test": "test"}' - call_command('create_job', 'testjob', workspace=workspace, stdout=StringIO()) - job = Job.objects.get() - self.assertEqual(job.workspace, {'test': 'test'}) - def test_create_job_with_queue_name(self): - call_command('create_job', 'testjob', queue_name='lol', stdout=StringIO()) - job = Job.objects.get() - self.assertEqual(job.name, 'testjob') - self.assertEqual(job.queue_name, 'lol') +def failure_hook(job, exception): + job.workspace["output"] = "failure hook ran" + job.workspace["exception"] = str(exception) + job.workspace["job_id"] = str(job.id) - def test_errors_raised_correctly(self): - with self.assertRaises(CommandError): - call_command('create_job', stdout=StringIO()) - with self.assertRaises(CommandError): - call_command('create_job', 'some_other_job', stdout=StringIO()) +def creation_hook(job): + job.workspace["output"] = "creation hook ran" + job.workspace["job_id"] = str(job.id) -@override_settings(JOBS={'testjob': {'tasks': ['a']}}) +@override_settings(JOBS={"testjob": {"tasks": ["a"]}}) class WorkerManagementCommandTestCase(TestCase): - def test_worker_no_args(self): stdout = StringIO() - call_command('worker', dry_run=True, stdout=stdout) + call_command("worker", dry_run=True, stdout=stdout) output = stdout.getvalue() - self.assertTrue('Starting job worker' in output) - self.assertTrue('default' in output) + self.assertTrue("Starting job worker" in output) + self.assertTrue("default" in output) def test_worker_with_queue_name(self): stdout = StringIO() - call_command('worker', 'test_queue', dry_run=True, stdout=stdout) + call_command("worker", queue_name="test_queue", dry_run=True, stdout=stdout) output = stdout.getvalue() - self.assertTrue('test_queue' in output) + self.assertTrue("test_queue" in output) + + +@freezegun.freeze_time("2025-01-01T12:00:00Z") +@override_settings(JOBS={"testjob": {"tasks": ["a"]}}) +class JobModelMethodTestCase(TestCase): + def test_get_queue_depths(self): + Job.objects.create(name="testjob", queue_name="default") + Job.objects.create(name="testjob", queue_name="testworker") + Job.objects.create( + name="testjob", + queue_name="testworker", + run_after=timezone.make_aware(datetime(2025, 1, 1, 13, 0, 0)), + ) + Job.objects.create( + name="testjob", queue_name="testworker", state=Job.STATES.FAILED + ) + Job.objects.create( + name="testjob", queue_name="testworker", state=Job.STATES.COMPLETE + ) + + queue_depths = Job.get_queue_depths() + self.assertDictEqual(queue_depths, {"default": 1, "testworker": 2}) + + def test_get_queue_depths_exclude_future_jobs(self): + Job.objects.create(name="testjob", queue_name="default") + Job.objects.create(name="testjob", queue_name="testworker") + Job.objects.create( + name="testjob", + queue_name="testworker", + run_after=timezone.make_aware(datetime(2025, 1, 1, 13, 0, 0)), + ) + Job.objects.create( + name="testjob", queue_name="testworker", state=Job.STATES.FAILED + ) + Job.objects.create( + name="testjob", queue_name="testworker", state=Job.STATES.COMPLETE + ) + + queue_depths = Job.get_queue_depths(exclude_future_jobs=True) + self.assertDictEqual(queue_depths, {"default": 1, "testworker": 1}) + + +@freezegun.freeze_time("2025-01-01T12:00:00Z") +@override_settings(JOBS={"testjob": {"tasks": ["a"]}}) +class QueueDepthTestCase(TestCase): + def test_queue_depth(self): + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + 
Job.objects.create(name="testjob", state=Job.STATES.NEW) + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + Job.objects.create(name="testjob", state=Job.STATES.COMPLETE) + Job.objects.create( + name="testjob", + state=Job.STATES.READY, + run_after=timezone.make_aware(datetime(2025, 1, 1, 13, 0, 0)), + ) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) + stdout = StringIO() + call_command("queue_depth", stdout=stdout) + output = stdout.getvalue() + self.assertEqual(output.strip(), "event=queue_depths default=2") + + def test_queue_depth_exclude_future_jobs(self): + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + Job.objects.create(name="testjob", state=Job.STATES.NEW) + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + Job.objects.create(name="testjob", state=Job.STATES.COMPLETE) + Job.objects.create( + name="testjob", + state=Job.STATES.READY, + run_after=timezone.make_aware(datetime(2025, 1, 1, 13, 0, 0)), + ) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) -@override_settings(JOBS={'testjob': {'tasks': ['a']}}) -class JobTestCase(TestCase): + stdout = StringIO() + call_command("queue_depth", exclude_future_jobs=True, stdout=stdout) + output = stdout.getvalue() + self.assertEqual(output.strip(), "event=queue_depths default=1") + + def test_queue_depth_multiple_queues(self): + + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + Job.objects.create(name="testjob", state=Job.STATES.NEW) + Job.objects.create(name="testjob", state=Job.STATES.FAILED) + Job.objects.create(name="testjob", state=Job.STATES.COMPLETE) + Job.objects.create(name="testjob", state=Job.STATES.READY) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) + Job.objects.create( + name="testjob", queue_name="testqueue", state=Job.STATES.READY + ) + + stdout = StringIO() + call_command( + "queue_depth", + queue_name=( + "default", + "testqueue", + ), + stdout=stdout, + ) + output = stdout.getvalue() + self.assertEqual(output.strip(), "event=queue_depths default=2 testqueue=2") + def test_queue_depth_for_queue_with_zero_jobs(self): + stdout = StringIO() + call_command("queue_depth", queue_name=("otherqueue",), stdout=stdout) + output = stdout.getvalue() + self.assertEqual(output.strip(), "event=queue_depths otherqueue=0") + + +@freezegun.freeze_time() +@mock.patch("django_dbq.management.commands.worker.sleep") +class WorkerProcessProcessJobTestCase(TestCase): + def setUp(self): + super().setUp() + self.mock_worker = mock.MagicMock() + self.mock_worker.queue_name = "default" + self.mock_worker.rate_limit_in_seconds = 5 + self.mock_worker.last_job_finished = None + + def test_process_job_no_previous_job_run(self, mock_sleep): + Worker.process_job(self.mock_worker) + self.assertEqual(mock_sleep.call_count, 1) + self.assertEqual(self.mock_worker._process_job.call_count, 1) + self.assertEqual(self.mock_worker.last_job_finished, timezone.now()) + + def test_process_job_previous_job_too_soon(self, mock_sleep): + self.mock_worker.last_job_finished = timezone.now() - timezone.timedelta( + seconds=2 + ) + Worker.process_job(self.mock_worker) + self.assertEqual(mock_sleep.call_count, 1) + self.assertEqual(self.mock_worker._process_job.call_count, 0) + self.assertEqual( + 
self.mock_worker.last_job_finished,
+            timezone.now() - timezone.timedelta(seconds=2),
+        )
+
+    def test_process_job_previous_job_long_time_ago(self, mock_sleep):
+        self.mock_worker.last_job_finished = timezone.now() - timezone.timedelta(
+            seconds=7
+        )
+        Worker.process_job(self.mock_worker)
+        self.assertEqual(mock_sleep.call_count, 1)
+        self.assertEqual(self.mock_worker._process_job.call_count, 1)
+        self.assertEqual(self.mock_worker.last_job_finished, timezone.now())
+
+
+@override_settings(JOBS={"testjob": {"tasks": ["a"]}})
+class ShutdownTestCase(TestCase):
+    def test_shutdown_sets_state_to_stopping(self):
+        job = Job.objects.create(name="testjob")
+        worker = Worker("default", 1)
+        worker.current_job = job
+
+        worker.shutdown(None, None)
+
+        job.refresh_from_db()
+        self.assertEqual(job.state, Job.STATES.STOPPING)
+
+
+@override_settings(JOBS={"testjob": {"tasks": ["a"]}})
+class JobTestCase(TestCase):
     def test_create_job(self):
-        job = Job(name='testjob')
+        job = Job(name="testjob")
         self.assertEqual(job.state, Job.STATES.NEW)
 
     def test_create_job_with_queue(self):
-        job = Job(name='testjob', queue_name='lol')
+        job = Job(name="testjob", queue_name="lol")
         self.assertEqual(job.state, Job.STATES.NEW)
-        self.assertEqual(job.queue_name, 'lol')
+        self.assertEqual(job.queue_name, "lol")
 
     def test_get_next_ready_job(self):
-        self.assertTrue(Job.objects.get_ready_or_none('default') is None)
+        self.assertTrue(Job.objects.get_ready_or_none("default") is None)
 
-        Job.objects.create(name='testjob', state=Job.STATES.READY)
-        Job.objects.create(name='testjob', state=Job.STATES.PROCESSING)
-        expected = Job.objects.create(name='testjob', state=Job.STATES.READY)
-        expected.created = datetime.now() - timedelta(minutes=1)
+        Job.objects.create(name="testjob", state=Job.STATES.READY)
+        Job.objects.create(name="testjob", state=Job.STATES.PROCESSING)
+        expected = Job.objects.create(name="testjob", state=Job.STATES.READY)
+        expected.created = timezone.now() - timedelta(minutes=1)
         expected.save()
 
-        self.assertEqual(Job.objects.get_ready_or_none('default'), expected)
+        self.assertEqual(Job.objects.get_ready_or_none("default"), expected)
+
+    def test_gets_jobs_in_priority_order(self):
+        job_1 = Job.objects.create(name="testjob")
+        job_2 = Job.objects.create(name="testjob", state=Job.STATES.PROCESSING)
+        job_3 = Job.objects.create(name="testjob", priority=3)
+        job_4 = Job.objects.create(name="testjob", priority=2)
+        self.assertEqual(
+            {job for job in Job.objects.to_process("default")}, {job_3, job_4, job_1}
+        )
+        self.assertEqual(Job.objects.get_ready_or_none("default"), job_3)
+        self.assertFalse(Job.objects.to_process("default").filter(id=job_2.id).exists())
+
+    def test_gets_jobs_in_negative_priority_order(self):
+        job_1 = Job.objects.create(name="testjob")
+        job_2 = Job.objects.create(name="testjob", state=Job.STATES.PROCESSING)
+        job_3 = Job.objects.create(name="testjob", priority=-2)
+        job_4 = Job.objects.create(name="testjob", priority=1)
+        self.assertEqual(
+            {job for job in Job.objects.to_process("default")}, {job_4, job_3, job_1}
+        )
+        self.assertEqual(Job.objects.get_ready_or_none("default"), job_4)
+        self.assertFalse(Job.objects.to_process("default").filter(id=job_2.id).exists())
+
+    def test_gets_jobs_in_priority_and_date_order(self):
+        job_1 = Job.objects.create(name="testjob", priority=3)
+        job_2 = Job.objects.create(
+            name="testjob", state=Job.STATES.PROCESSING, priority=3
+        )
+        job_3 = Job.objects.create(name="testjob", priority=3)
+        job_4 = Job.objects.create(name="testjob", priority=3)
+        self.assertEqual(
+            {job for job in Job.objects.to_process("default")}, {job_1, job_3, job_4}
+        )
+        self.assertEqual(Job.objects.get_ready_or_none("default"), job_1)
+        self.assertFalse(Job.objects.to_process("default").filter(id=job_2.id).exists())
+
+    def test_ignores_jobs_until_run_after_is_in_the_past(self):
+        job_1 = Job.objects.create(name="testjob")
+        job_2 = Job.objects.create(
+            name="testjob",
+            run_after=datetime(2021, 11, 4, 8, tzinfo=datetime_timezone.utc),
+        )
+
+        with freezegun.freeze_time(datetime(2021, 11, 4, 7)):
+            self.assertEqual(
+                {job for job in Job.objects.to_process("default")}, {job_1}
+            )
+
+        with freezegun.freeze_time(datetime(2021, 11, 4, 9)):
+            self.assertEqual(
+                {job for job in Job.objects.to_process("default")}, {job_1, job_2}
+            )
 
     def test_get_next_ready_job_created(self):
         """
@@ -109,37 +316,35 @@ def test_get_next_ready_job_created(self):
         selected by get_ready_or_none (the model is ordered by 'created'
         and the query picks the .first())
         """
-        self.assertTrue(Job.objects.get_ready_or_none('default') is None)
+        self.assertTrue(Job.objects.get_ready_or_none("default") is None)
 
-        Job.objects.create(name='testjob', state=Job.STATES.NEW)
-        Job.objects.create(name='testjob', state=Job.STATES.PROCESSING)
-        expected = Job.objects.create(name='testjob', state=Job.STATES.NEW)
-        expected.created = datetime.now() - timedelta(minutes=1)
+        Job.objects.create(name="testjob", state=Job.STATES.NEW)
+        Job.objects.create(name="testjob", state=Job.STATES.PROCESSING)
+        expected = Job.objects.create(name="testjob", state=Job.STATES.NEW)
+        expected.created = timezone.now() - timedelta(minutes=1)
         expected.save()
 
-        self.assertEqual(Job.objects.get_ready_or_none('default'), expected)
+        self.assertEqual(Job.objects.get_ready_or_none("default"), expected)
 
 
-@override_settings(JOBS={'testjob': {'tasks': ['a', 'b', 'c']}})
+@override_settings(JOBS={"testjob": {"tasks": ["a", "b", "c"]}})
 class JobTaskTestCase(TestCase):
-
     def test_task_sequence(self):
-        job = Job.objects.create(name='testjob')
-        self.assertEqual(job.next_task, 'a')
+        job = Job.objects.create(name="testjob")
+        self.assertEqual(job.next_task, "a")
         job.update_next_task()
-        self.assertEqual(job.next_task, 'b')
+        self.assertEqual(job.next_task, "b")
         job.update_next_task()
-        self.assertEqual(job.next_task, 'c')
+        self.assertEqual(job.next_task, "c")
         job.update_next_task()
-        self.assertEqual(job.next_task, '')
+        self.assertEqual(job.next_task, "")
 
 
-@override_settings(JOBS={'testjob': {'tasks': ['django_dbq.tests.test_task']}})
+@override_settings(JOBS={"testjob": {"tasks": ["django_dbq.tests.test_task"]}})
 class ProcessJobTestCase(TestCase):
-
     def test_process_job(self):
-        job = Job.objects.create(name='testjob')
-        process_job('default')
+        job = Job.objects.create(name="testjob")
+        Worker("default", 1)._process_job()
         job = Job.objects.get()
         self.assertEqual(job.state, Job.STATES.COMPLETE)
@@ -147,62 +352,130 @@ def test_process_job_wrong_queue(self):
         """
         Processing a different queue shouldn't touch our other job
         """
-        job = Job.objects.create(name='testjob', queue_name='lol')
-        process_job('default')
+        job = Job.objects.create(name="testjob", queue_name="lol")
+        Worker("default", 1)._process_job()
         job = Job.objects.get()
         self.assertEqual(job.state, Job.STATES.NEW)
 
 
-@override_settings(JOBS={'testjob': {'tasks': ['django_dbq.tests.test_task'], 'creation_hook': 'django_dbq.tests.creation_hook'}})
+@override_settings(
+    JOBS={
+        "testjob": {
+            "tasks": ["django_dbq.tests.test_task"],
+            "creation_hook": "django_dbq.tests.creation_hook",
+        }
+    }
+)
 class JobCreationHookTestCase(TestCase):
-
     def test_creation_hook(self):
-        job = Job.objects.create(name='testjob')
+        job = Job.objects.create(name="testjob")
         job = Job.objects.get()
-        self.assertEqual(job.workspace['output'], 'creation hook ran')
+        self.assertEqual(job.workspace["output"], "creation hook ran")
+        self.assertEqual(job.workspace["job_id"], str(job.id))
 
     def test_creation_hook_only_runs_on_create(self):
-        job = Job.objects.create(name='testjob')
+        job = Job.objects.create(name="testjob")
         job = Job.objects.get()
-        job.workspace['output'] = 'creation hook output removed'
+        job.workspace["output"] = "creation hook output removed"
         job.save()
         job = Job.objects.get()
-        self.assertEqual(job.workspace['output'], 'creation hook output removed')
-
-
-@override_settings(JOBS={'testjob': {'tasks': ['django_dbq.tests.failing_task'], 'failure_hook': 'django_dbq.tests.failure_hook'}})
+        self.assertEqual(job.workspace["output"], "creation hook output removed")
+
+
+@override_settings(
+    JOBS={
+        "testjob": {
+            "tasks": ["django_dbq.tests.test_task"],
+            "pre_task_hook": "django_dbq.tests.pre_task_hook",
+        }
+    }
+)
+class JobPreTaskHookTestCase(TestCase):
+    def test_pre_task_hook(self):
+        job = Job.objects.create(name="testjob")
+        Worker("default", 1)._process_job()
+        job = Job.objects.get()
+        self.assertEqual(job.state, Job.STATES.COMPLETE)
+        self.assertEqual(job.workspace["output"], "pre task hook ran")
+        self.assertEqual(job.workspace["job_id"], str(job.id))
+
+
+@override_settings(
+    JOBS={
+        "testjob": {
+            "tasks": ["django_dbq.tests.test_task"],
+            "post_task_hook": "django_dbq.tests.post_task_hook",
+        }
+    }
+)
+class JobPostTaskHookTestCase(TestCase):
+    def test_post_task_hook(self):
+        job = Job.objects.create(name="testjob")
+        Worker("default", 1)._process_job()
+        job = Job.objects.get()
+        self.assertEqual(job.state, Job.STATES.COMPLETE)
+        self.assertEqual(job.workspace["output"], "post task hook ran")
+        self.assertEqual(job.workspace["job_id"], str(job.id))
+
+
+@override_settings(
+    JOBS={
+        "testjob": {
+            "tasks": ["django_dbq.tests.failing_task"],
+            "failure_hook": "django_dbq.tests.failure_hook",
+        }
+    }
+)
 class JobFailureHookTestCase(TestCase):
-
     def test_failure_hook(self):
-        job = Job.objects.create(name='testjob')
-        process_job('default')
+        job = Job.objects.create(name="testjob")
+        Worker("default", 1)._process_job()
         job = Job.objects.get()
         self.assertEqual(job.state, Job.STATES.FAILED)
-        self.assertEqual(job.workspace['output'], 'failure hook ran')
+        self.assertEqual(job.workspace["output"], "failure hook ran")
+        self.assertIn("uh oh", job.workspace["exception"])
+        self.assertEqual(job.workspace["job_id"], str(job.id))
 
 
-@override_settings(JOBS={'testjob': {'tasks': ['a']}})
+@override_settings(JOBS={"testjob": {"tasks": ["a"]}})
 class DeleteOldJobsTestCase(TestCase):
-
     def test_delete_old_jobs(self):
-        two_days_ago = datetime.utcnow() - timedelta(days=2)
+        two_days_ago = timezone.now() - timedelta(days=2)
 
-        j1 = Job.objects.create(name='testjob', state=Job.STATES.COMPLETE)
+        j1 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
         j1.created = two_days_ago
         j1.save()
 
-        j2 = Job.objects.create(name='testjob', state=Job.STATES.FAILED)
+        j2 = Job.objects.create(name="testjob", state=Job.STATES.FAILED)
         j2.created = two_days_ago
         j2.save()
 
-        j3 = Job.objects.create(name='testjob', state=Job.STATES.NEW)
+        j3 = Job.objects.create(name="testjob", state=Job.STATES.STOPPING)
         j3.created = two_days_ago
         j3.save()
 
-        j4 = Job.objects.create(name='testjob', state=Job.STATES.COMPLETE)
+        j4 = Job.objects.create(name="testjob", state=Job.STATES.NEW)
+        j4.created = two_days_ago
+        j4.save()
+
+        j5 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
 
         Job.objects.delete_old()
 
         self.assertEqual(Job.objects.count(), 2)
-        self.assertTrue(j3 in Job.objects.all())
         self.assertTrue(j4 in Job.objects.all())
+        self.assertTrue(j5 in Job.objects.all())
+
+    def test_delete_old_jobs_with_custom_hours_argument(self):
+        j1 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
+        j1.created = timezone.now() - timedelta(days=5)
+        j1.save()
+
+        j2 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
+        j2.created = timezone.now() - timedelta(days=3)
+        j2.save()
+
+        Job.objects.delete_old(hours=24 * 4)
+
+        self.assertEqual(Job.objects.count(), 1)
+        self.assertTrue(j2 in Job.objects.all())
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 8ed66d8..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-django-model-utils==2.3.1
-django-uuidfield==0.5.0
-jsonfield==1.0.3
-Django>=1.8
-simplesignals==0.3.0
diff --git a/setup.py b/setup.py
index 80c966b..fda66ff 100644
--- a/setup.py
+++ b/setup.py
@@ -8,38 +8,39 @@
 import sys
 
-name = 'django-db-queue'
-package = 'django_dbq'
-description = 'Simple database-backed job queue system'
-url = 'http://www.dabapps.com'
-author = 'DabApps'
-author_email = 'contact@dabapps.com'
-license = 'BSD'
+name = "django-db-queue"
+package = "django_dbq"
+description = "Simple database-backed job queue system"
+url = "http://www.dabapps.com"
+author = "DabApps"
+author_email = "contact@dabapps.com"
+license = "BSD"
 
 install_requires = [
-    "django-model-utils==2.3.1",
-    "django-uuidfield==0.5.0",
-    "jsonfield==1.0.3",
-    "Django>=1.7",
-    "simplesignals==0.3.0",
+    "Django>=4.2",
 ]
 
 long_description = """Simple database-backed job queue system"""
 
+
 def get_version(package):
     """
     Return package version as listed in `__version__` in `init.py`.
     """
-    init_py = open(os.path.join(package, '__init__.py')).read()
-    return re.search("^__version__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE).group(1)
+    init_py = open(os.path.join(package, "__init__.py")).read()
+    return re.search("^__version__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE).group(
+        1
+    )
 
 
 def get_packages(package):
     """
     Return root package and all sub-packages.
     """
-    return [dirpath
-            for dirpath, dirnames, filenames in os.walk(package)
-            if os.path.exists(os.path.join(dirpath, '__init__.py'))]
+    return [
+        dirpath
+        for dirpath, dirnames, filenames in os.walk(package)
+        if os.path.exists(os.path.join(dirpath, "__init__.py"))
+    ]
 
 
@@ -47,20 +48,21 @@ def get_package_data(package):
     """
     Return all files under the root package, that are not
     in a package themselves.
""" - walk = [(dirpath.replace(package + os.sep, '', 1), filenames) - for dirpath, dirnames, filenames in os.walk(package) - if not os.path.exists(os.path.join(dirpath, '__init__.py'))] + walk = [ + (dirpath.replace(package + os.sep, "", 1), filenames) + for dirpath, dirnames, filenames in os.walk(package) + if not os.path.exists(os.path.join(dirpath, "__init__.py")) + ] filepaths = [] for base, filenames in walk: - filepaths.extend([os.path.join(base, filename) - for filename in filenames]) + filepaths.extend([os.path.join(base, filename) for filename in filenames]) return {package: filepaths} -if sys.argv[-1] == 'publish': +if sys.argv[-1] == "publish": os.system("python setup.py sdist upload") - args = {'version': get_version(package)} + args = {"version": get_version(package)} print("You probably want to also tag the version now:") print(" git tag -a %(version)s -m 'version %(version)s'" % args) print(" git push --tags") @@ -79,6 +81,6 @@ def get_package_data(package): packages=get_packages(package), package_data=get_package_data(package), install_requires=install_requires, - classifiers=[ - ] + classifiers=[], + python_requires=">=3.9" ) diff --git a/states.png b/states.png new file mode 100644 index 0000000..acc2858 Binary files /dev/null and b/states.png differ diff --git a/test-requirements.txt b/test-requirements.txt index 31ea0da..93545b8 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,5 @@ --r requirements.txt -pymysql==0.6.7 +mysqlclient==2.2.7 +freezegun==1.5.1 +dj-database-url==2.3.0 +psycopg2==2.9.10 +black==24.10.0 diff --git a/testsettings.py b/testsettings.py index 868c9e9..2d0d0ba 100644 --- a/testsettings.py +++ b/testsettings.py @@ -1,47 +1,23 @@ import os -import pymysql -pymysql.install_as_MySQLdb() +import dj_database_url + + +DATABASE_URL = os.environ.get("DATABASE_URL", "sqlite:///:memory:") DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.mysql', - 'NAME': 'django_db_queue', - 'PORT': os.getenv('DATABASE_PORT', 3306), - }, + "default": dj_database_url.parse(DATABASE_URL), } -INSTALLED_APPS = ( - 'django_dbq', -) +INSTALLED_APPS = ("django_dbq",) -MIDDLEWARE_CLASSES = ( - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -) - -SECRET_KEY = 'abcde12345' +SECRET_KEY = "abcde12345" LOGGING = { - 'version': 1, - 'disable_existing_loggers': True, - 'handlers': { - 'console': { - 'level': 'DEBUG', - 'class': 'logging.StreamHandler', - }, - }, - 'root': { - 'handlers': ['console'], - 'level': 'INFO', - }, - 'loggers': { - 'django_dbq': { - 'level': 'CRITICAL', - 'propagate': True, - }, - } + "version": 1, + "disable_existing_loggers": True, + "handlers": {"console": {"level": "DEBUG", "class": "logging.StreamHandler",},}, + "root": {"handlers": ["console"], "level": "INFO",}, + "loggers": {"django_dbq": {"level": "CRITICAL", "propagate": True,},}, } + +USE_TZ = True