From f8f59377c58d8426fdb2437e93c3faca7a7e605b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikul=C3=A1=C5=A1=20Poul?= Date: Tue, 26 Aug 2025 15:38:41 +0100 Subject: [PATCH] Add README, verify examples from README works, bump version --- README.md | 222 ++++++++++++++++++ pyproject.toml | 3 +- tests/test_project/bookshop/__init__.py | 0 tests/test_project/bookshop/factories.py | 67 ++++++ .../bookshop/migrations/0001_initial.py | 80 +++++++ .../bookshop/migrations/__init__.py | 0 tests/test_project/bookshop/models.py | 30 +++ tests/test_project/settings.py | 1 + tests/test_readme.py | 65 +++++ tox.ini | 1 + 10 files changed, 468 insertions(+), 1 deletion(-) create mode 100644 tests/test_project/bookshop/__init__.py create mode 100644 tests/test_project/bookshop/factories.py create mode 100644 tests/test_project/bookshop/migrations/0001_initial.py create mode 100644 tests/test_project/bookshop/migrations/__init__.py create mode 100644 tests/test_project/bookshop/models.py create mode 100644 tests/test_readme.py diff --git a/README.md b/README.md index e69de29..bbcc756 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,222 @@ +# django-memoized-prefetch + +A Django package that provides efficient memoized prefetching for processing data in chunks, reducing database queries through intelligent caching. +In some cases it can be useful even when not processing data in chunks, for example, when there are multiple foreign keys to the same table. + +## Overview + +`django-memoized-prefetch` optimizes Django ORM queries when processing large datasets by: +- **Reusing previously fetched objects** across chunks +- **Memoizing prefetched objects** using LRU (Least Recently Used) cache +- **Supporting both foreign key and many-to-many relationships** +- **Minimizing database queries** across chunk processing operations + +## Installation + +```bash +pip install django-memoized-prefetch +``` + +## Requirements + +- Python 3.9+ +- Django 4.2+ +- lru-dict 1.3.0+ + +## Usage Examples + +
+ Models used in examples, click to expand + +```python +from django.db import models + +class Author(models.Model): + name = models.CharField(max_length=255) + email = models.EmailField() + +class Publisher(models.Model): + name = models.CharField(max_length=255) + country = models.CharField(max_length=100) + +class Category(models.Model): + name = models.CharField(max_length=100) + +class Book(models.Model): + title = models.CharField(max_length=255) + isbn = models.CharField(max_length=13) + author = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="books") + translator = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="translations", null=True) + publisher = models.ForeignKey(Publisher, on_delete=models.CASCADE, related_name="books") + categories = models.ManyToManyField(Category, related_name="books") + +class Review(models.Model): + book = models.ForeignKey(Book, on_delete=models.CASCADE, related_name="reviews") + rating = models.IntegerField() + comment = models.TextField() +``` + +
+
+### Basic Usage
+
+Imagine you want to process all books, but there are too many of them to load them all into memory at once.
+You therefore need to process them in chunks.
+
+If you use just native Django, it will look something like this:
+
+```python
+from chunkator import chunkator_page
+
+for chunk in chunkator_page(Book.objects.all().prefetch_related("author", "translator", "publisher"), 10_000):
+    for book in chunk:
+        print(book.author.name, book.translator.name if book.translator is not None else None)
+        print(book.publisher.name)
+```
+
+This will work, with two caveats:
+1. On each chunk, Django will make separate queries to fetch the author and translator
+2. The author, translator and publisher objects will be fetched from the database for each chunk
+
+This is the primary use case for this package. When used like this:
+
+```python
+from django_memoized_prefetch import MemoizedPrefetch, MemoizedPrefetchConfig
+from chunkator import chunkator_page
+
+memoized_prefetch = MemoizedPrefetch(
+    MemoizedPrefetchConfig(Author, ["author", "translator"]),
+    MemoizedPrefetchConfig(Publisher, ["publisher"], prefetch_all=True),
+)
+
+for chunk in chunkator_page(Book.objects.all(), 10_000):
+    memoized_prefetch.process_chunk(chunk)
+
+    for book in chunk:
+        print(book.author.name, book.translator.name if book.translator is not None else None)
+        print(book.publisher.name)
+```
+
+The processing will be more efficient, because:
+1. All publishers will get fetched before processing any chunks, and they will be reused across all chunks
+2. The author and translator objects will be fetched using one query
+3. Any authors and translators that appeared in previous chunks will not be fetched again
+
+#### Nested attributes
+
+You can also prefetch nested attributes using both dotted notation and underscore notation; for example, in the example below both forms would work.
+ +```python +memoized_prefetch = MemoizedPrefetch( + MemoizedPrefetchConfig(Publisher, ["book.publisher"]), + MemoizedPrefetchConfig(Author, ["book__author"]), +) + +for chunk in chunkator_page(Review.objects.all(), 10000): + memoized_prefetch.process_chunk(chunk) + ... +``` + +### Many-to-Many Relationships + +Many-to-many relationships are supported as well, caching the target model, while fetching the through model for each chunk. + +```python +from django_memoized_prefetch import MemoizedPrefetch, MemoizedPrefetchConfig +from chunkator import chunkator_page + +# Configure for many-to-many relationships +memoized_prefetch = MemoizedPrefetch( + MemoizedPrefetchConfig( + model=Category, + attributes=["categories"], + is_many_to_many=True, + through_model=Book.categories.through, + source_field="book_id", + target_field="category_id", + ) +) + +# Process books with their categories +for chunk in chunkator_page(Book.objects.all(), 10000): + memoized_prefetch.process_chunk(chunk) + + for book in chunk: + # Categories are prefetched and available + category_names = [cat.name for cat in book.categories.all()] + print(f"Book: {book.title}, Categories: {', '.join(category_names)}") +``` + +### Usage outside chunked processing + +If you have multiple foreign keys to the same table, this package can be used to optimise the database queries even when not processing data in chunks. 
+ +## Configuration Options + +### MemoizedPrefetchConfig Parameters + +- **`model`** (required): The Django model class to prefetch +- **`attributes`** (required): List of attribute names to prefetch on your objects +- **`queryset`** (optional): Custom queryset for the model (for additional select_related/prefetch_related) +- **`prefetch_all`** (optional, default: False): Whether to prefetch all objects at initialisation +- **`lru_cache_size`** (optional, default: 10,000): Maximum number of objects to keep in cache +- **`is_many_to_many`** (optional, default: False): Set to True for many-to-many relationships +- **`through_model`** (optional): Through model for many-to-many relationships +- **`source_field`** (optional): Source field name in the through model +- **`target_field`** (optional): Target field name in the through model + +### Advanced Configuration + +```python +from django.db import models + +# Custom queryset with select_related +config = MemoizedPrefetchConfig( + model=Author, + attributes=["author"], + queryset=Author.objects.select_related(...), + lru_cache_size=5000, +) + +# Prefetch all objects at startup (useful for small, frequently accessed tables) +config = MemoizedPrefetchConfig( + model=Publisher, + attributes=["publisher"], + prefetch_all=True, +) +``` + +## Integrations with other packages. + +The package automatically supports `django-seal` when available, all querysets which are sealable will be automatically sealed. + +This package works when using `django-tenants`. + +## Best Practices + +1. **Use appropriate cache sizes**: Set `lru_cache_size` based on your expected data volume and available memory +2. **Prefetch related objects**: Use custom querysets with `select_related` or `prefetch_related` for nested relationships +3. **Consider prefetch_all**: Use `prefetch_all=True` for small, frequently accessed reference tables +4. **Process in reasonable chunks**: Balance memory usage with query efficiency when choosing chunk sizes +5. 
**Monitor cache hit rates**: Ensure your cache size is appropriate for your data access patterns + +## Testing + +Run the test suite: + +```bash +uv run pytest +``` + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## Authors + +- Mikuláš Poul (mikulas.poul@xelix.com) +- Cameron Hobbs (cameron.hobbs@xelix.com) diff --git a/pyproject.toml b/pyproject.toml index 0b9f423..40c6831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "django-memoized-prefetch" -version = "0.1.0" +version = "0.1.1" description = "A memoized prefetch for Django." authors = [ {name = "Mikuláš Poul", email = "mikulas.poul@xelix.com"}, @@ -154,6 +154,7 @@ parametrize-names-type = "list" [dependency-groups] dev = [ "dirty-equals>=0.9.0", + "django-chunkator>=2.0.0", "django-seal>=1.7.1", "factory-boy>=3.3.3", "pytest-cov>=6.2.1", diff --git a/tests/test_project/bookshop/__init__.py b/tests/test_project/bookshop/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_project/bookshop/factories.py b/tests/test_project/bookshop/factories.py new file mode 100644 index 0000000..06080d6 --- /dev/null +++ b/tests/test_project/bookshop/factories.py @@ -0,0 +1,67 @@ +import random + +import factory + +from tests.test_project.bookshop.models import ( + Author, + Book, + Category, + Publisher, + Review, +) + + +class AuthorFactory(factory.django.DjangoModelFactory): + name = factory.Faker("name") + email = factory.Faker("email") + + class Meta: + model = Author + + +class PublisherFactory(factory.django.DjangoModelFactory): + name = factory.Faker("company") + country = factory.Faker("country") + + class Meta: + model = Publisher + + +class CategoryFactory(factory.django.DjangoModelFactory): + name = factory.Faker("word") + + class Meta: + model = Category + + +class 
BookFactory(factory.django.DjangoModelFactory): + title = factory.Faker("sentence", nb_words=4) + isbn = factory.Faker("isbn13") + author = factory.SubFactory(AuthorFactory) + translator = factory.SubFactory(AuthorFactory) + publisher = factory.SubFactory(PublisherFactory) + + @factory.post_generation + def categories(self, create: bool, extracted: list[Category]) -> None: + if not create: + return + + if extracted: + self.categories.set(extracted) + else: + # Create 1-3 random categories if none provided + categories = CategoryFactory.create_batch(random.randint(1, 3)) + self.categories.set(categories) + + class Meta: + model = Book + skip_postgeneration_save = True + + +class ReviewFactory(factory.django.DjangoModelFactory): + book = factory.SubFactory(BookFactory) + rating = factory.Faker("random_int", min=1, max=5) + comment = factory.Faker("text", max_nb_chars=500) + + class Meta: + model = Review diff --git a/tests/test_project/bookshop/migrations/0001_initial.py b/tests/test_project/bookshop/migrations/0001_initial.py new file mode 100644 index 0000000..23c41ab --- /dev/null +++ b/tests/test_project/bookshop/migrations/0001_initial.py @@ -0,0 +1,80 @@ +# Generated by Django 5.2.5 on 2025-08-26 14:29 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="Author", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("name", models.CharField(max_length=255)), + ("email", models.EmailField(max_length=254)), + ], + ), + migrations.CreateModel( + name="Category", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("name", models.CharField(max_length=100)), + ], + ), + migrations.CreateModel( + name="Publisher", + fields=[ + ("id", models.BigAutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name="ID")), + ("name", models.CharField(max_length=255)), + ("country", models.CharField(max_length=100)), + ], + ), + migrations.CreateModel( + name="Book", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("title", models.CharField(max_length=255)), + ("isbn", models.CharField(max_length=13)), + ( + "author", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, related_name="books", to="bookshop.author" + ), + ), + ( + "translator", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="translations", + to="bookshop.author", + ), + ), + ("categories", models.ManyToManyField(related_name="books", to="bookshop.category")), + ( + "publisher", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, related_name="books", to="bookshop.publisher" + ), + ), + ], + ), + migrations.CreateModel( + name="Review", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("rating", models.IntegerField()), + ("comment", models.TextField()), + ( + "book", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, related_name="reviews", to="bookshop.book" + ), + ), + ], + ), + ] diff --git a/tests/test_project/bookshop/migrations/__init__.py b/tests/test_project/bookshop/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_project/bookshop/models.py b/tests/test_project/bookshop/models.py new file mode 100644 index 0000000..8483636 --- /dev/null +++ b/tests/test_project/bookshop/models.py @@ -0,0 +1,30 @@ +from django.db import models + + +class Author(models.Model): + name = models.CharField(max_length=255) + email = models.EmailField() + + +class Publisher(models.Model): + name = models.CharField(max_length=255) + country = models.CharField(max_length=100) + + +class Category(models.Model): + 
name = models.CharField(max_length=100) + + +class Book(models.Model): + title = models.CharField(max_length=255) + isbn = models.CharField(max_length=13) + author = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="books") + translator = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="translations", null=True) + publisher = models.ForeignKey(Publisher, on_delete=models.CASCADE, related_name="books") + categories = models.ManyToManyField(Category, related_name="books") + + +class Review(models.Model): + book = models.ForeignKey(Book, on_delete=models.CASCADE, related_name="reviews") + rating = models.IntegerField() + comment = models.TextField() diff --git a/tests/test_project/settings.py b/tests/test_project/settings.py index 5008846..2751d00 100644 --- a/tests/test_project/settings.py +++ b/tests/test_project/settings.py @@ -37,6 +37,7 @@ "django.contrib.messages", "django.contrib.staticfiles", "tests.test_project.test_app", + "tests.test_project.bookshop", "seal", ] diff --git a/tests/test_readme.py b/tests/test_readme.py new file mode 100644 index 0000000..f998c4f --- /dev/null +++ b/tests/test_readme.py @@ -0,0 +1,65 @@ +import pytest +from chunkator import chunkator_page + +from django_memoized_prefetch import MemoizedPrefetch, MemoizedPrefetchConfig +from tests.test_project.bookshop.factories import BookFactory, ReviewFactory +from tests.test_project.bookshop.models import Author, Book, Category, Publisher, Review + +pytestmark = pytest.mark.django_db + + +class TestReadmeExamples: + @pytest.fixture(autouse=True) + def setup(self): + BookFactory.create_batch(100) + ReviewFactory.create_batch(100) + + def test_basic_naive(self): + for chunk in chunkator_page(Book.objects.all().prefetch_related("author", "translator", "publisher"), 10_000): + for book in chunk: + print(book.author.name, book.translator.name if book.translator is not None else None) + print(book.publisher.name) + + def test_basic(self): + memoized_prefetch = 
MemoizedPrefetch( + MemoizedPrefetchConfig(Author, ["author", "translator"]), + MemoizedPrefetchConfig(Publisher, ["publisher"], prefetch_all=True), + ) + + for chunk in chunkator_page(Book.objects.all(), 10_000): + memoized_prefetch.process_chunk(chunk) + + for book in chunk: + print(book.author.name, book.translator.name if book.translator is not None else None) + print(book.publisher.name) + + def test_nested(self): + memoized_prefetch = MemoizedPrefetch( + MemoizedPrefetchConfig(Publisher, ["book.publisher"]), + MemoizedPrefetchConfig(Author, ["book__author"]), + ) + + for chunk in chunkator_page(Review.objects.all(), 10000): + memoized_prefetch.process_chunk(chunk) + + def test_m2m(self): + # Configure for many-to-many relationships + memoized_prefetch = MemoizedPrefetch( + MemoizedPrefetchConfig( + model=Category, + attributes=["categories"], + is_many_to_many=True, + through_model=Book.categories.through, + source_field="book_id", + target_field="category_id", + ) + ) + + # Process books with their categories + for chunk in chunkator_page(Book.objects.all(), 10000): + memoized_prefetch.process_chunk(chunk) + + for book in chunk: + # Categories are prefetched and available + category_names = [cat.name for cat in book.categories.all()] + print(f"Book: {book.title}, Categories: {', '.join(category_names)}") diff --git a/tox.ini b/tox.ini index aa570f5..04ce6ae 100644 --- a/tox.ini +++ b/tox.ini @@ -21,5 +21,6 @@ deps= dirty-equals django-seal factory-boy + django-chunkator commands= pytest