From bbe2c76d2f7ab2013488c58f2947fb889e552f84 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 12:57:57 +0000 Subject: [PATCH 01/13] added freshness field --- website/api/views.py | 15 ++++++- website/management/commands/run_daily.py | 4 ++ .../commands/update_project_freshness.py | 40 ++++++++++++++++++ website/migrations/0264_project_freshness.py | 17 ++++++++ website/models.py | 42 +++++++++++++++++++ website/serializers.py | 5 +-- website/tests/test_project_aggregation.py | 10 +---- 7 files changed, 118 insertions(+), 15 deletions(-) create mode 100644 website/management/commands/update_project_freshness.py create mode 100644 website/migrations/0264_project_freshness.py diff --git a/website/api/views.py b/website/api/views.py index 29bf9a2f99..8690a3827a 100644 --- a/website/api/views.py +++ b/website/api/views.py @@ -853,9 +853,20 @@ def filter(self, request, *args, **kwargs): total_forks=Coalesce(Sum("repos__forks"), 0), ) - # Freshness is NOT a DB field (SerializerMethodField) if freshness: - pass # Safe no-op + try: + freshness_val = float(freshness) + if not 0 <= freshness_val <= 100: + return Response( + {"error": "Invalid 'freshness' parameter: must be between 0 and 100"}, + status=status.HTTP_400_BAD_REQUEST, + ) + projects = projects.filter(freshness__gte=freshness_val) + except (ValueError, TypeError): + return Response( + {"error": "Invalid 'freshness' parameter: must be a number"}, + status=status.HTTP_400_BAD_REQUEST, + ) # SAFE stars validation if stars: diff --git a/website/management/commands/run_daily.py b/website/management/commands/run_daily.py index d13cb8d18d..7f5c6b1a40 100644 --- a/website/management/commands/run_daily.py +++ b/website/management/commands/run_daily.py @@ -45,6 +45,10 @@ def handle(self, *args, **options): call_command("cron_send_reminders") except Exception as e: logger.error("Error sending user reminders", exc_info=True) + try: + call_command("update_project_freshness") + except Exception as e: + 
logger.error("Error updating project freshness", exc_info=True) except Exception as e: logger.error("Error in daily tasks", exc_info=True) raise diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py new file mode 100644 index 0000000000..76a0638e62 --- /dev/null +++ b/website/management/commands/update_project_freshness.py @@ -0,0 +1,40 @@ +import time + +from django.core.management.base import BaseCommand + +from website.models import Project + + +class Command(BaseCommand): + help = "Recalculate and update freshness score for all projects" + + def handle(self, *args, **options): + start_time = time.time() + + projects = Project.objects.all() + total = projects.count() + + processed = 0 + errors = 0 + + self.stdout.write(f"Starting freshness update for {total} projects") + + for idx, project in enumerate(projects, start=1): + try: + freshness = project.calculate_freshness() + project.freshness = freshness + project.save(update_fields=["freshness"]) + processed += 1 + except Exception as e: + errors += 1 + self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") + + if idx % 100 == 0: + self.stdout.write(f"Processed {idx}/{total} projects...") + + duration = round(time.time() - start_time, 2) + + self.stdout.write(self.style.SUCCESS("Freshness update completed")) + self.stdout.write(f"Processed: {processed}") + self.stdout.write(f"Errors: {errors}") + self.stdout.write(f"Execution time: {duration}s") diff --git a/website/migrations/0264_project_freshness.py b/website/migrations/0264_project_freshness.py new file mode 100644 index 0000000000..b113a78832 --- /dev/null +++ b/website/migrations/0264_project_freshness.py @@ -0,0 +1,17 @@ +# Generated by Django 5.2.9 on 2025-12-17 12:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("website", "0263_githubissue_githubissue_pr_merged_idx_and_more"), + ] + + operations = [ + 
migrations.AddField( + model_name="project", + name="freshness", + field=models.DecimalField(db_index=True, decimal_places=2, default=0.0, max_digits=5), + ), + ] diff --git a/website/models.py b/website/models.py index dd3d3747bb..ead58dd0c8 100644 --- a/website/models.py +++ b/website/models.py @@ -1396,6 +1396,48 @@ class Project(models.Model): logo = models.ImageField(upload_to="project_logos", null=True, blank=True, max_length=255) created = models.DateTimeField(auto_now_add=True) # Standardized field name modified = models.DateTimeField(auto_now=True) # Standardized field name + freshness = models.DecimalField(max_digits=5, decimal_places=2, default=0.0, db_index=True) + + def calculate_freshness(self): + """ + Calculate freshness using a Bumper-style activity decay model, + based on repository update recency (proxy for activity graph). + """ + now = timezone.now() + + last_7_days = now - timedelta(days=7) + last_30_days = now - timedelta(days=30) + last_90_days = now - timedelta(days=90) + + if not repos: + return 0.0 + + qs = repos.filter(is_archived=False) + + if not qs.exists(): + return 0.0 + + active_7 = qs.filter(updated_at__gte=last_7_days).count() + active_30 = qs.filter( + updated_at__lt=last_7_days, + updated_at__gte=last_30_days, + ).count() + active_90 = qs.filter( + updated_at__lt=last_30_days, + updated_at__gte=last_90_days, + ).count() + + # Bumper-style decay weights + raw_score = active_7 * 1.0 + active_30 * 0.6 + active_90 * 0.3 + + if raw_score == 0: + return 0.0 + + # Normalize to 0–100 + MAX_SCORE = 20 # ~20 repos active recently = very fresh + freshness = min((raw_score / MAX_SCORE) * 100, 100) + + return round(freshness, 2) def save(self, *args, **kwargs): # Always ensure a valid slug exists before saving diff --git a/website/serializers.py b/website/serializers.py index 76a72b6b4c..e290553018 100644 --- a/website/serializers.py +++ b/website/serializers.py @@ -127,7 +127,7 @@ class Meta: class 
ProjectSerializer(serializers.ModelSerializer): - freshness = serializers.SerializerMethodField() + freshness = serializers.DecimalField(max_digits=5, decimal_places=2, read_only=True) total_stars = serializers.IntegerField(read_only=True) total_forks = serializers.IntegerField(read_only=True) @@ -140,9 +140,6 @@ class Meta: fields = "__all__" read_only_fields = ("slug", "contributors") - def get_freshness(self, obj): - return obj.fetch_freshness() - class ContributorSerializer(serializers.ModelSerializer): class Meta: diff --git a/website/tests/test_project_aggregation.py b/website/tests/test_project_aggregation.py index 5f66a1e1b8..8750702d58 100644 --- a/website/tests/test_project_aggregation.py +++ b/website/tests/test_project_aggregation.py @@ -14,15 +14,7 @@ class ProjectAggregationTestCase(TestCase): """Tests for stars/forks aggregation in Project API endpoints""" def setUp(self): - # ✅ Patch freshness (already correct) - self.freshness_patcher = mock.patch( - "website.serializers.ProjectSerializer.get_freshness", - return_value=None, - ) - self.freshness_patcher.start() - self.addCleanup(self.freshness_patcher.stop) - - # ✅ Patch prefetch_related to avoid invalid 'contributors' + # Patch prefetch_related to avoid invalid 'contributors' self.prefetch_patcher = mock.patch( "website.api.views.Project.objects.prefetch_related", return_value=Project.objects.all(), From 8777dccbae828d28da5b4637094a228f8cdd96f4 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 13:57:46 +0000 Subject: [PATCH 02/13] added tests --- website/api/views.py | 97 +++---------- website/models.py | 20 +-- website/tests/test_api.py | 132 +++++++++++++++++ website/tests/test_project_aggregation.py | 59 +++++++- website/tests/test_project_freshness.py | 51 +++++++ .../test_update_project_freshness_command.py | 136 ++++++++++++++++++ 6 files changed, 404 insertions(+), 91 deletions(-) create mode 100644 website/tests/test_project_freshness.py create mode 100644 
website/tests/test_update_project_freshness_command.py diff --git a/website/api/views.py b/website/api/views.py index 8690a3827a..75ddd724e3 100644 --- a/website/api/views.py +++ b/website/api/views.py @@ -757,6 +757,22 @@ def list(self, request, *args, **kwargs): total_stars=Coalesce(Sum("repos__stars"), Value(0)), total_forks=Coalesce(Sum("repos__forks"), Value(0)), ) + freshness = request.query_params.get("freshness") + + if freshness is not None: + try: + freshness_val = float(freshness) + if not 0 <= freshness_val <= 100: + return Response( + {"error": "Invalid 'freshness' parameter: must be between 0 and 100"}, + status=status.HTTP_400_BAD_REQUEST, + ) + projects = projects.filter(freshness__gte=freshness_val) + except (ValueError, TypeError): + return Response( + {"error": "Invalid 'freshness' parameter: must be a valid number"}, + status=status.HTTP_400_BAD_REQUEST, + ) stars = request.query_params.get("stars") forks = request.query_params.get("forks") @@ -840,87 +856,6 @@ def search(self, request, *args, **kwargs): status=200, ) - @action(detail=False, methods=["get"]) - def filter(self, request, *args, **kwargs): - freshness = request.query_params.get("freshness", None) - stars = request.query_params.get("stars", None) - forks = request.query_params.get("forks", None) - tags = request.query_params.get("tags", None) - - # Annotate Project with aggregated stars and forks from related Repos - projects = Project.objects.annotate( - total_stars=Coalesce(Sum("repos__stars"), 0), - total_forks=Coalesce(Sum("repos__forks"), 0), - ) - - if freshness: - try: - freshness_val = float(freshness) - if not 0 <= freshness_val <= 100: - return Response( - {"error": "Invalid 'freshness' parameter: must be between 0 and 100"}, - status=status.HTTP_400_BAD_REQUEST, - ) - projects = projects.filter(freshness__gte=freshness_val) - except (ValueError, TypeError): - return Response( - {"error": "Invalid 'freshness' parameter: must be a number"}, - 
status=status.HTTP_400_BAD_REQUEST, - ) - - # SAFE stars validation - if stars: - try: - stars_int = int(stars) - if stars_int < 0: - return Response( - {"error": "Invalid 'stars' parameter: must be non-negative"}, - status=status.HTTP_400_BAD_REQUEST, - ) - projects = projects.filter(total_stars__gte=stars_int) - except (ValueError, TypeError): - return Response( - {"error": "Invalid 'stars' parameter: must be an integer"}, - status=status.HTTP_400_BAD_REQUEST, - ) - - # SAFE forks validation - if forks: - try: - forks_int = int(forks) - if forks_int < 0: - return Response( - {"error": "Invalid 'forks' parameter: must be non-negative"}, - status=status.HTTP_400_BAD_REQUEST, - ) - projects = projects.filter(total_forks__gte=forks_int) - except (ValueError, TypeError): - return Response( - {"error": "Invalid 'forks' parameter: must be an integer"}, - status=status.HTTP_400_BAD_REQUEST, - ) - - if tags: - projects = projects.filter(tags__name__in=tags.split(",")).distinct() - - project_data = [] - for project in projects: - contributors_data = [] - for contributor in project.contributors.all(): - contributor_info = ContributorSerializer(contributor) - contributors_data.append(contributor_info.data) - - contributors_data.sort(key=lambda x: x["contributions"], reverse=True) - - project_info = ProjectSerializer(project).data - project_info["contributors"] = contributors_data - project_data.append(project_info) - - return Response( - {"count": len(project_data), "projects": project_data}, - status=200, - ) - class AuthApiViewset(viewsets.ModelViewSet): http_method_names = ("delete",) diff --git a/website/models.py b/website/models.py index ead58dd0c8..7e9149fdcf 100644 --- a/website/models.py +++ b/website/models.py @@ -1409,22 +1409,24 @@ def calculate_freshness(self): last_30_days = now - timedelta(days=30) last_90_days = now - timedelta(days=90) - if not repos: - return 0.0 - - qs = repos.filter(is_archived=False) + # Start from non-archived repos for this project + 
qs = self.repos.filter(is_archived=False) if not qs.exists(): return 0.0 - active_7 = qs.filter(updated_at__gte=last_7_days).count() + qs = qs.exclude(last_commit_date__isnull=True) + + active_7 = qs.filter(last_commit_date__gte=last_7_days).count() + active_30 = qs.filter( - updated_at__lt=last_7_days, - updated_at__gte=last_30_days, + last_commit_date__lt=last_7_days, + last_commit_date__gte=last_30_days, ).count() + active_90 = qs.filter( - updated_at__lt=last_30_days, - updated_at__gte=last_90_days, + last_commit_date__lt=last_30_days, + last_commit_date__gte=last_90_days, ).count() # Bumper-style decay weights diff --git a/website/tests/test_api.py b/website/tests/test_api.py index 70dfb03285..5c9050a6d7 100644 --- a/website/tests/test_api.py +++ b/website/tests/test_api.py @@ -13,6 +13,7 @@ from rest_framework import status from rest_framework.test import APITestCase +from website.models import Organization, Project, Repo from website.utils import rebuild_safe_url, validate_file_type @@ -293,3 +294,134 @@ def test_password_reset_known_email_sends_email(self): self.assertEqual(len(mail.outbox), 1, "Email should be sent for known accounts") print("✓ Correct: Email sent for known account") + + +class ProjectFreshnessFilteringTestCase(APITestCase): + """Test cases for Project API freshness filtering""" + + def setUp(self): + """Set up test data""" + self.org = Organization.objects.create(name="Test Organization", url="https://test.org") + + # Create projects with different freshness scores + self.high_freshness_project = Project.objects.create( + name="High Freshness", organization=self.org, url="https://github.com/test/high", freshness=85.50 + ) + + self.medium_freshness_project = Project.objects.create( + name="Medium Freshness", organization=self.org, url="https://github.com/test/medium", freshness=50.25 + ) + + self.low_freshness_project = Project.objects.create( + name="Low Freshness", organization=self.org, url="https://github.com/test/low", freshness=15.75 
+ ) + + self.zero_freshness_project = Project.objects.create( + name="Zero Freshness", organization=self.org, url="https://github.com/test/zero", freshness=0.0 + ) + + def test_filter_by_min_freshness_threshold(self): + """Test filtering projects by valid freshness threshold""" + response = self.client.get("/api/v1/projects/?freshness=50") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Should return projects with freshness >= 50 + self.assertEqual(len(data["results"]), 2) + names = [p["name"] for p in data["results"]] + self.assertIn("High Freshness", names) + self.assertIn("Medium Freshness", names) + + def test_filter_by_high_freshness(self): + """Test filtering with high freshness threshold""" + response = self.client.get("/api/v1/projects/?freshness=80") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Only high freshness project should match + self.assertEqual(len(data["results"]), 1) + self.assertEqual(data["results"][0]["name"], "High Freshness") + + def test_filter_freshness_invalid_negative(self): + """Test that negative freshness values are rejected""" + response = self.client.get("/api/v1/projects/?freshness=-10") + + self.assertEqual(response.status_code, 400) + self.assertIn("must be between 0 and 100", response.json()["error"]) + + def test_filter_freshness_invalid_over_100(self): + """Test that freshness values over 100 are rejected""" + response = self.client.get("/api/v1/projects/?freshness=150") + + self.assertEqual(response.status_code, 400) + self.assertIn("must be between 0 and 100", response.json()["error"]) + + def test_filter_freshness_invalid_non_numeric(self): + """Test that non-numeric freshness values are rejected""" + response = self.client.get("/api/v1/projects/?freshness=invalid") + + self.assertEqual(response.status_code, 400) + self.assertIn("must be a valid number", response.json()["error"]) + + def test_filter_freshness_decimal_value(self): + """Test filtering with 
decimal freshness value""" + response = self.client.get("/api/v1/projects/?freshness=50.5") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Should return projects with freshness >= 50.5 + self.assertEqual(len(data["results"]), 1) + self.assertEqual(data["results"][0]["name"], "High Freshness") + + def test_filter_freshness_combined_with_other_filters(self): + """Test freshness filter combined with other filters""" + # Add repos for star filtering + Repo.objects.create( + project=self.high_freshness_project, + name="popular-repo", + repo_url="https://github.com/test/popular", + stars=1000, + forks=100, + ) + Repo.objects.create( + project=self.low_freshness_project, + name="unpopular-repo", + repo_url="https://github.com/test/unpopular", + stars=10, + forks=5, + ) + + # Filter by both freshness and stars + response = self.client.get("/api/v1/projects/?freshness=50&stars=500") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Should return only high freshness project with enough stars + self.assertEqual(len(data["results"]), 1) + self.assertEqual(data["results"][0]["name"], "High Freshness") + + def test_filter_without_freshness_parameter(self): + """Test that filtering works when freshness parameter is not provided""" + response = self.client.get("/api/v1/projects/") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Should return all projects + self.assertEqual(len(data["results"]), 4) + + def test_freshness_field_in_api_response(self): + """Test that freshness field is included in API response""" + response = self.client.get("/api/v1/projects/") + + self.assertEqual(response.status_code, 200) + data = response.json() + + # Check that freshness field exists in response + for project in data["results"]: + self.assertIn("freshness", project) + self.assertIsNotNone(project["freshness"]) diff --git a/website/tests/test_project_aggregation.py 
b/website/tests/test_project_aggregation.py index 8750702d58..dff6d88c6c 100644 --- a/website/tests/test_project_aggregation.py +++ b/website/tests/test_project_aggregation.py @@ -5,7 +5,7 @@ from rest_framework import status from rest_framework.test import APIClient -from website.models import Project, Repo +from website.models import Organization, Project, Repo User = get_user_model() @@ -156,3 +156,60 @@ def test_filter_zero_values_valid(self): self.assertEqual(response.status_code, status.HTTP_200_OK) # Should return all projects (all have >= 0 stars/forks) self.assertEqual(len(response.data["results"]), 3) + + def test_freshness_calculation_integration(self): + """Integration test for freshness calculation with real data flow""" + from datetime import timedelta + + from django.utils import timezone + + # Create project with repos + org = Organization.objects.create(name="Integration Org", url="https://int.org") + project = Project.objects.create( + name="Integration Project", organization=org, url="https://github.com/int/project" + ) + + now = timezone.now() + + # Add repos with different activity levels + Repo.objects.create( + project=project, + name="very-active", + repo_url="https://github.com/int/active", + is_archived=False, + last_commit_date=now - timedelta(days=2), + ) + Repo.objects.create( + project=project, + name="somewhat-active", + repo_url="https://github.com/int/somewhat", + is_archived=False, + last_commit_date=now - timedelta(days=20), + ) + Repo.objects.create( + project=project, + name="old-active", + repo_url="https://github.com/int/old", + is_archived=False, + last_commit_date=now - timedelta(days=60), + ) + Repo.objects.create( + project=project, + name="archived", + repo_url="https://github.com/int/archived", + is_archived=True, + last_commit_date=now - timedelta(days=1), # Should be ignored + ) + + # Calculate freshness + freshness = project.calculate_freshness() + + # Expected: 1*1.0 + 1*0.6 + 1*0.3 = 1.9, normalized: (1.9/20)*100 = 
9.5 + self.assertEqual(freshness, 9.5) + + # Save and verify persistence + project.freshness = freshness + project.save() + + project.refresh_from_db() + self.assertEqual(float(project.freshness), 9.5) diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py new file mode 100644 index 0000000000..8ecabebb71 --- /dev/null +++ b/website/tests/test_project_freshness.py @@ -0,0 +1,51 @@ +""" +Tests for Project freshness calculation functionality. +""" +from datetime import timedelta + +from django.test import TestCase +from django.utils import timezone + +from website.models import Organization, Project, Repo + + +class ProjectFreshnessCalculationTestCase(TestCase): + def setUp(self): + self.org = Organization.objects.create(name="Test Organization", url="https://test.org") + self.project = Project.objects.create( + name="Test Project", organization=self.org, url="https://github.com/test/project" + ) + self.now = timezone.now() + + def test_freshness_no_repos(self): + freshness = self.project.calculate_freshness() + self.assertEqual(freshness, 0.0) + + def test_freshness_all_archived_repos(self): + Repo.objects.create( + project=self.project, + name="archived-repo", + repo_url="https://github.com/test/archived", + is_archived=True, + ) + freshness = self.project.calculate_freshness() + self.assertEqual(freshness, 0.0) + + def test_freshness_ignores_archived_and_counts_active(self): + Repo.objects.create( + project=self.project, + name="active", + repo_url="https://github.com/test/active", + is_archived=False, + last_commit_date=self.now - timedelta(days=2), + ) + Repo.objects.create( + project=self.project, + name="archived", + repo_url="https://github.com/test/archived", + is_archived=True, + last_commit_date=self.now - timedelta(days=1), + ) + + freshness = self.project.calculate_freshness() + self.assertGreater(freshness, 0.0) diff --git a/website/tests/test_update_project_freshness_command.py 
b/website/tests/test_update_project_freshness_command.py new file mode 100644 index 0000000000..57495d535b --- /dev/null +++ b/website/tests/test_update_project_freshness_command.py @@ -0,0 +1,136 @@ +""" +Tests for the update_project_freshness management command. +""" +from datetime import timedelta +from io import StringIO +from unittest.mock import patch + +from django.core.management import call_command +from django.test import TestCase +from django.utils import timezone + +from website.models import Organization, Project, Repo + + +class UpdateProjectFreshnessCommandTestCase(TestCase): + """Test cases for update_project_freshness management command""" + + def setUp(self): + """Set up test data""" + self.org = Organization.objects.create(name="Test Org", url="https://test.org") + self.now = timezone.now() + + def test_command_updates_all_projects(self): + """Test that command updates freshness for all projects""" + # Create projects with different activity levels + project1 = Project.objects.create( + name="Active Project", organization=self.org, url="https://github.com/test/active" + ) + Repo.objects.create( + project=project1, + name="active-repo", + repo_url="https://github.com/test/active-repo", + is_archived=False, + last_commit_date=self.now - timedelta(days=2), + ) + + project2 = Project.objects.create( + name="Inactive Project", organization=self.org, url="https://github.com/test/inactive" + ) + Repo.objects.create( + project=project2, + name="old-repo", + repo_url="https://github.com/test/old-repo", + is_archived=False, + last_commit_date=self.now - timedelta(days=100), + ) + + project3 = Project.objects.create( + name="No Repos Project", organization=self.org, url="https://github.com/test/empty" + ) + + # Run command + out = StringIO() + call_command("update_project_freshness", stdout=out) + + # Verify all projects were updated + project1.refresh_from_db() + project2.refresh_from_db() + project3.refresh_from_db() + + 
self.assertGreater(project1.freshness, 0.0) + self.assertEqual(project2.freshness, 0.0) + self.assertEqual(project3.freshness, 0.0) + + # Check output + output = out.getvalue() + self.assertIn("Starting freshness update", output) + self.assertIn("Processed: 3", output) + self.assertIn("Errors: 0", output) + self.assertIn("Freshness update completed", output) + + def test_command_handles_errors_gracefully(self): + """Test that command handles individual project errors without stopping""" + project1 = Project.objects.create( + name="Good Project", organization=self.org, url="https://github.com/test/good" + ) + Repo.objects.create( + project=project1, + name="good-repo", + repo_url="https://github.com/test/good-repo", + is_archived=False, + last_commit_date=self.now - timedelta(days=5), + ) + + project2 = Project.objects.create( + name="Error Project", organization=self.org, url="https://github.com/test/error" + ) + + out = StringIO() + err = StringIO() + + # Mock calculate_freshness to raise error for one project + original_calculate = Project.calculate_freshness + + def mock_calculate(self): + if self.name == "Error Project": + raise ValueError("Test error") + return original_calculate(self) + + with patch.object(Project, "calculate_freshness", mock_calculate): + call_command("update_project_freshness", stdout=out, stderr=err) + + # Check that good project was updated + project1.refresh_from_db() + self.assertEqual(float(project1.freshness), 5.0) + + # Check error was logged + error_output = err.getvalue() + self.assertIn(f"[ERROR] Project ID {project2.id}", error_output) + self.assertIn("Test error", error_output) + + # Check summary shows 1 error + output = out.getvalue() + self.assertIn("Processed: 1", output) + self.assertIn("Errors: 1", output) + + def test_command_execution_time_reported(self): + """Test that command reports execution time""" + Project.objects.create(name="Test Project", organization=self.org, url="https://github.com/test/project") + + out = 
StringIO() + call_command("update_project_freshness", stdout=out) + + output = out.getvalue() + self.assertIn("Execution time:", output) + self.assertIn("s", output) # Should have 's' for seconds + + def test_command_with_zero_projects(self): + """Test command behavior when there are no projects""" + out = StringIO() + call_command("update_project_freshness", stdout=out) + + output = out.getvalue() + self.assertIn("Starting freshness update for 0 projects", output) + self.assertIn("Processed: 0", output) + self.assertIn("Errors: 0", output) From 9e436d69813af29482b2b5006b821c71f501b872 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 14:03:59 +0000 Subject: [PATCH 03/13] n+1 queries resolved --- website/models.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/website/models.py b/website/models.py index 7e9149fdcf..4dcc2b6555 100644 --- a/website/models.py +++ b/website/models.py @@ -1401,7 +1401,6 @@ class Project(models.Model): def calculate_freshness(self): """ Calculate freshness using a Bumper-style activity decay model, - based on repository update recency (proxy for activity graph). 
""" now = timezone.now() @@ -1409,34 +1408,31 @@ def calculate_freshness(self): last_30_days = now - timedelta(days=30) last_90_days = now - timedelta(days=90) - # Start from non-archived repos for this project - qs = self.repos.filter(is_archived=False) - - if not qs.exists(): + repos = getattr(self, "repos", None) + if not repos: return 0.0 - qs = qs.exclude(last_commit_date__isnull=True) - - active_7 = qs.filter(last_commit_date__gte=last_7_days).count() - - active_30 = qs.filter( - last_commit_date__lt=last_7_days, - last_commit_date__gte=last_30_days, - ).count() - - active_90 = qs.filter( - last_commit_date__lt=last_30_days, - last_commit_date__gte=last_90_days, - ).count() + counts = repos.filter(is_archived=False).aggregate( + active_7=Count( + "id", + filter=Q(updated_at__gte=last_7_days), + ), + active_30=Count( + "id", + filter=Q(updated_at__lt=last_7_days, updated_at__gte=last_30_days), + ), + active_90=Count( + "id", + filter=Q(updated_at__lt=last_30_days, updated_at__gte=last_90_days), + ), + ) - # Bumper-style decay weights - raw_score = active_7 * 1.0 + active_30 * 0.6 + active_90 * 0.3 + raw_score = counts["active_7"] * 1.0 + counts["active_30"] * 0.6 + counts["active_90"] * 0.3 if raw_score == 0: return 0.0 - # Normalize to 0–100 - MAX_SCORE = 20 # ~20 repos active recently = very fresh + MAX_SCORE = 20 freshness = min((raw_score / MAX_SCORE) * 100, 100) return round(freshness, 2) From 57a9cf4b3004ba1cecb584a397c5ac495884c8c4 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 15:33:12 +0000 Subject: [PATCH 04/13] added more tests --- website/models.py | 22 ++++++++++++++++------ website/tests/test_project_freshness.py | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/website/models.py b/website/models.py index 4dcc2b6555..c8cdf9a8bd 100644 --- a/website/models.py +++ b/website/models.py @@ -21,7 +21,7 @@ from django.core.files.storage import default_storage from django.core.validators 
import MaxValueValidator, MinValueValidator, URLValidator from django.db import models, transaction -from django.db.models import Count, F +from django.db.models import Count, F, Q from django.db.models.signals import post_delete, post_save from django.dispatch import receiver from django.urls import reverse @@ -1401,6 +1401,7 @@ class Project(models.Model): def calculate_freshness(self): """ Calculate freshness using a Bumper-style activity decay model, + based on GitHub commit recency. """ now = timezone.now() @@ -1412,18 +1413,27 @@ def calculate_freshness(self): if not repos: return 0.0 - counts = repos.filter(is_archived=False).aggregate( + counts = repos.filter( + is_archived=False, + last_commit_date__isnull=False, + ).aggregate( active_7=Count( "id", - filter=Q(updated_at__gte=last_7_days), + filter=Q(last_commit_date__gte=last_7_days), ), active_30=Count( "id", - filter=Q(updated_at__lt=last_7_days, updated_at__gte=last_30_days), + filter=Q( + last_commit_date__lt=last_7_days, + last_commit_date__gte=last_30_days, + ), ), active_90=Count( "id", - filter=Q(updated_at__lt=last_30_days, updated_at__gte=last_90_days), + filter=Q( + last_commit_date__lt=last_30_days, + last_commit_date__gte=last_90_days, + ), ), ) @@ -1432,7 +1442,7 @@ def calculate_freshness(self): if raw_score == 0: return 0.0 - MAX_SCORE = 20 + MAX_SCORE = 20 # ~20 actively maintained repos = fully fresh freshness = min((raw_score / MAX_SCORE) * 100, 100) return round(freshness, 2) diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py index 8ecabebb71..263349e7e7 100644 --- a/website/tests/test_project_freshness.py +++ b/website/tests/test_project_freshness.py @@ -49,3 +49,26 @@ def test_freshness_ignores_archived_and_counts_active(self): freshness = self.project.calculate_freshness() self.assertGreater(freshness, 0.0) + + def test_freshness_exact_boundary_7_days(self): + Repo.objects.create( + project=self.project, + name="boundary-7", + 
repo_url="https://github.com/test/boundary-7", + is_archived=False, + last_commit_date=timezone.now() - timedelta(days=6, hours=23), + ) + freshness = self.project.calculate_freshness() + self.assertEqual(freshness, 5.0) + + def test_freshness_max_score_capping(self): + for i in range(25): + Repo.objects.create( + project=self.project, + name=f"repo-{i}", + repo_url=f"https://github.com/test/repo-{i}", + is_archived=False, + last_commit_date=self.now - timedelta(days=1), + ) + freshness = self.project.calculate_freshness() + self.assertEqual(freshness, 100.0) From 889787816f8d07f61f160bcd3d3107b23701b75b Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 15:58:29 +0000 Subject: [PATCH 05/13] added missing last_commit_date --- website/tests/test_project_freshness.py | 1 + 1 file changed, 1 insertion(+) diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py index 263349e7e7..a10b209b77 100644 --- a/website/tests/test_project_freshness.py +++ b/website/tests/test_project_freshness.py @@ -27,6 +27,7 @@ def test_freshness_all_archived_repos(self): name="archived-repo", repo_url="https://github.com/test/archived", is_archived=True, + last_commit_date=self.now - timedelta(days=1), ) freshness = self.project.calculate_freshness() self.assertEqual(freshness, 0.0) From 0d15f53ea471f6ff99e69565a25223e97f4d371e Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 16:17:15 +0000 Subject: [PATCH 06/13] nitpick --- website/models.py | 6 +----- website/tests/test_project_freshness.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/website/models.py b/website/models.py index c8cdf9a8bd..16fd5ed36a 100644 --- a/website/models.py +++ b/website/models.py @@ -1409,11 +1409,7 @@ def calculate_freshness(self): last_30_days = now - timedelta(days=30) last_90_days = now - timedelta(days=90) - repos = getattr(self, "repos", None) - if not repos: - return 0.0 - - counts = repos.filter( + counts = 
self.repos.filter( is_archived=False, last_commit_date__isnull=False, ).aggregate( diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py index a10b209b77..2c1da038c3 100644 --- a/website/tests/test_project_freshness.py +++ b/website/tests/test_project_freshness.py @@ -57,7 +57,7 @@ def test_freshness_exact_boundary_7_days(self): name="boundary-7", repo_url="https://github.com/test/boundary-7", is_archived=False, - last_commit_date=timezone.now() - timedelta(days=6, hours=23), + last_commit_date=self.now() - timedelta(days=6, hours=23), ) freshness = self.project.calculate_freshness() self.assertEqual(freshness, 5.0) From 4550116ddc35e6c5ba425fba79454e0f8df90ed6 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 16:19:06 +0000 Subject: [PATCH 07/13] added missing tests --- website/tests/test_project_freshness.py | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py index 2c1da038c3..6298dbbcb4 100644 --- a/website/tests/test_project_freshness.py +++ b/website/tests/test_project_freshness.py @@ -73,3 +73,62 @@ def test_freshness_max_score_capping(self): ) freshness = self.project.calculate_freshness() self.assertEqual(freshness, 100.0) + + def test_freshness_repo_with_null_last_commit_date(self): + """ + Repos with last_commit_date=None should be excluded from freshness calculation. + """ + Repo.objects.create( + project=self.project, + name="no-commit-data", + repo_url="https://github.com/test/no-commit", + is_archived=False, + last_commit_date=None, + ) + + freshness = self.project.calculate_freshness() + self.assertEqual(freshness, 0.0) + + def test_freshness_multiple_repos_across_time_windows(self): + """ + Test freshness calculation with repos spanning 7/30/90 day windows. 
+ """ + # 2 repos in last 7 days + Repo.objects.create( + project=self.project, + name="recent-1", + repo_url="https://github.com/test/recent-1", + is_archived=False, + last_commit_date=self.now - timedelta(days=2), + ) + Repo.objects.create( + project=self.project, + name="recent-2", + repo_url="https://github.com/test/recent-2", + is_archived=False, + last_commit_date=self.now - timedelta(days=5), + ) + + # 1 repo in 8–30 day window + Repo.objects.create( + project=self.project, + name="medium", + repo_url="https://github.com/test/medium", + is_archived=False, + last_commit_date=self.now - timedelta(days=15), + ) + + # 1 repo in 31–90 day window + Repo.objects.create( + project=self.project, + name="older", + repo_url="https://github.com/test/older", + is_archived=False, + last_commit_date=self.now - timedelta(days=45), + ) + + freshness = self.project.calculate_freshness() + + # raw_score = 2*1.0 + 1*0.6 + 1*0.3 = 2.9 + # freshness = (2.9 / 20) * 100 = 14.5 + self.assertEqual(freshness, 14.5) From 2471406503d93a3cbd0d1d9bc23ab0a2baca638d Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Wed, 17 Dec 2025 16:32:33 +0000 Subject: [PATCH 08/13] runtime error resolved --- website/tests/test_project_freshness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/tests/test_project_freshness.py b/website/tests/test_project_freshness.py index 6298dbbcb4..d406a86d53 100644 --- a/website/tests/test_project_freshness.py +++ b/website/tests/test_project_freshness.py @@ -57,7 +57,7 @@ def test_freshness_exact_boundary_7_days(self): name="boundary-7", repo_url="https://github.com/test/boundary-7", is_archived=False, - last_commit_date=self.now() - timedelta(days=6, hours=23), + last_commit_date=self.now - timedelta(days=6, hours=23), ) freshness = self.project.calculate_freshness() self.assertEqual(freshness, 5.0) From 3829d7ff11af36b21e2c8f192864c690a8ad9060 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Thu, 18 Dec 2025 21:07:27 +0000 Subject: 
[PATCH 09/13] batch commit updated --- .../commands/update_project_freshness.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py index 76a0638e62..efaa1fd3f3 100644 --- a/website/management/commands/update_project_freshness.py +++ b/website/management/commands/update_project_freshness.py @@ -1,9 +1,12 @@ import time from django.core.management.base import BaseCommand +from django.db import transaction from website.models import Project +BATCH_SIZE = 500 + class Command(BaseCommand): help = "Recalculate and update freshness score for all projects" @@ -11,26 +14,29 @@ class Command(BaseCommand): def handle(self, *args, **options): start_time = time.time() - projects = Project.objects.all() - total = projects.count() + qs = Project.objects.only("id") + total = qs.count() processed = 0 errors = 0 self.stdout.write(f"Starting freshness update for {total} projects") - for idx, project in enumerate(projects, start=1): - try: - freshness = project.calculate_freshness() - project.freshness = freshness - project.save(update_fields=["freshness"]) - processed += 1 - except Exception as e: - errors += 1 - self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") - - if idx % 100 == 0: - self.stdout.write(f"Processed {idx}/{total} projects...") + for offset in range(0, total, BATCH_SIZE): + batch = qs[offset : offset + BATCH_SIZE] + + with transaction.atomic(): + for project in batch: + try: + freshness = project.calculate_freshness() + project.freshness = freshness + project.save(update_fields=["freshness"]) + processed += 1 + except Exception as e: + errors += 1 + self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") + + self.stdout.write(f"Processed {min(offset + BATCH_SIZE, total)}/{total} projects...") duration = round(time.time() - start_time, 2) From 08b37bff341eb5f5a4876924a96aa9ac49c14e36 Mon Sep 17 
00:00:00 2001 From: Nachiket Roy Date: Thu, 18 Dec 2025 21:13:49 +0000 Subject: [PATCH 10/13] rollback fixed --- .../commands/update_project_freshness.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py index efaa1fd3f3..35be2a9972 100644 --- a/website/management/commands/update_project_freshness.py +++ b/website/management/commands/update_project_freshness.py @@ -25,16 +25,16 @@ def handle(self, *args, **options): for offset in range(0, total, BATCH_SIZE): batch = qs[offset : offset + BATCH_SIZE] - with transaction.atomic(): - for project in batch: - try: + for project in batch: + try: + with transaction.atomic(): freshness = project.calculate_freshness() project.freshness = freshness project.save(update_fields=["freshness"]) - processed += 1 - except Exception as e: - errors += 1 - self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") + processed += 1 + except Exception as e: + errors += 1 + self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") self.stdout.write(f"Processed {min(offset + BATCH_SIZE, total)}/{total} projects...") From c934c8c34e97ef027da554d1fec1a4a6f005fbd7 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Thu, 18 Dec 2025 21:20:18 +0000 Subject: [PATCH 11/13] concurrency fixed --- .../commands/update_project_freshness.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py index 35be2a9972..7bc16210c1 100644 --- a/website/management/commands/update_project_freshness.py +++ b/website/management/commands/update_project_freshness.py @@ -23,18 +23,16 @@ def handle(self, *args, **options): self.stdout.write(f"Starting freshness update for {total} projects") for offset in range(0, total, BATCH_SIZE): - batch = qs[offset : offset + BATCH_SIZE] 
- - for project in batch: - try: - with transaction.atomic(): - freshness = project.calculate_freshness() - project.freshness = freshness + try: + with transaction.atomic(): + batch = list(qs.select_for_update()[offset : offset + BATCH_SIZE]) + for project in batch: + project.freshness = project.calculate_freshness() project.save(update_fields=["freshness"]) - processed += 1 - except Exception as e: - errors += 1 - self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") + processed += 1 + except Exception as e: + errors += 1 + self.stderr.write(f"[ERROR] Project ID {project.id}: {str(e)}") self.stdout.write(f"Processed {min(offset + BATCH_SIZE, total)}/{total} projects...") From 6e3ebb2deb5bf621ab0e2caa884c955f7a389caf Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Thu, 18 Dec 2025 21:28:16 +0000 Subject: [PATCH 12/13] per-project transactions to avoid batch-wide rollbacks --- .../commands/update_project_freshness.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py index 7bc16210c1..313a6e1366 100644 --- a/website/management/commands/update_project_freshness.py +++ b/website/management/commands/update_project_freshness.py @@ -23,16 +23,20 @@ def handle(self, *args, **options): self.stdout.write(f"Starting freshness update for {total} projects") for offset in range(0, total, BATCH_SIZE): - try: - with transaction.atomic(): - batch = list(qs.select_for_update()[offset : offset + BATCH_SIZE]) - for project in batch: + batch_ids = list(qs.values_list("id", flat=True)[offset : offset + BATCH_SIZE]) + + for project_id in batch_ids: + try: + with transaction.atomic(): + project = Project.objects.select_for_update().get(pk=project_id) + project.freshness = project.calculate_freshness() project.save(update_fields=["freshness"]) processed += 1 - except Exception as e: - errors += 1 - self.stderr.write(f"[ERROR] 
Project ID {project.id}: {str(e)}") + + except Exception as e: + errors += 1 + self.stderr.write(f"[ERROR] Project ID {project_id}: {str(e)}") self.stdout.write(f"Processed {min(offset + BATCH_SIZE, total)}/{total} projects...") From ecd3df702fa9ac65683957d2919b22fcd395c9f5 Mon Sep 17 00:00:00 2001 From: Nachiket Roy Date: Thu, 18 Dec 2025 21:44:16 +0000 Subject: [PATCH 13/13] Progress message improved --- website/management/commands/update_project_freshness.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/management/commands/update_project_freshness.py b/website/management/commands/update_project_freshness.py index 313a6e1366..4efb881862 100644 --- a/website/management/commands/update_project_freshness.py +++ b/website/management/commands/update_project_freshness.py @@ -38,7 +38,10 @@ def handle(self, *args, **options): errors += 1 self.stderr.write(f"[ERROR] Project ID {project_id}: {str(e)}") - self.stdout.write(f"Processed {min(offset + BATCH_SIZE, total)}/{total} projects...") + self.stdout.write( + f"Progress: {min(offset + BATCH_SIZE, total)}/{total} attempted " + f"({processed} successful, {errors} errors)" + ) duration = round(time.time() - start_time, 2)