diff --git a/readthedocs/analytics/migrations/0008_no_cascade.py b/readthedocs/analytics/migrations/0008_no_cascade.py
new file mode 100644
index 00000000000..783ce120087
--- /dev/null
+++ b/readthedocs/analytics/migrations/0008_no_cascade.py
@@ -0,0 +1,45 @@
+# Generated by Django 5.2.3 on 2025-08-06 18:28
+
+import django.db.models.deletion
+from django.db import migrations
+from django.db import models
+from django_safemigrate import Safe
+
+
+class Migration(migrations.Migration):
+    """
+    Set ``on_delete=DO_NOTHING`` on ``PageView.project`` and ``PageView.version``.
+
+    With ``DO_NOTHING`` Django no longer removes PageView rows itself when the
+    related project or version is deleted.
+    """
+    dependencies = [
+        ("analytics", "0007_index_on_pageview_date"),
+        ("builds", "0064_healthcheck"),
+        ("projects", "0152_create_gh_app_integration"),
+    ]
+
+    safe = Safe.before_deploy()
+
+    operations = [
+        migrations.AlterField(
+            model_name="pageview",
+            name="project",
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.DO_NOTHING,
+                related_name="page_views",
+                to="projects.project",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="pageview",
+            name="version",
+            field=models.ForeignKey(
+                null=True,
+                on_delete=django.db.models.deletion.DO_NOTHING,
+                related_name="page_views",
+                to="builds.version",
+                verbose_name="Version",
+            ),
+        ),
+    ]
diff --git a/readthedocs/analytics/models.py b/readthedocs/analytics/models.py
index 58813ce325e..81247892de1 100644
--- a/readthedocs/analytics/models.py
+++ b/readthedocs/analytics/models.py
@@ -59,7 +59,7 @@ class PageView(models.Model):
     project = models.ForeignKey(
         Project,
         related_name="page_views",
-        on_delete=models.CASCADE,
+        on_delete=models.DO_NOTHING,
     )
     # NOTE: this could potentially be removed,
     # since isn't being used and not all page
@@ -68,7 +68,7 @@ class PageView(models.Model):
         Version,
         verbose_name=_("Version"),
         related_name="page_views",
-        on_delete=models.CASCADE,
+        on_delete=models.DO_NOTHING,
         null=True,
     )
     path = models.CharField(
diff --git a/readthedocs/analytics/tasks/__init__.py b/readthedocs/analytics/tasks/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git 
a/readthedocs/analytics/tasks/pageviews.py b/readthedocs/analytics/tasks/pageviews.py
new file mode 100644
index 00000000000..1a9f96e410f
--- /dev/null
+++ b/readthedocs/analytics/tasks/pageviews.py
@@ -0,0 +1,37 @@
+import structlog
+
+from readthedocs.analytics.models import PageView
+from readthedocs.worker import app
+
+
+log = structlog.get_logger(__name__)
+
+
+@app.task(queue="web")
+def delete_project_pageviews(project_slug, version_slug=None):
+    """
+    Delete the PageView rows of a project, or of only one of its versions.
+
+    :param project_slug: Slug of the project whose page views are removed.
+    :param version_slug: When not ``None``, only page views attached to the
+        version with this slug are removed.
+
+    .. note::
+
+       NOTE(review): rows are matched through the project/version slug, and
+       ``Project.delete()`` queues this task right before deleting the
+       project row. If a worker picks the task up after that row is gone,
+       the filter matches nothing and the rows are left behind -- confirm
+       the ordering, or consider passing primary keys instead of slugs.
+    """
+    queryset = PageView.objects.filter(project__slug=project_slug)
+    if version_slug is not None:
+        queryset = queryset.filter(version__slug=version_slug)
+    # ``delete()`` returns ``(total_deleted, per_model_counts)``.
+    count, _ = queryset.delete()
+    log.info(
+        "Deleted PageViews for project",
+        project_slug=project_slug,
+        version_slug=version_slug,
+        count=count,
+    )
diff --git a/readthedocs/analytics/tests/test_tasks.py b/readthedocs/analytics/tests/test_tasks.py
new file mode 100644
index 00000000000..89d7efb02d8
--- /dev/null
+++ b/readthedocs/analytics/tests/test_tasks.py
@@ -0,0 +1,82 @@
+from django.contrib.auth import get_user_model
+from django.contrib.auth.models import User
+from django.test import TestCase
+from readthedocs.projects.models import Project
+from readthedocs.builds.models import Version
+from readthedocs.analytics.models import PageView
+from django.utils import timezone
+from django.conf import settings
+from django_dynamic_fixture import get
+
+class PageViewTaskTests(TestCase):
+    """Check that PageViews are removed when their project or version is deleted."""
+
+    def setUp(self):
+        self.user = get(User)
+        self.project = get(Project, users=[self.user], slug="test-project")
+        self.project.save()
+
+        # Create some PageViews for the project
+        get(
+            PageView,
+            project=self.project,
+            path="/index.html",
+            full_path="/en/latest/index.html",
+            view_count=5,
+            date=timezone.now().date(),
+            status=200,
+        )
+        get(
+            PageView,
+            project=self.project,
+            path="/about.html",
+            full_path="/en/latest/about.html",
+            view_count=2,
+            date=timezone.now().date(),
+            status=200,
+        )
+
+    def test_pageview_cleanup(self):
+        # Keep the pk around: Django clears it on the instance after delete().
+        project_pk = self.project.pk
+
+        # The PageViews should be present initially
+        self.assertEqual(PageView.objects.filter(project_id=project_pk).count(), 2)
+
+        # Delete the project
+        self.project.delete()
+
+        # The PageViews should be deleted by the background task.
+        # Filter on the FK column itself: a ``project__slug`` lookup joins the
+        # deleted project row and would report 0 even if the rows were left behind.
+        self.assertEqual(PageView.objects.filter(project_id=project_pk).count(), 0)
+
+    def test_pageview_cleanup_on_version_delete(self):
+        # Create a version for the project
+        version = get(Version, project=self.project, slug="v1.0", verbose_name="v1.0")
+        version.save()
+        version_pk = version.pk
+
+        # Create PageViews for this version
+        get(
+            PageView,
+            project=self.project,
+            version=version,
+            path="/v1.0/index.html",
+            full_path="/en/v1.0/index.html",
+            view_count=3,
+            date=timezone.now().date(),
+            status=200,
+        )
+
+        # There should be 3 PageViews now (2 from setUp, 1 for version)
+        self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 3)
+        self.assertEqual(PageView.objects.filter(version_id=version_pk).count(), 1)
+
+        # Delete the version
+        version.delete()
+
+        # The PageView for this version should be deleted, others remain.
+        # ``version_id`` avoids joining the version row that no longer exists.
+        self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 2)
+        self.assertEqual(PageView.objects.filter(version_id=version_pk).count(), 0)
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index 907abf5d51d..40cc0a76f95 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -671,7 +671,7 @@ def save(self, *args, **kwargs):
     def delete(self, *args, **kwargs):
         from readthedocs.projects.tasks.utils import clean_project_resources
 
-        # Remove extra resources
+        # Remove HTML files, analytics data, etc.
         clean_project_resources(self)
 
         super().delete(*args, **kwargs)
diff --git a/readthedocs/projects/tasks/utils.py b/readthedocs/projects/tasks/utils.py
index 9a1fa8a0184..5e90438353c 100644
--- a/readthedocs/projects/tasks/utils.py
+++ b/readthedocs/projects/tasks/utils.py
@@ -9,6 +9,7 @@
 from django.db.models import Q
 from django.utils import timezone
 
+from readthedocs.analytics.tasks.pageviews import delete_project_pageviews
 from readthedocs.builds.constants import BUILD_FINAL_STATES
 from readthedocs.builds.constants import BUILD_STATE_CANCELLED
 from readthedocs.builds.constants import EXTERNAL
@@ -95,6 +96,12 @@ def clean_project_resources(project, version=None, version_slug=None):
     else:
         project.imported_files.all().delete()
 
+    # Remove PageViews for this project async,
+    # since they can be very slow to delete.
+    # NOTE(review): ``version_slug=None`` deletes every PageView of the
+    # project -- confirm it is always set here when ``version`` is given.
+    delete_project_pageviews.delay(project_slug=project.slug, version_slug=version_slug)
+
 
 @app.task()
 def finish_unhealthy_builds():