Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions readthedocs/analytics/migrations/0008_no_cascade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 5.2.3 on 2025-08-06 18:28

import django.db.models.deletion
from django.db import migrations
from django.db import models
from django_safemigrate import Safe


class Migration(migrations.Migration):
dependencies = [
("analytics", "0007_index_on_pageview_date"),
("builds", "0064_healthcheck"),
("projects", "0152_create_gh_app_integration"),
]

safe = Safe.before_deploy()

operations = [
migrations.AlterField(
model_name="pageview",
name="project",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="page_views",
to="projects.project",
),
),
migrations.AlterField(
model_name="pageview",
name="version",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="page_views",
to="builds.version",
verbose_name="Version",
),
),
]
4 changes: 2 additions & 2 deletions readthedocs/analytics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class PageView(models.Model):
project = models.ForeignKey(
Project,
related_name="page_views",
on_delete=models.CASCADE,
on_delete=models.DO_NOTHING,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose you need SET_NULL here, otherwise there will be db integrity issues.

)
# NOTE: this could potentially be removed,
# since isn't being used and not all page
Expand All @@ -68,7 +68,7 @@ class PageView(models.Model):
Version,
verbose_name=_("Version"),
related_name="page_views",
on_delete=models.CASCADE,
on_delete=models.DO_NOTHING,
null=True,
)
path = models.CharField(
Expand Down
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we already have a analytics/tasks.py file -- there is no need to create a new directory for this.

Empty file.
24 changes: 24 additions & 0 deletions readthedocs/analytics/tasks/pageviews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import structlog

from readthedocs.analytics.models import PageView
from readthedocs.worker import app


log = structlog.get_logger(__name__)


@app.task(queue="web")
def delete_project_pageviews(project_slug, version_slug=None):
"""
Delete all PageView objects for a given project slug, or a specific version if version_slug is provided.
"""
queryset = PageView.objects.filter(project__slug=project_slug)
if version_slug is not None:
queryset = queryset.filter(version__slug=version_slug)
count, _ = queryset.delete()
log.info(
"Deleted PageViews for project",
project_slug=project_slug,
version_slug=version_slug,
count=count,
)
74 changes: 74 additions & 0 deletions readthedocs/analytics/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User
from django.test import TestCase
from readthedocs.projects.models import Project
from readthedocs.builds.models import Version
from readthedocs.analytics.models import PageView
from django.utils import timezone
from django.conf import settings
from django_dynamic_fixture import get

class PageViewTaskTests(TestCase):

def setUp(self):
self.user = get(User)
self.project = get(Project, users=[self.user], slug="test-project")
self.project.save()

# Create some PageViews for the project
get(
PageView,
project=self.project,
path="/index.html",
full_path="/en/latest/index.html",
view_count=5,
date=timezone.now().date(),
status=200,
)
get(
PageView,
project=self.project,
path="/about.html",
full_path="/en/latest/about.html",
view_count=2,
date=timezone.now().date(),
status=200,
)

def test_pageview_cleanup(self):
# The PageViews should be present initially
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 2)

# Delete the project
self.project.delete()

# The PageViews should be deleted by the background task
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 0)
Comment on lines +45 to +46
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are testing the pageviews were deleted, but we don't know if they were deleted because of the task or the DB cascade. I think you want to assert that your task was called here.


def test_pageview_cleanup_on_version_delete(self):
# Create a version for the project
version = get(Version, project=self.project, slug="v1.0", verbose_name="v1.0")
version.save()

# Create PageViews for this version
get(
PageView,
project=self.project,
version=version,
path="/v1.0/index.html",
full_path="/en/v1.0/index.html",
view_count=3,
date=timezone.now().date(),
status=200,
)

# There should be 3 PageViews now (2 from setUp, 1 for version)
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 3)
self.assertEqual(PageView.objects.filter(version__slug=version.slug).count(), 1)

# Delete the version
version.delete()

# The PageView for this version should be deleted, others remain
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 2)
self.assertEqual(PageView.objects.filter(version__slug=version.slug).count(), 0)
2 changes: 1 addition & 1 deletion readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ def save(self, *args, **kwargs):
def delete(self, *args, **kwargs):
from readthedocs.projects.tasks.utils import clean_project_resources

# Remove extra resources
# Remove HTML files, analytics data, etc.
clean_project_resources(self)

super().delete(*args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions readthedocs/projects/tasks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from django.db.models import Q
from django.utils import timezone

from readthedocs.analytics.tasks.pageviews import delete_project_pageviews
from readthedocs.builds.constants import BUILD_FINAL_STATES
from readthedocs.builds.constants import BUILD_STATE_CANCELLED
from readthedocs.builds.constants import EXTERNAL
Expand Down Expand Up @@ -95,6 +96,10 @@ def clean_project_resources(project, version=None, version_slug=None):
else:
project.imported_files.all().delete()
Comment on lines 96 to 97
Copy link
Member Author

@ericholscher ericholscher Aug 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why we're deleting these here explicitly -- was this a similar issue and we're trying to be more efficient with the delete? Should they also be done in a task in a similar way?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code probably does nothing, since we have CASCADE in the definition of the model: https://github.com/readthedocs/readthedocs.org/blob/main/readthedocs/projects/models.py#L1549-L1561

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a version is de-activated, this function is also called.


# Remove PageViews for this project async,
# since they can be very slow to delete.
Comment on lines +99 to +100
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to update the docstring of this function to reflect this.

delete_project_pageviews.delay(project_slug=project.slug, version_slug=version_slug)


@app.task()
def finish_unhealthy_builds():
Expand Down