-
-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Attempt to move Pageview deletion to a background task #12386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b48f542
822ce61
94fdd15
d2a6280
2accaed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Generated by Django 5.2.3 on 2025-08-06 18:28 | ||
|
||
import django.db.models.deletion | ||
from django.db import migrations | ||
from django.db import models | ||
from django_safemigrate import Safe | ||
|
||
|
||
class Migration(migrations.Migration): | ||
dependencies = [ | ||
("analytics", "0007_index_on_pageview_date"), | ||
("builds", "0064_healthcheck"), | ||
("projects", "0152_create_gh_app_integration"), | ||
] | ||
|
||
safe = Safe.before_deploy() | ||
|
||
operations = [ | ||
migrations.AlterField( | ||
model_name="pageview", | ||
name="project", | ||
field=models.ForeignKey( | ||
on_delete=django.db.models.deletion.DO_NOTHING, | ||
related_name="page_views", | ||
to="projects.project", | ||
), | ||
), | ||
migrations.AlterField( | ||
model_name="pageview", | ||
name="version", | ||
field=models.ForeignKey( | ||
null=True, | ||
on_delete=django.db.models.deletion.DO_NOTHING, | ||
related_name="page_views", | ||
to="builds.version", | ||
verbose_name="Version", | ||
), | ||
), | ||
] |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we already have a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import structlog | ||
|
||
from readthedocs.analytics.models import PageView | ||
from readthedocs.worker import app | ||
|
||
|
||
log = structlog.get_logger(__name__) | ||
|
||
|
||
@app.task(queue="web") | ||
def delete_project_pageviews(project_slug, version_slug=None): | ||
""" | ||
Delete all PageView objects for a given project slug, or a specific version if version_slug is provided. | ||
""" | ||
queryset = PageView.objects.filter(project__slug=project_slug) | ||
if version_slug is not None: | ||
queryset = queryset.filter(version__slug=version_slug) | ||
count, _ = queryset.delete() | ||
log.info( | ||
"Deleted PageViews for project", | ||
project_slug=project_slug, | ||
version_slug=version_slug, | ||
count=count, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from django.contrib.auth import get_user_model | ||
from django.contrib.auth.models import User | ||
from django.test import TestCase | ||
from readthedocs.projects.models import Project | ||
from readthedocs.builds.models import Version | ||
from readthedocs.analytics.models import PageView | ||
from django.utils import timezone | ||
from django.conf import settings | ||
from django_dynamic_fixture import get | ||
|
||
class PageViewTaskTests(TestCase): | ||
|
||
def setUp(self): | ||
self.user = get(User) | ||
self.project = get(Project, users=[self.user], slug="test-project") | ||
self.project.save() | ||
|
||
# Create some PageViews for the project | ||
get( | ||
PageView, | ||
project=self.project, | ||
path="/index.html", | ||
full_path="/en/latest/index.html", | ||
view_count=5, | ||
date=timezone.now().date(), | ||
status=200, | ||
) | ||
get( | ||
PageView, | ||
project=self.project, | ||
path="/about.html", | ||
full_path="/en/latest/about.html", | ||
view_count=2, | ||
date=timezone.now().date(), | ||
status=200, | ||
) | ||
|
||
def test_pageview_cleanup(self): | ||
# The PageViews should be present initially | ||
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 2) | ||
|
||
# Delete the project | ||
self.project.delete() | ||
|
||
# The PageViews should be deleted by the background task | ||
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 0) | ||
Comment on lines
+45
to
+46
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are testing the pageviews were deleted, but we don't know if they were deleted because of the task or the DB cascade. I think you want to assert that your task was called here. |
||
|
||
def test_pageview_cleanup_on_version_delete(self): | ||
# Create a version for the project | ||
version = get(Version, project=self.project, slug="v1.0", verbose_name="v1.0") | ||
version.save() | ||
|
||
# Create PageViews for this version | ||
get( | ||
PageView, | ||
project=self.project, | ||
version=version, | ||
path="/v1.0/index.html", | ||
full_path="/en/v1.0/index.html", | ||
view_count=3, | ||
date=timezone.now().date(), | ||
status=200, | ||
) | ||
|
||
# There should be 3 PageViews now (2 from setUp, 1 for version) | ||
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 3) | ||
self.assertEqual(PageView.objects.filter(version__slug=version.slug).count(), 1) | ||
|
||
# Delete the version | ||
version.delete() | ||
|
||
# The PageView for this version should be deleted, others remain | ||
self.assertEqual(PageView.objects.filter(project__slug='test-project').count(), 2) | ||
self.assertEqual(PageView.objects.filter(version__slug=version.slug).count(), 0) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
from django.db.models import Q | ||
from django.utils import timezone | ||
|
||
from readthedocs.analytics.tasks.pageviews import delete_project_pageviews | ||
from readthedocs.builds.constants import BUILD_FINAL_STATES | ||
from readthedocs.builds.constants import BUILD_STATE_CANCELLED | ||
from readthedocs.builds.constants import EXTERNAL | ||
|
@@ -95,6 +96,10 @@ def clean_project_resources(project, version=None, version_slug=None): | |
else: | ||
project.imported_files.all().delete() | ||
Comment on lines
96
to
97
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious why we're deleting these here explicitly -- was this a similar issue and we're trying to be more efficient with the delete? Should they also be done in a task in a similar way? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code probably does nothing, since we have CASCADE in the definition of the model: https://github.com/readthedocs/readthedocs.org/blob/main/readthedocs/projects/models.py#L1549-L1561 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When a version is de-activated, this function is also called. |
||
|
||
# Remove PageViews for this project async, | ||
# since they can be very slow to delete. | ||
Comment on lines
+99
to
+100
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to update the docstring of this function to reflect this. |
||
delete_project_pageviews.delay(project_slug=project.slug, version_slug=version_slug) | ||
|
||
|
||
@app.task() | ||
def finish_unhealthy_builds(): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose you need
SET_NULL
here, otherwise there will be db integrity issues.