From 282c84055743eeffbe17af0297a11254763fbe90 Mon Sep 17 00:00:00 2001 From: Andrew Cheng Date: Thu, 2 Jul 2026 16:18:37 +0000 Subject: [PATCH 1/2] Add a base_version filter --- pulpcore/app/viewsets/content.py | 6 +++ pulpcore/app/viewsets/custom_filters.py | 51 ++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pulpcore/app/viewsets/content.py b/pulpcore/app/viewsets/content.py index 0fd5f7024f6..48a1481d885 100644 --- a/pulpcore/app/viewsets/content.py +++ b/pulpcore/app/viewsets/content.py @@ -23,6 +23,7 @@ ContentAddedRepositoryVersionFilter, ContentRemovedRepositoryVersionFilter, ContentRepositoryVersionFilter, + RepositoryVersionBaseFilter, ) @@ -125,6 +126,10 @@ class ContentFilter(BaseFilterSet): Return Content which was added in this repository version. repository_version_removed: Return Content which was removed from this repository version. + base_version: + When combined with repository_version_added / repository_version_removed, compute the + net difference relative to this base repository version instead of the filtered + version's immediate predecessor. Has no effect on its own. orphaned_for: Return Content which has been orphaned for a given number of minutes; -1 uses ORPHAN_PROTECTION_TIME value. @@ -135,6 +140,7 @@ class ContentFilter(BaseFilterSet): repository_version = ContentRepositoryVersionFilter() repository_version_added = ContentAddedRepositoryVersionFilter() repository_version_removed = ContentRemovedRepositoryVersionFilter() + base_version = RepositoryVersionBaseFilter() orphaned_for = OrphanedFilter( help_text="Minutes Content has been orphaned for. -1 uses ORPHAN_PROTECTION_TIME." ) diff --git a/pulpcore/app/viewsets/custom_filters.py b/pulpcore/app/viewsets/custom_filters.py index 72db8022db7..468802055d7 100644 --- a/pulpcore/app/viewsets/custom_filters.py +++ b/pulpcore/app/viewsets/custom_filters.py @@ -162,6 +162,53 @@ def filter(self, qs, value): """ raise NotImplementedError() + def get_base_version(self, field_name="base_version"): + """ + Resolve the companion ``base_version`` filter, if the request supplied one. + + The added/removed filters use this to diff against an *arbitrary* base repository + version instead of the filtered version's immediate predecessor. + + Args: + field_name (string): The name of the companion base-version filter on the filterset. + + Returns: + pulpcore.app.models.RepositoryVersion or None: The resolved base version, or None + when no base version was supplied. + """ + if self.parent is None: + return None + base_value = self.parent.form.cleaned_data.get(field_name) + if not base_value: + return None + return self.get_repository_version(base_value) + + +class RepositoryVersionBaseFilter(RepoVersionHrefPrnFilter): + """ + Companion filter that designates the base repository version for + ``repository_version_added`` / ``repository_version_removed`` diffs. + + On its own this filter does not alter the queryset. It is consumed by the added/removed + filters to compute the net difference between two arbitrary repository versions, rather than + the single-step difference against the filtered version's immediate predecessor. + """ + + def __init__(self, *args, **kwargs): + kwargs.setdefault( + "help_text", + _( + "Repository Version referenced by HREF/PRN to use as the base for " + "repository_version_added / repository_version_removed. When set, added/removed " + "content is computed relative to this version instead of the immediate predecessor." + ), + ) + super().__init__(*args, **kwargs) + + def filter(self, qs, value): + # No-op on its own; the value is consumed by the added/removed filters. + return qs + class RepositoryVersionFilter(RepoVersionHrefPrnFilter): """ @@ -251,7 +298,7 @@ def filter(self, qs, value): return qs repo_version = self.get_repository_version(value) - return qs.filter(pk__in=repo_version.added()) + return qs.filter(pk__in=repo_version.added(base_version=self.get_base_version())) class ContentRemovedRepositoryVersionFilter(RepoVersionHrefPrnFilter): @@ -273,7 +320,7 @@ def filter(self, qs, value): return qs repo_version = self.get_repository_version(value) - return qs.filter(pk__in=repo_version.removed()) + return qs.filter(pk__in=repo_version.removed(base_version=self.get_base_version())) class CharInFilter(BaseInFilter, CharFilter): From 3b22794120a0c2eee70d9f5c3459e07d3b8587b6 Mon Sep 17 00:00:00 2001 From: Andrew Cheng Date: Thu, 2 Jul 2026 20:56:48 +0000 Subject: [PATCH 2/2] implement unnesting and add a temporary changelog --- CHANGES/+base-version-content-filter.feature | 6 ++++ pulpcore/app/models/repository.py | 31 ++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 CHANGES/+base-version-content-filter.feature diff --git a/CHANGES/+base-version-content-filter.feature b/CHANGES/+base-version-content-filter.feature new file mode 100644 index 00000000000..8037ffaf119 --- /dev/null +++ b/CHANGES/+base-version-content-filter.feature @@ -0,0 +1,6 @@ +Added a ``base_version`` filter to the content list endpoints. When combined with +``repository_version_added`` or ``repository_version_removed``, it returns the net set of content +added or removed between two arbitrary repository versions instead of only the single-step +difference against the filtered version's immediate predecessor. The diff is computed database-side +via ``unnest`` subqueries over each version's ``content_ids``, avoiding loading the arrays into +memory or passing them as per-query parameters. diff --git a/pulpcore/app/models/repository.py b/pulpcore/app/models/repository.py index 5470fb5d238..7c1ba1b1c0a 100644 --- a/pulpcore/app/models/repository.py +++ b/pulpcore/app/models/repository.py @@ -1000,13 +1000,26 @@ def get_content(self, content_qs=None): content_ids = self.content_ids if len(content_ids) >= 65535: # Workaround for PostgreSQL's limit on the number of parameters in a query - content_ids = ( - RepositoryVersion.objects.filter(pk=self.pk) - .annotate(cids=Func(F("content_ids"), function="unnest")) - .values_list("cids", flat=True) - ) + content_ids = self.content_ids_subquery() return content_qs.filter(pk__in=content_ids) + def content_ids_subquery(self): + """ + Return this version's ``content_ids`` as a database-side ``unnest`` subquery. + + Using a subquery keeps the content unit UUIDs inside PostgreSQL instead of loading the + whole array into Python and passing each UUID as a bound query parameter. This avoids the + per-query parameter limit and the memory/serialization cost for large repository versions. + + Returns: + django.db.models.QuerySet: A values queryset yielding the content unit UUIDs. + """ + return ( + RepositoryVersion.objects.filter(pk=self.pk) + .annotate(cids=Func(F("content_ids"), function="unnest")) + .values_list("cids", flat=True) + ) + @property def content(self): """ @@ -1119,8 +1132,8 @@ def added(self, base_version=None): if not base_version: return Content.objects.filter(version_memberships__version_added=self) - return Content.objects.filter(pk__in=self.content_ids).exclude( - pk__in=base_version.content_ids + return Content.objects.filter(pk__in=self.content_ids_subquery()).exclude( + pk__in=base_version.content_ids_subquery() ) def removed(self, base_version=None): @@ -1134,8 +1147,8 @@ def removed(self, base_version=None): if not base_version: return Content.objects.filter(version_memberships__version_removed=self) - return Content.objects.filter(pk__in=base_version.content_ids).exclude( - pk__in=self.content_ids + return Content.objects.filter(pk__in=base_version.content_ids_subquery()).exclude( + pk__in=self.content_ids_subquery() ) def contains(self, content):