From 8513855211b36cf612e817dd088baaf2db921aa1 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 01:36:52 +0200 Subject: [PATCH 01/19] perf(tags): batch_mode + per-batch bulk inheritance during import (Phase B Stage 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps the import / reimport hot loop in `tag_inheritance.batch_mode()` and bulk-applies inherited Product tags per batch *before* `post_process_findings_batch` dispatches, so rules / deduplication see inherited tags on `finding.tags`. Changes: - `process_findings` (DefaultImporter + DefaultReImporter) now runs its finding-creation loop inside `batch_mode()`. Per batch, right after `apply_import_tags_for_batch`, calls a new helper `apply_inherited_tags_for_findings(batch_findings)` that bulk-syncs inherited tags on the batch's Findings plus the Endpoints (V2) / Locations (V3) reachable from them via FK chain. Inheritance is therefore applied to the persisted children before the post-process task dispatches. - `inherit_instance_tags` in `dojo/tags_signals.py` now early-returns when `tag_inheritance.is_in_batch_mode()`, so the batch wrap transparently suppresses per-row inheritance work for any caller — including `bulk_create` cleanup paths that invoke it manually. `inherit_tags_on_instance` post_save delegates to that helper, so the gate also covers signal-driven fires. - `EndpointManager.get_or_create_endpoints` replaces its per-row `inherit_instance_tags(ep)` cleanup loop with a single `apply_inherited_tags_for_endpoints(created)` bulk call. Inside the importer the per-batch helper already covers these endpoints via `Endpoint.status_endpoint.finding`; the bulk call is kept as a defensive hook for any non-importer caller. - `propagate_tags_on_product_sync` (used by the product-tag-toggle Celery task) gains an early-exit when neither system-wide nor per-product inheritance is enabled, eliminating ~9 wasted reads per call on inheritance-off products. State transitions (toggling either flag) still trigger a full sweep through their existing signal handlers. - `Location` gains `iter_related_products()`: a related-manager (`self.products` + `self.findings`) implementation of `all_related_products()` that returns `list[Product]`. Callers that pre-issue `prefetch_related("products__product__tags", "findings__finding__test__engagement__product__tags")` get zero extra queries per Location. The existing JOIN'd `all_related_products()` is kept for the per-instance signal path where prefetching is not possible. - `_inherited_tag_names_for_location` (the per-Location callback used to compute the inherited target set) switches to `iter_related_products()`; both call sites (`propagate_tags_on_product_sync` V3 branch and `apply_inherited_tags_for_findings` V3 branch) now prefetch the chain. Query-count impact on `unittests/test_tag_inheritance_perf.py` (pins updated in this commit): | Hot path | Before | After | Δ | |-----------------------------------------|--------:|-------:|------:| | ZAP scan import V2 (19 findings) | 1385 | 477 | -908 | | ZAP scan import V3 | 1263 | 945 | -318 | | ZAP reimport no-change V2 | 69 | 75 | +6 | | ZAP reimport no-change V3 | 87 | 102 | +15 | | Product tag add → 100 locations (V3) | 316 | 125 | -191 | | Product tag remove → 100 locations (V3) | 266 | 75 | -191 | Small reimport-no-change regressions are the unavoidable per-batch helper read cost (2 reads × Finding + 2 reads × Endpoint/Location + 1 product tags read). Real-work imports drop significantly because per-row `_manage_inherited_tags` work no longer fires inside the loop. --- dojo/importers/default_importer.py | 19 +++++ dojo/importers/default_reimporter.py | 19 +++++ dojo/importers/endpoint_manager.py | 16 ++-- dojo/location/models.py | 34 ++++++++ dojo/product/helpers.py | 109 +++++++++++++++++++++++-- dojo/tags_signals.py | 7 ++ unittests/test_tag_inheritance_perf.py | 13 +-- 7 files changed, 197 insertions(+), 20 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 428b8ad01a7..b4e047225a0 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -5,6 +5,7 @@ from django.db.models.query_utils import Q from django.urls import reverse +from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser @@ -18,6 +19,7 @@ Test_Import, ) from dojo.notifications.helper import async_create_notification +from dojo.product.helpers import apply_inherited_tags_for_findings from dojo.tag_utils import bulk_apply_parser_tags from dojo.utils import get_full_url, perform_product_grading from dojo.validators import clean_tags @@ -161,6 +163,19 @@ def process_findings( self, parsed_findings: list[Finding], **kwargs: dict, + ) -> list[Finding]: + # Whole hot loop runs under `batch_mode()`: per-row inheritance signals + # for the findings/endpoints/locations created below are suppressed. + # Inheritance is then applied in bulk per-batch (right before + # `post_process_findings_batch` dispatch) so rules/dedup see inherited + # tags on `finding.tags`. + with tag_inheritance.batch_mode(): + return self._process_findings_internal(parsed_findings, **kwargs) + + def _process_findings_internal( + self, + parsed_findings: list[Finding], + **kwargs: dict, ) -> list[Finding]: # Batched post-processing (no chord): dispatch a task per 1000 findings or on final finding batch_finding_ids: list[int] = [] @@ -266,6 +281,10 @@ def process_findings( findings_with_parser_tags.clear() # Apply import-time tags before post-processing so rules/deduplication see them. self.apply_import_tags_for_batch(batch_findings) + # Apply inherited Product tags to this batch's findings (and + # their endpoints/locations) BEFORE post_process_findings_batch + # dispatches, so rules/dedup see inherited tags on .tags. + apply_inherited_tags_for_findings(batch_findings) batch_findings.clear() finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 3df3c0bd4cc..62f6da12c6e 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -6,6 +6,7 @@ from django.db.models.query_utils import Q import dojo.finding.helper as finding_helper +from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding.deduplication import ( find_candidates_for_deduplication_hash, @@ -24,6 +25,7 @@ Test, Test_Import, ) +from dojo.product.helpers import apply_inherited_tags_for_findings from dojo.tag_utils import bulk_apply_parser_tags from dojo.utils import perform_product_grading from dojo.validators import clean_tags @@ -263,6 +265,19 @@ def process_findings( the finding may be appended to a new or existing group based upon user selection at import time """ + # Whole hot loop runs under `batch_mode()`: per-row inheritance signals + # for the findings/endpoints/locations created below are suppressed. + # Inheritance is then applied in bulk per-batch (right before + # `post_process_findings_batch` dispatch) so rules/dedup see inherited + # tags on `finding.tags`. + with tag_inheritance.batch_mode(): + return self._process_findings_internal(parsed_findings, **kwargs) + + def _process_findings_internal( + self, + parsed_findings: list[Finding], + **kwargs: dict, + ) -> tuple[list[Finding], list[Finding], list[Finding], list[Finding]]: self.deduplication_algorithm = self.determine_deduplication_algorithm() # Only process findings with the same service value (or None) # Even though the service values is used in the hash_code calculation, @@ -422,6 +437,10 @@ def process_findings( findings_with_parser_tags.clear() # Apply import-time tags before post-processing so rules/deduplication see them. self.apply_import_tags_for_batch(batch_findings) + # Apply inherited Product tags to this batch's findings (and + # their endpoints/locations) BEFORE post_process_findings_batch + # dispatches, so rules/dedup see inherited tags on .tags. + apply_inherited_tags_for_findings(batch_findings) batch_findings.clear() finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 21e9673ed8c..edf541cc762 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -14,7 +14,7 @@ Finding, Product, ) -from dojo.tags_signals import inherit_instance_tags +from dojo.product.helpers import apply_inherited_tags_for_endpoints logger = logging.getLogger(__name__) @@ -231,10 +231,16 @@ def get_or_create_endpoints(self) -> tuple[dict[EndpointUniqueKey, Endpoint], li if to_create: created = Endpoint.objects.bulk_create(to_create, batch_size=1000) endpoints_by_key.update(zip(to_create_keys, created, strict=True)) - # bulk_create bypasses post_save signals, so manually trigger tag inheritance - # this is not ideal, but we need to take a separate look at the tag inheritance feature itself later - for ep in created: - inherit_instance_tags(ep) + # bulk_create bypasses post_save so per-row inheritance signals never + # fire here. The importer hot path already covers these endpoints via + # the per-batch `apply_inherited_tags_for_findings` sweep (it picks + # them up through `Endpoint.status_finding.finding`), so this call is + # redundant for the importer. We keep a bulk call anyway as a defensive + # measure: if anything outside the importer ever bulk-creates endpoints + # through this manager, they still receive their inherited Product tags + # instead of silently missing them. The bulk helper costs ~2 queries + # when there's nothing to apply, vs N per-row signal fires. + apply_inherited_tags_for_endpoints(created) self._endpoints_to_create.clear() return endpoints_by_key, created diff --git a/dojo/location/models.py b/dojo/location/models.py index 48ffeb6878a..e617ad4e755 100644 --- a/dojo/location/models.py +++ b/dojo/location/models.py @@ -231,6 +231,40 @@ def all_related_products(self) -> QuerySet[Product]: | Q(engagement__test__finding__locations__location=self), ).distinct() + def iter_related_products(self) -> list[Product]: + """ + Prefetch-friendly equivalent of `all_related_products()`. + + Walks `self.products.all()` (LocationProductReference) and + `self.findings.all()` (LocationFindingReference -> Finding -> Test -> + Engagement -> Product) via Django related managers, so a caller that + already issued + + Location.objects.filter(...).prefetch_related( + "products__product__tags", + "findings__finding__test__engagement__product__tags", + ) + + gets every Product (and its tags) in 0 extra queries per Location. + + Use this method from bulk paths where many Locations are processed at + once. The original `all_related_products()` still issues a single + DISTINCT JOIN query and is kept for per-instance signal paths where + prefetching is not possible. + """ + seen: set[int] = set() + result: list[Product] = [] + for ref in self.products.all(): + if ref.product_id not in seen: + seen.add(ref.product_id) + result.append(ref.product) + for ref in self.findings.all(): + product = ref.finding.test.engagement.product + if product.id not in seen: + seen.add(product.id) + result.append(product) + return result + def products_to_inherit_tags_from(self) -> list[Product]: from dojo.utils import get_system_setting # noqa: PLC0415 system_wide_inherit = get_system_setting("enable_product_tag_inheritance") diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index 7bbb2937103..c1b86007e8a 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -9,6 +9,7 @@ from dojo.location.models import Location from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances +from dojo.utils import get_system_setting logger = logging.getLogger(__name__) @@ -31,52 +32,142 @@ def propagate_tags_on_product_sync(product): Product's current tags, and applies adds/removes via the bulk tag helpers. Both `tags` and `inherited_tags` fields are kept in sync. """ - target_names = {tag.name for tag in product.tags.all()} + # Skip the full child sweep when inheritance is disabled both system-wide + # and on this product. Without this gate the importer hot path pays ~9 + # queries per scan (one product-tags read + one list/through-table read per + # child kind) even when no inheritance work is possible. State transitions + # (toggling the flag on/off) still trigger a full sweep via the m2m_changed + # handler on `Product.tags.through` and the per-product flag save handler. + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} logger.debug("Propagating tags from %s to all engagements", product) _sync_inheritance_for_qs( Engagement.objects.filter(product=product), - target_names_per_child=lambda _child: target_names, + target_names_per_child=lambda _child: inherited_tag_names, ) logger.debug("Propagating tags from %s to all tests", product) _sync_inheritance_for_qs( Test.objects.filter(engagement__product=product), - target_names_per_child=lambda _child: target_names, + target_names_per_child=lambda _child: inherited_tag_names, ) logger.debug("Propagating tags from %s to all findings", product) _sync_inheritance_for_qs( Finding.objects.filter(test__engagement__product=product), - target_names_per_child=lambda _child: target_names, + target_names_per_child=lambda _child: inherited_tag_names, ) if settings.V3_FEATURE_LOCATIONS: logger.debug("Propagating tags from %s to all locations", product) location_qs = Location.objects.filter( Q(products__product=product) | Q(findings__finding__test__engagement__product=product), - ).distinct() + ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) # Locations can be linked to multiple products, so the inherited target # is the union of every related product's tags. Compute per-location. _sync_inheritance_for_qs( location_qs, - target_names_per_child=_location_target_names, + target_names_per_child=_inherited_tag_names_for_location, ) else: logger.debug("Propagating tags from %s to all endpoints", product) _sync_inheritance_for_qs( Endpoint.objects.filter(product=product), - target_names_per_child=lambda _child: target_names, + target_names_per_child=lambda _child: inherited_tag_names, ) -def _location_target_names(location): +def apply_inherited_tags_for_endpoints(endpoints): + """ + Bulk inheritance for a list of Endpoints, e.g. those just created via + `Endpoint.objects.bulk_create` (which bypasses post_save signals). + + All endpoints are assumed to share a single Product — true for the + importer's `EndpointManager`, which is per-product. If callers ever + mix products, split the list before calling. + """ + if not endpoints: + return + product = endpoints[0].product + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} + _sync_inheritance_for_qs( + Endpoint.objects.filter(id__in=[e.pk for e in endpoints]), + target_names_per_child=lambda _child: inherited_tag_names, + ) + + +def apply_inherited_tags_for_findings(findings): + """ + Per-batch bulk inheritance for findings created during an import. + + Apply the owning Product's inherited tags to the given findings plus the + Endpoints (V2) / Locations (V3) reachable from them. Called from the + importer hot path right before each batch dispatches to + `post_process_findings_batch` so rules / deduplication see inherited tags + on `finding.tags`. + + Test and Engagement inheritance is handled by their own post_save handlers + (those run outside the importer's `batch_mode()`, so per-instance signal + work fires normally and applies inheritance on create). + """ + if not findings: + return + # Single-product invariant inside one importer call. Smart upload calls + # this per-product so the assumption holds there too. + product = findings[0].test.engagement.product + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} + finding_ids = [f.pk for f in findings] + + _sync_inheritance_for_qs( + Finding.objects.filter(id__in=finding_ids), + target_names_per_child=lambda _child: inherited_tag_names, + ) + if settings.V3_FEATURE_LOCATIONS: + _sync_inheritance_for_qs( + Location.objects.filter(findings__finding_id__in=finding_ids).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE), + target_names_per_child=_inherited_tag_names_for_location, + ) + else: + _sync_inheritance_for_qs( + Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct(), + target_names_per_child=lambda _child: inherited_tag_names, + ) + + +def _inherited_tag_names_for_location(location): + """ + Compute the tag-name set this Location should have as `inherited_tags`. + + Unlike Finding / Test / Engagement / Endpoint (each owned by exactly one + Product), a Location can be attached to multiple Products — directly via + `LocationProductReference` or indirectly via `LocationFindingReference` + -> Finding -> Test -> Engagement -> Product. The target inherited set is + therefore the UNION of every related Product's tags. + + Used as the `target_names_per_child` callback for `_sync_inheritance_for_qs` + on Location querysets; it must be called per Location because each Location + has its own set of related Products. Uses `iter_related_products()` so + that an upstream `prefetch_related(...)` reduces per-call cost to 0 + queries. + """ names: set[str] = set() - for related_product in location.all_related_products(): + for related_product in location.iter_related_products(): if related_product is None: continue names.update(tag.name for tag in related_product.tags.all()) return names +_LOCATION_PREFETCH_FOR_INHERITANCE = ( + "products__product__tags", + "findings__finding__test__engagement__product__tags", +) + + def _sync_inheritance_for_qs(queryset, *, target_names_per_child): """ Sync inherited_tags + tags for every child in `queryset` to its target tag set. diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index 93ac1886930..b0383bfa1f3 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -48,6 +48,12 @@ def make_inherited_tags_sticky(sender, instance, action, **kwargs): def inherit_instance_tags(instance): """Usually nothing to do when saving a model, except for new models?""" + # Suppress per-instance inheritance work inside an active batch. The + # caller (signal handler or bulk_create cleanup) need not know about + # batch_mode; whoever opened the batch is responsible for the bulk + # apply at exit. + if tag_inheritance.is_in_batch_mode(): + return if inherit_product_tags(instance): # TODO: Is this change OK to make? # tag_list = instance._tags_tagulous.get_tag_list() @@ -70,6 +76,7 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): # (create OR update), repeatedly re-applying inherited tags to children # whose tag state had not changed. Sticky enforcement on user-driven # tag edits is handled by `make_inherited_tags_sticky` (m2m_changed). + # `inherit_instance_tags` itself early-returns when a batch is active. if not created: return inherit_instance_tags(instance) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 2b1a3681a9a..8ee6410c205 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -379,13 +379,14 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 53 # V3 location paths. Pre-Phase-A: 4532 add, 4307 remove. - EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 316 - EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 266 + EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 125 + EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 75 @override_settings( CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True, + SECURE_SSL_REDIRECT=False, ) @versioned_fixtures class TagInheritanceImportPerfBaselines(DojoAPITestCase): @@ -497,7 +498,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # import path because the previous process-global signal-disconnect was # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 1385 - EXPECTED_ZAP_IMPORT_V3 = 1263 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87 + EXPECTED_ZAP_IMPORT_V2 = 477 + EXPECTED_ZAP_IMPORT_V3 = 945 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 102 From 455871b00520c8239653ea56b5d00ab4e33f7851 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 10:27:05 +0200 Subject: [PATCH 02/19] refactor(tags): centralize inheritance helpers in dojo/tag_inheritance Move scattered tag-inheritance logic into dojo/tag_inheritance.py without changing runtime behavior: - _manage_inherited_tags relocated from dojo/models.py (replaced by a lazy-import shim to avoid an import cycle through dojo.utils). - get_products, inherit_product_tags, get_products_to_inherit_tags_from, propagate_inheritance, inherit_instance_tags, inherit_linked_instance_tags relocated from dojo/tags_signals.py; receivers there now delegate. - propagate_tags_on_product_sync, apply_inherited_tags_for_endpoints, apply_inherited_tags_for_findings, _sync_inheritance_for_qs and _inherited_tag_names_for_location relocated from dojo/product/helpers.py; the Celery wrapper propagate_tags_on_product stays in product/helpers.py. Backward-compat re-exports preserved at every original import site so external callers (dojo/location, dojo/importers, unittests, pro/) keep working unchanged. Adjusts unittests.test_tag_inheritance mock targets to the new module path. Bumps EXPECTED_ZAP_IMPORT_V2/V3 baselines (477->470, 945->938) to the actual query counts; the prior pin was already drifting on this branch before the move. --- dojo/models.py | 28 +- dojo/product/helpers.py | 222 +-------------- dojo/tag_inheritance.py | 368 ++++++++++++++++++++++++- dojo/tags_signals.py | 84 +----- unittests/test_tag_inheritance.py | 28 +- unittests/test_tag_inheritance_perf.py | 4 +- 6 files changed, 398 insertions(+), 336 deletions(-) diff --git a/dojo/models.py b/dojo/models.py index 420d85909eb..540a44bc27e 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -41,7 +41,7 @@ from polymorphic.managers import PolymorphicManager from polymorphic.models import PolymorphicModel from tagulous.models import TagField -from tagulous.models.managers import FakeTagRelatedManager +from tagulous.models.managers import FakeTagRelatedManager # noqa: F401 -- backward compat re-export from titlecase import titlecase from dojo.base_models.base import BaseModel @@ -111,27 +111,11 @@ def _get_statistics_for_queryset(qs, annotation_factory): def _manage_inherited_tags(obj, incoming_inherited_tags, potentially_existing_tags=None): - # get copies of the current tag lists - if potentially_existing_tags is None: - potentially_existing_tags = [] - current_inherited_tags = [] if isinstance(obj.inherited_tags, FakeTagRelatedManager) else [tag.name for tag in obj.inherited_tags.all()] - tag_list = potentially_existing_tags if isinstance(obj.tags, FakeTagRelatedManager) or len(potentially_existing_tags) > 0 else [tag.name for tag in obj.tags.all()] - # Clean existing tag list from the old inherited tags. This represents the tags on the object and not the product - cleaned_tag_list = [tag for tag in tag_list if tag not in current_inherited_tags] - # Add the incoming inherited tag list - if incoming_inherited_tags: - for tag in incoming_inherited_tags: - if tag not in cleaned_tag_list: - cleaned_tag_list.append(tag) - # Update the current list of inherited tags. iteratively do this because of tagulous object restraints - if isinstance(obj.inherited_tags, FakeTagRelatedManager): - obj.inherited_tags.set_tag_list(incoming_inherited_tags) - if incoming_inherited_tags: - obj.tags.set_tag_list(cleaned_tag_list) - else: - obj.inherited_tags.set(incoming_inherited_tags) - if incoming_inherited_tags: - obj.tags.set(cleaned_tag_list) + # Backward-compat shim. Implementation lives in dojo.tag_inheritance; lazy + # import keeps dojo.models loadable before dojo.tag_inheritance (which + # transitively imports dojo.utils -> dojo.models) is ready. + from dojo.tag_inheritance import _manage_inherited_tags as _impl # noqa: PLC0415 + return _impl(obj, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) def copy_model_util(model_in_database, exclude_fields: list[str] | None = None): diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index c1b86007e8a..47a9e71bd34 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -1,15 +1,16 @@ import contextlib import logging -from collections import defaultdict - -from django.conf import settings -from django.db.models import Q from dojo.celery import app -from dojo.location.models import Location -from dojo.models import Endpoint, Engagement, Finding, Product, Test -from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances -from dojo.utils import get_system_setting +from dojo.models import Product +from dojo.tag_inheritance import ( + _LOCATION_PREFETCH_FOR_INHERITANCE, # noqa: F401 -- backward compat re-export + _inherited_tag_names_for_location, # noqa: F401 -- backward compat re-export + _sync_inheritance_for_qs, # noqa: F401 -- backward compat re-export + apply_inherited_tags_for_endpoints, # noqa: F401 -- backward compat re-export + apply_inherited_tags_for_findings, # noqa: F401 -- backward compat re-export + propagate_tags_on_product_sync, +) logger = logging.getLogger(__name__) @@ -19,208 +20,3 @@ def propagate_tags_on_product(product_id, *args, **kwargs): with contextlib.suppress(Product.DoesNotExist): product = Product.objects.get(id=product_id) propagate_tags_on_product_sync(product) - - -def propagate_tags_on_product_sync(product): - """ - Bulk-apply Product tag changes to all children using through-table SQL. - - Replaces the previous per-row `.save()` loop. For every child model owned - by the product (Engagement, Test, Finding, plus Endpoint or Location - depending on the V3_FEATURE_LOCATIONS flag), reads the existing - `inherited_tags` per child in one query, computes the diff against the - Product's current tags, and applies adds/removes via the bulk tag - helpers. Both `tags` and `inherited_tags` fields are kept in sync. - """ - # Skip the full child sweep when inheritance is disabled both system-wide - # and on this product. Without this gate the importer hot path pays ~9 - # queries per scan (one product-tags read + one list/through-table read per - # child kind) even when no inheritance work is possible. State transitions - # (toggling the flag on/off) still trigger a full sweep via the m2m_changed - # handler on `Product.tags.through` and the per-product flag save handler. - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): - return - inherited_tag_names = {tag.name for tag in product.tags.all()} - - logger.debug("Propagating tags from %s to all engagements", product) - _sync_inheritance_for_qs( - Engagement.objects.filter(product=product), - target_names_per_child=lambda _child: inherited_tag_names, - ) - logger.debug("Propagating tags from %s to all tests", product) - _sync_inheritance_for_qs( - Test.objects.filter(engagement__product=product), - target_names_per_child=lambda _child: inherited_tag_names, - ) - logger.debug("Propagating tags from %s to all findings", product) - _sync_inheritance_for_qs( - Finding.objects.filter(test__engagement__product=product), - target_names_per_child=lambda _child: inherited_tag_names, - ) - if settings.V3_FEATURE_LOCATIONS: - logger.debug("Propagating tags from %s to all locations", product) - location_qs = Location.objects.filter( - Q(products__product=product) - | Q(findings__finding__test__engagement__product=product), - ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) - # Locations can be linked to multiple products, so the inherited target - # is the union of every related product's tags. Compute per-location. - _sync_inheritance_for_qs( - location_qs, - target_names_per_child=_inherited_tag_names_for_location, - ) - else: - logger.debug("Propagating tags from %s to all endpoints", product) - _sync_inheritance_for_qs( - Endpoint.objects.filter(product=product), - target_names_per_child=lambda _child: inherited_tag_names, - ) - - -def apply_inherited_tags_for_endpoints(endpoints): - """ - Bulk inheritance for a list of Endpoints, e.g. those just created via - `Endpoint.objects.bulk_create` (which bypasses post_save signals). - - All endpoints are assumed to share a single Product — true for the - importer's `EndpointManager`, which is per-product. If callers ever - mix products, split the list before calling. - """ - if not endpoints: - return - product = endpoints[0].product - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): - return - inherited_tag_names = {tag.name for tag in product.tags.all()} - _sync_inheritance_for_qs( - Endpoint.objects.filter(id__in=[e.pk for e in endpoints]), - target_names_per_child=lambda _child: inherited_tag_names, - ) - - -def apply_inherited_tags_for_findings(findings): - """ - Per-batch bulk inheritance for findings created during an import. - - Apply the owning Product's inherited tags to the given findings plus the - Endpoints (V2) / Locations (V3) reachable from them. Called from the - importer hot path right before each batch dispatches to - `post_process_findings_batch` so rules / deduplication see inherited tags - on `finding.tags`. - - Test and Engagement inheritance is handled by their own post_save handlers - (those run outside the importer's `batch_mode()`, so per-instance signal - work fires normally and applies inheritance on create). - """ - if not findings: - return - # Single-product invariant inside one importer call. Smart upload calls - # this per-product so the assumption holds there too. - product = findings[0].test.engagement.product - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): - return - inherited_tag_names = {tag.name for tag in product.tags.all()} - finding_ids = [f.pk for f in findings] - - _sync_inheritance_for_qs( - Finding.objects.filter(id__in=finding_ids), - target_names_per_child=lambda _child: inherited_tag_names, - ) - if settings.V3_FEATURE_LOCATIONS: - _sync_inheritance_for_qs( - Location.objects.filter(findings__finding_id__in=finding_ids).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE), - target_names_per_child=_inherited_tag_names_for_location, - ) - else: - _sync_inheritance_for_qs( - Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct(), - target_names_per_child=lambda _child: inherited_tag_names, - ) - - -def _inherited_tag_names_for_location(location): - """ - Compute the tag-name set this Location should have as `inherited_tags`. - - Unlike Finding / Test / Engagement / Endpoint (each owned by exactly one - Product), a Location can be attached to multiple Products — directly via - `LocationProductReference` or indirectly via `LocationFindingReference` - -> Finding -> Test -> Engagement -> Product. The target inherited set is - therefore the UNION of every related Product's tags. - - Used as the `target_names_per_child` callback for `_sync_inheritance_for_qs` - on Location querysets; it must be called per Location because each Location - has its own set of related Products. Uses `iter_related_products()` so - that an upstream `prefetch_related(...)` reduces per-call cost to 0 - queries. - """ - names: set[str] = set() - for related_product in location.iter_related_products(): - if related_product is None: - continue - names.update(tag.name for tag in related_product.tags.all()) - return names - - -_LOCATION_PREFETCH_FOR_INHERITANCE = ( - "products__product__tags", - "findings__finding__test__engagement__product__tags", -) - - -def _sync_inheritance_for_qs(queryset, *, target_names_per_child): - """ - Sync inherited_tags + tags for every child in `queryset` to its target tag set. - - target_names_per_child: callable(child) -> set[str]. - - Issues bulk SQL: one through-table read for current inherited_tags, then - bulk add/remove on `tags` and `inherited_tags` fields. - """ - children = list(queryset) - if not children: - return - - model_class = type(children[0]) - inherited_field = model_class._meta.get_field("inherited_tags") - inherited_through = inherited_field.remote_field.through - inherited_tag_model = inherited_field.related_model - - # Resolve through-table FK column for the source side. - source_field_name = None - for field in inherited_through._meta.fields: - if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == model_class: - source_field_name = field.name - break - - child_ids = [c.pk for c in children] - # One query: pull every (child_id, tag_name) pair from the inherited_tags through table. - existing_pairs = inherited_through.objects.filter( - **{f"{source_field_name}__in": child_ids}, - ).values_list(source_field_name, f"{inherited_tag_model._meta.model_name}__name") - - old_inherited_by_child: dict[int, set[str]] = defaultdict(set) - for child_id, tag_name in existing_pairs: - old_inherited_by_child[child_id].add(tag_name) - - # Compute per-child diff and bucket by tag name. - add_map: dict[str, list] = defaultdict(list) - remove_map: dict[str, list] = defaultdict(list) - for child in children: - target = target_names_per_child(child) - old = old_inherited_by_child.get(child.pk, set()) - for name in target - old: - add_map[name].append(child) - for name in old - target: - remove_map[name].append(child) - - # Apply adds. Both `tags` and `inherited_tags` get the same set of new - # inherited names — `_manage_inherited_tags` did the same. - if add_map: - bulk_add_tag_mapping(add_map, tag_field_name="inherited_tags") - bulk_add_tag_mapping(add_map, tag_field_name="tags") - - # Apply removes. - for name, instances in remove_map.items(): - bulk_remove_tags_from_instances(name, instances, tag_field_name="inherited_tags") - bulk_remove_tags_from_instances(name, instances, tag_field_name="tags") diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py index e9d0e98a5fc..2f6752fd783 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tag_inheritance.py @@ -1,24 +1,58 @@ """ Tag inheritance — central coordination module. -Provides a thread-local ``batch()`` context manager that suppresses -per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` -signals. While inside a batch, the signal handlers in -``dojo/tags_signals.py`` early-return; the calling code is responsible for -applying inheritance in bulk (e.g. via the importer's existing -``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). - -This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)`` -in importer hot loops, which was process-global and unsafe under threaded -gunicorn / Celery thread pools / ASGI threadpools (see PR description for -the full rationale). +Provides: + +- ``batch_mode()`` — thread-local context manager that suppresses + per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` + signals. While inside a batch, the signal handlers in + ``dojo/tags_signals.py`` early-return; the calling code is responsible for + applying inheritance in bulk (e.g. via the importer's existing + ``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). + + This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)`` + in importer hot loops, which was process-global and unsafe under threaded + gunicorn / Celery thread pools / ASGI threadpools (see PR description for + the full rationale). + +- The per-instance inheritance helpers previously scattered across + ``dojo/tags_signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` + (``_manage_inherited_tags``, ``get_products``, ``inherit_product_tags``, + ``get_products_to_inherit_tags_from``, ``propagate_inheritance``, + ``inherit_instance_tags``, ``inherit_linked_instance_tags``). + +- The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``) + plus per-batch importer helpers (``apply_inherited_tags_for_findings`` / + ``apply_inherited_tags_for_endpoints``) and their shared ``_sync_inheritance_for_qs`` + primitive. + +Model imports are deferred to function bodies to keep this module loadable +before ``dojo.models`` finishes initialising. """ from __future__ import annotations import contextlib +import logging import threading +from collections import defaultdict from contextlib import contextmanager +from django.conf import settings +from django.db.models import Q +from tagulous.models.managers import FakeTagRelatedManager + +# Top-level imports of dojo internals are safe here because +# ``dojo.tag_inheritance`` is loaded lazily — never during the initial +# evaluation of ``dojo.models``. By the time anything imports this module +# (signals registration, importers, the per-model ``inherit_tags()`` shim +# in ``dojo.models``), the full model layer is initialised. +from dojo.location.models import Location +from dojo.models import Endpoint, Engagement, Finding, Product, Test +from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances +from dojo.utils import get_system_setting + +logger = logging.getLogger(__name__) + _state = threading.local() @@ -52,3 +86,315 @@ def batch_mode(): # Clean up the attribute so leak-free thread reuse stays simple. with contextlib.suppress(AttributeError): del _state.depth + + +# --------------------------------------------------------------------------- +# Per-instance inheritance helpers (relocated from dojo/models.py + +# dojo/tags_signals.py). Logic unchanged. +# --------------------------------------------------------------------------- + + +def _manage_inherited_tags(obj, incoming_inherited_tags, potentially_existing_tags=None): + # get copies of the current tag lists + if potentially_existing_tags is None: + potentially_existing_tags = [] + current_inherited_tags = [] if isinstance(obj.inherited_tags, FakeTagRelatedManager) else [tag.name for tag in obj.inherited_tags.all()] + tag_list = potentially_existing_tags if isinstance(obj.tags, FakeTagRelatedManager) or len(potentially_existing_tags) > 0 else [tag.name for tag in obj.tags.all()] + # Clean existing tag list from the old inherited tags. This represents the tags on the object and not the product + cleaned_tag_list = [tag for tag in tag_list if tag not in current_inherited_tags] + # Add the incoming inherited tag list + if incoming_inherited_tags: + for tag in incoming_inherited_tags: + if tag not in cleaned_tag_list: + cleaned_tag_list.append(tag) + # Update the current list of inherited tags. iteratively do this because of tagulous object restraints + if isinstance(obj.inherited_tags, FakeTagRelatedManager): + obj.inherited_tags.set_tag_list(incoming_inherited_tags) + if incoming_inherited_tags: + obj.tags.set_tag_list(cleaned_tag_list) + else: + obj.inherited_tags.set(incoming_inherited_tags) + if incoming_inherited_tags: + obj.tags.set(cleaned_tag_list) + + +def get_products(instance): + if isinstance(instance, Product): + return [instance] + if isinstance(instance, Endpoint): + return [instance.product] + if isinstance(instance, Engagement): + return [instance.product] + if isinstance(instance, Test): + return [instance.engagement.product] + if isinstance(instance, Finding): + return [instance.test.engagement.product] + if isinstance(instance, Location): + return list(instance.all_related_products()) + return [] + + +def inherit_product_tags(instance) -> bool: + products = get_products(instance) + # Save a read in the db + if any(product.enable_product_tag_inheritance for product in products if product): + return True + + return get_system_setting("enable_product_tag_inheritance") + + +def get_products_to_inherit_tags_from(instance): + products = get_products(instance) + system_wide_inherit = get_system_setting("enable_product_tag_inheritance") + + return [ + product for product in products if product.enable_product_tag_inheritance or system_wide_inherit + ] + + +def propagate_inheritance(instance, tag_list=None): + # Get the expected product tags + if tag_list is None: + tag_list = [] + product_inherited_tags = [ + tag.name + for product in get_products_to_inherit_tags_from(instance) + for tag in product.tags.all() + ] + existing_inherited_tags = [tag.name for tag in instance.inherited_tags.all()] + # Check if product tags already matches inherited tags + product_tags_equals_inherited_tags = product_inherited_tags == existing_inherited_tags + # Check if product tags have already been inherited + tags_have_already_been_inherited = set(product_inherited_tags) <= set(tag_list) + return not (product_tags_equals_inherited_tags and tags_have_already_been_inherited) + + +def inherit_instance_tags(instance): + """Usually nothing to do when saving a model, except for new models?""" + # Suppress per-instance inheritance work inside an active batch. The + # caller (signal handler or bulk_create cleanup) need not know about + # batch_mode; whoever opened the batch is responsible for the bulk + # apply at exit. + if is_in_batch_mode(): + return + if inherit_product_tags(instance): + # TODO: Is this change OK to make? + # tag_list = instance._tags_tagulous.get_tag_list() + tag_list = instance.tags.get_tag_list() + if propagate_inheritance(instance, tag_list=tag_list): + instance.inherit_tags(tag_list) + + +def inherit_linked_instance_tags(instance): + inherit_instance_tags(instance.location) + + +# --------------------------------------------------------------------------- +# Bulk product-wide inheritance (relocated from dojo/product/helpers.py). +# Logic unchanged. +# --------------------------------------------------------------------------- + + +def propagate_tags_on_product_sync(product): + """ + Bulk-apply Product tag changes to all children using through-table SQL. + + Replaces the previous per-row `.save()` loop. For every child model owned + by the product (Engagement, Test, Finding, plus Endpoint or Location + depending on the V3_FEATURE_LOCATIONS flag), reads the existing + `inherited_tags` per child in one query, computes the diff against the + Product's current tags, and applies adds/removes via the bulk tag + helpers. Both `tags` and `inherited_tags` fields are kept in sync. + """ + # Skip the full child sweep when inheritance is disabled both system-wide + # and on this product. Without this gate the importer hot path pays ~9 + # queries per scan (one product-tags read + one list/through-table read per + # child kind) even when no inheritance work is possible. State transitions + # (toggling the flag on/off) still trigger a full sweep via the m2m_changed + # handler on `Product.tags.through` and the per-product flag save handler. + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} + + logger.debug("Propagating tags from %s to all engagements", product) + _sync_inheritance_for_qs( + Engagement.objects.filter(product=product), + target_names_per_child=lambda _child: inherited_tag_names, + ) + logger.debug("Propagating tags from %s to all tests", product) + _sync_inheritance_for_qs( + Test.objects.filter(engagement__product=product), + target_names_per_child=lambda _child: inherited_tag_names, + ) + logger.debug("Propagating tags from %s to all findings", product) + _sync_inheritance_for_qs( + Finding.objects.filter(test__engagement__product=product), + target_names_per_child=lambda _child: inherited_tag_names, + ) + if settings.V3_FEATURE_LOCATIONS: + logger.debug("Propagating tags from %s to all locations", product) + location_qs = Location.objects.filter( + Q(products__product=product) + | Q(findings__finding__test__engagement__product=product), + ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) + # Locations can be linked to multiple products, so the inherited target + # is the union of every related product's tags. Compute per-location. + _sync_inheritance_for_qs( + location_qs, + target_names_per_child=_inherited_tag_names_for_location, + ) + else: + logger.debug("Propagating tags from %s to all endpoints", product) + _sync_inheritance_for_qs( + Endpoint.objects.filter(product=product), + target_names_per_child=lambda _child: inherited_tag_names, + ) + + +def apply_inherited_tags_for_endpoints(endpoints): + """ + Bulk inheritance for a list of Endpoints, e.g. those just created via + `Endpoint.objects.bulk_create` (which bypasses post_save signals). + + All endpoints are assumed to share a single Product — true for the + importer's `EndpointManager`, which is per-product. If callers ever + mix products, split the list before calling. + """ + if not endpoints: + return + product = endpoints[0].product + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} + _sync_inheritance_for_qs( + Endpoint.objects.filter(id__in=[e.pk for e in endpoints]), + target_names_per_child=lambda _child: inherited_tag_names, + ) + + +def apply_inherited_tags_for_findings(findings): + """ + Per-batch bulk inheritance for findings created during an import. + + Apply the owning Product's inherited tags to the given findings plus the + Endpoints (V2) / Locations (V3) reachable from them. Called from the + importer hot path right before each batch dispatches to + `post_process_findings_batch` so rules / deduplication see inherited tags + on `finding.tags`. + + Test and Engagement inheritance is handled by their own post_save handlers + (those run outside the importer's `batch_mode()`, so per-instance signal + work fires normally and applies inheritance on create). + """ + if not findings: + return + # Single-product invariant inside one importer call. Smart upload calls + # this per-product so the assumption holds there too. + product = findings[0].test.engagement.product + if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + return + inherited_tag_names = {tag.name for tag in product.tags.all()} + finding_ids = [f.pk for f in findings] + + _sync_inheritance_for_qs( + Finding.objects.filter(id__in=finding_ids), + target_names_per_child=lambda _child: inherited_tag_names, + ) + if settings.V3_FEATURE_LOCATIONS: + _sync_inheritance_for_qs( + Location.objects.filter(findings__finding_id__in=finding_ids).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE), + target_names_per_child=_inherited_tag_names_for_location, + ) + else: + _sync_inheritance_for_qs( + Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct(), + target_names_per_child=lambda _child: inherited_tag_names, + ) + + +def _inherited_tag_names_for_location(location): + """ + Compute the tag-name set this Location should have as `inherited_tags`. + + Unlike Finding / Test / Engagement / Endpoint (each owned by exactly one + Product), a Location can be attached to multiple Products — directly via + `LocationProductReference` or indirectly via `LocationFindingReference` + -> Finding -> Test -> Engagement -> Product. The target inherited set is + therefore the UNION of every related Product's tags. + + Used as the `target_names_per_child` callback for `_sync_inheritance_for_qs` + on Location querysets; it must be called per Location because each Location + has its own set of related Products. Uses `iter_related_products()` so + that an upstream `prefetch_related(...)` reduces per-call cost to 0 + queries. + """ + names: set[str] = set() + for related_product in location.iter_related_products(): + if related_product is None: + continue + names.update(tag.name for tag in related_product.tags.all()) + return names + + +_LOCATION_PREFETCH_FOR_INHERITANCE = ( + "products__product__tags", + "findings__finding__test__engagement__product__tags", +) + + +def _sync_inheritance_for_qs(queryset, *, target_names_per_child): + """ + Sync inherited_tags + tags for every child in `queryset` to its target tag set. + + target_names_per_child: callable(child) -> set[str]. + + Issues bulk SQL: one through-table read for current inherited_tags, then + bulk add/remove on `tags` and `inherited_tags` fields. + """ + children = list(queryset) + if not children: + return + + model_class = type(children[0]) + inherited_field = model_class._meta.get_field("inherited_tags") + inherited_through = inherited_field.remote_field.through + inherited_tag_model = inherited_field.related_model + + # Resolve through-table FK column for the source side. + source_field_name = None + for field in inherited_through._meta.fields: + if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == model_class: + source_field_name = field.name + break + + child_ids = [c.pk for c in children] + # One query: pull every (child_id, tag_name) pair from the inherited_tags through table. + existing_pairs = inherited_through.objects.filter( + **{f"{source_field_name}__in": child_ids}, + ).values_list(source_field_name, f"{inherited_tag_model._meta.model_name}__name") + + old_inherited_by_child: dict[int, set[str]] = defaultdict(set) + for child_id, tag_name in existing_pairs: + old_inherited_by_child[child_id].add(tag_name) + + # Compute per-child diff and bucket by tag name. + add_map: dict[str, list] = defaultdict(list) + remove_map: dict[str, list] = defaultdict(list) + for child in children: + target = target_names_per_child(child) + old = old_inherited_by_child.get(child.pk, set()) + for name in target - old: + add_map[name].append(child) + for name in old - target: + remove_map[name].append(child) + + # Apply adds. Both `tags` and `inherited_tags` get the same set of new + # inherited names — `_manage_inherited_tags` did the same. + if add_map: + bulk_add_tag_mapping(add_map, tag_field_name="inherited_tags") + bulk_add_tag_mapping(add_map, tag_field_name="tags") + + # Apply removes. + for name, instances in remove_map.items(): + bulk_remove_tags_from_instances(name, instances, tag_field_name="inherited_tags") + bulk_remove_tags_from_instances(name, instances, tag_field_name="tags") diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index b0383bfa1f3..48d9ebc7bc4 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -9,7 +9,14 @@ from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.product import helpers as async_product_funcs -from dojo.utils import get_system_setting +from dojo.tag_inheritance import ( + get_products, # noqa: F401 -- backward compat re-export + get_products_to_inherit_tags_from, # noqa: F401 -- backward compat re-export + inherit_instance_tags, # noqa: F401 -- backward compat re-export + inherit_linked_instance_tags, # noqa: F401 -- backward compat re-export + inherit_product_tags, + propagate_inheritance, +) logger = logging.getLogger(__name__) @@ -46,26 +53,6 @@ def make_inherited_tags_sticky(sender, instance, action, **kwargs): instance.inherit_tags(tag_list) -def inherit_instance_tags(instance): - """Usually nothing to do when saving a model, except for new models?""" - # Suppress per-instance inheritance work inside an active batch. The - # caller (signal handler or bulk_create cleanup) need not know about - # batch_mode; whoever opened the batch is responsible for the bulk - # apply at exit. - if tag_inheritance.is_in_batch_mode(): - return - if inherit_product_tags(instance): - # TODO: Is this change OK to make? - # tag_list = instance._tags_tagulous.get_tag_list() - tag_list = instance.tags.get_tag_list() - if propagate_inheritance(instance, tag_list=tag_list): - instance.inherit_tags(tag_list) - - -def inherit_linked_instance_tags(instance: LocationFindingReference | LocationProductReference): - inherit_instance_tags(instance.location) - - @receiver(signals.post_save, sender=Endpoint) @receiver(signals.post_save, sender=Engagement) @receiver(signals.post_save, sender=Test) @@ -79,61 +66,10 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): # `inherit_instance_tags` itself early-returns when a batch is active. if not created: return - inherit_instance_tags(instance) + tag_inheritance.inherit_instance_tags(instance) @receiver(signals.post_save, sender=LocationFindingReference) @receiver(signals.post_save, sender=LocationProductReference) def inherit_tags_on_linked_instance(sender, instance, created, **kwargs): - inherit_linked_instance_tags(instance) - - -def propagate_inheritance(instance, tag_list=None): - # Get the expected product tags - if tag_list is None: - tag_list = [] - product_inherited_tags = [ - tag.name - for product in get_products_to_inherit_tags_from(instance) - for tag in product.tags.all() - ] - existing_inherited_tags = [tag.name for tag in instance.inherited_tags.all()] - # Check if product tags already matches inherited tags - product_tags_equals_inherited_tags = product_inherited_tags == existing_inherited_tags - # Check if product tags have already been inherited - tags_have_already_been_inherited = set(product_inherited_tags) <= set(tag_list) - return not (product_tags_equals_inherited_tags and tags_have_already_been_inherited) - - -def inherit_product_tags(instance) -> bool: - products = get_products(instance) - # Save a read in the db - if any(product.enable_product_tag_inheritance for product in products if product): - return True - - return get_system_setting("enable_product_tag_inheritance") - - -def get_products_to_inherit_tags_from(instance) -> list[Product]: - products = get_products(instance) - system_wide_inherit = get_system_setting("enable_product_tag_inheritance") - - return [ - product for product in products if product.enable_product_tag_inheritance or system_wide_inherit - ] - - -def get_products(instance) -> list[Product]: - if isinstance(instance, Product): - return [instance] - if isinstance(instance, Endpoint): - return [instance.product] - if isinstance(instance, Engagement): - return [instance.product] - if isinstance(instance, Test): - return [instance.engagement.product] - if isinstance(instance, Finding): - return [instance.test.engagement.product] - if isinstance(instance, Location): - return list(instance.all_related_products()) - return [] + tag_inheritance.inherit_linked_instance_tags(instance) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index 1d3bb7a8b18..7b9bff83991 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -118,32 +118,32 @@ def _make_product(self, *, per_product_flag): p.enable_product_tag_inheritance = per_product_flag return p - @patch("dojo.tags_signals.get_system_setting", return_value=True) - @patch("dojo.tags_signals.get_products") + @patch("dojo.tag_inheritance.get_system_setting", return_value=True) + @patch("dojo.tag_inheritance.get_products") def test_system_setting_on_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] self.assertTrue(inherit_product_tags(MagicMock())) - @patch("dojo.tags_signals.get_system_setting", return_value=False) - @patch("dojo.tags_signals.get_products") + @patch("dojo.tag_inheritance.get_system_setting", return_value=False) + @patch("dojo.tag_inheritance.get_products") def test_per_product_flag_on_system_off_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=True)] self.assertTrue(inherit_product_tags(MagicMock())) - @patch("dojo.tags_signals.get_system_setting", return_value=False) - @patch("dojo.tags_signals.get_products") + @patch("dojo.tag_inheritance.get_system_setting", return_value=False) + @patch("dojo.tag_inheritance.get_products") def test_both_off_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] self.assertFalse(inherit_product_tags(MagicMock())) - @patch("dojo.tags_signals.get_system_setting", return_value=False) - @patch("dojo.tags_signals.get_products") + @patch("dojo.tag_inheritance.get_system_setting", return_value=False) + @patch("dojo.tag_inheritance.get_products") def test_no_products_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [] self.assertFalse(inherit_product_tags(MagicMock())) - @patch("dojo.tags_signals.get_system_setting", return_value=False) - @patch("dojo.tags_signals.get_products") + @patch("dojo.tag_inheritance.get_system_setting", return_value=False) + @patch("dojo.tag_inheritance.get_products") def test_none_entries_in_product_list_are_skipped(self, mock_get_products, mock_setting): mock_get_products.return_value = [None, self._make_product(per_product_flag=False)] self.assertFalse(inherit_product_tags(MagicMock())) @@ -177,28 +177,28 @@ def _make_product(self, tag_names): product.tags.all.return_value = [self._tag(n) for n in tag_names] return product - @patch("dojo.tags_signals.get_products_to_inherit_tags_from") + @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") def test_already_in_sync_returns_false(self, mock_get): """inherited_tags matches product tags and all present in tag_list → skip.""" instance = self._make_instance(["alpha", "beta"]) mock_get.return_value = [self._make_product(["alpha", "beta"])] self.assertFalse(propagate_inheritance(instance, tag_list=["alpha", "beta"])) - @patch("dojo.tags_signals.get_products_to_inherit_tags_from") + @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") def test_product_tags_changed_returns_true(self, mock_get): """Stored inherited_tags differ from current product tags → must propagate.""" instance = self._make_instance(["old"]) mock_get.return_value = [self._make_product(["new"])] self.assertTrue(propagate_inheritance(instance, tag_list=["old", "new"])) - @patch("dojo.tags_signals.get_products_to_inherit_tags_from") + @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") def test_tags_not_yet_applied_to_instance_returns_true(self, mock_get): """inherited_tags already correct but not yet reflected in tag_list → must propagate.""" instance = self._make_instance(["alpha"]) mock_get.return_value = [self._make_product(["alpha"])] self.assertTrue(propagate_inheritance(instance, tag_list=[])) - @patch("dojo.tags_signals.get_products_to_inherit_tags_from") + @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") def test_no_products_no_inherited_tags_returns_false(self, mock_get): """No products, no inherited tags, empty tag_list → already in sync, skip.""" instance = self._make_instance([]) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 4789d78bf25..f1aec1317a9 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -498,7 +498,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # import path because the previous process-global signal-disconnect was # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 477 - EXPECTED_ZAP_IMPORT_V3 = 945 + EXPECTED_ZAP_IMPORT_V2 = 470 + EXPECTED_ZAP_IMPORT_V3 = 938 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 102 From 34a32fac5e4d9c9884e69f800a8d6b17d2fe8e08 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 10:37:37 +0200 Subject: [PATCH 03/19] refactor(tags): group tag modules under dojo/tags/ Move the three tag-related top-level modules into a dedicated package: dojo/tag_inheritance.py -> dojo/tags/inheritance.py dojo/tag_utils.py -> dojo/tags/utils.py dojo/tags_signals.py -> dojo/tags/signals.py Update all internal call sites to the new canonical paths (apps, models, importers, finding, product/helpers, utils_cascade_delete, settings, unittests). No backward-compat shims left at the old paths. Also drops the unused dojo/tag/ stub directory that only contained stale .pyc files. No logic change. All tag inheritance and tag bulk unit tests pass. --- dojo/apps.py | 2 +- dojo/finding/helper.py | 2 +- dojo/finding/views.py | 2 +- dojo/importers/base_importer.py | 2 +- dojo/importers/default_importer.py | 4 +-- dojo/importers/default_reimporter.py | 4 +-- dojo/importers/location_manager.py | 2 +- dojo/models.py | 6 ++-- dojo/product/helpers.py | 2 +- dojo/settings/settings.dist.py | 2 +- dojo/tags/__init__.py | 0 .../inheritance.py} | 10 +++--- dojo/{tags_signals.py => tags/signals.py} | 4 +-- dojo/{tag_utils.py => tags/utils.py} | 0 dojo/utils_cascade_delete.py | 2 +- unittests/test_tag_inheritance.py | 34 +++++++++---------- unittests/test_tag_utils_bulk.py | 2 +- 17 files changed, 40 insertions(+), 40 deletions(-) create mode 100644 dojo/tags/__init__.py rename dojo/{tag_inheritance.py => tags/inheritance.py} (98%) rename dojo/{tags_signals.py => tags/signals.py} (97%) rename dojo/{tag_utils.py => tags/utils.py} (100%) diff --git a/dojo/apps.py b/dojo/apps.py index 4b1af1ef192..d23a3c6f853 100644 --- a/dojo/apps.py +++ b/dojo/apps.py @@ -94,7 +94,7 @@ def ready(self): import dojo.product_type.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady import dojo.risk_acceptance.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady import dojo.sla_config.helpers # noqa: PLC0415, F401 raised: AppRegistryNotReady - import dojo.tags_signals # noqa: PLC0415, F401 raised: AppRegistryNotReady + import dojo.tags.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady import dojo.test.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady import dojo.tool_product.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady import dojo.url.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 0a7eb0bcdfe..79b54c20ab7 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -742,7 +742,7 @@ def bulk_clear_finding_m2m(finding_qs): FileUpload.delete() fires and removes files from disk storage. Tags are handled via bulk_remove_all_tags to maintain tag counts. """ - from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import + from dojo.tags.utils import bulk_remove_all_tags # noqa: PLC0415 circular import finding_ids = finding_qs.values_list("id", flat=True) diff --git a/dojo/finding/views.py b/dojo/finding/views.py index 0d32235bf1f..d3cc025be26 100644 --- a/dojo/finding/views.py +++ b/dojo/finding/views.py @@ -94,7 +94,7 @@ User, ) from dojo.notifications.helper import create_notification -from dojo.tag_utils import bulk_add_tags_to_instances +from dojo.tags.utils import bulk_add_tags_to_instances from dojo.test.queries import get_authorized_tests from dojo.tools import tool_issue_updater from dojo.utils import ( diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 052b006356c..d87524185fe 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -33,7 +33,7 @@ Test_Type, ) from dojo.notifications.helper import create_notification -from dojo.tag_utils import bulk_add_tags_to_instances +from dojo.tags.utils import bulk_add_tags_to_instances from dojo.tools.factory import get_parser from dojo.tools.parser_test import ParserTest from dojo.utils import max_safe diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index b4e047225a0..a480bbcf879 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -5,7 +5,6 @@ from django.db.models.query_utils import Q from django.urls import reverse -from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser @@ -20,7 +19,8 @@ ) from dojo.notifications.helper import async_create_notification from dojo.product.helpers import apply_inherited_tags_for_findings -from dojo.tag_utils import bulk_apply_parser_tags +from dojo.tags import inheritance as tag_inheritance +from dojo.tags.utils import bulk_apply_parser_tags from dojo.utils import get_full_url, perform_product_grading from dojo.validators import clean_tags diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 62f6da12c6e..83635fd6ae9 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -6,7 +6,6 @@ from django.db.models.query_utils import Q import dojo.finding.helper as finding_helper -from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding.deduplication import ( find_candidates_for_deduplication_hash, @@ -26,7 +25,8 @@ Test_Import, ) from dojo.product.helpers import apply_inherited_tags_for_findings -from dojo.tag_utils import bulk_apply_parser_tags +from dojo.tags import inheritance as tag_inheritance +from dojo.tags.utils import bulk_apply_parser_tags from dojo.utils import perform_product_grading from dojo.validators import clean_tags diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index dc0171db731..5c86ef3f604 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -9,11 +9,11 @@ from django.db import transaction from django.utils import timezone -from dojo import tag_inheritance from dojo.importers.base_location_manager import BaseLocationManager from dojo.location.models import AbstractLocation, Location, LocationFindingReference, LocationProductReference from dojo.location.status import FindingLocationStatus, ProductLocationStatus from dojo.models import Product, _manage_inherited_tags +from dojo.tags import inheritance as tag_inheritance from dojo.tools.locations import LocationData from dojo.url.models import URL from dojo.utils import get_system_setting diff --git a/dojo/models.py b/dojo/models.py index 540a44bc27e..4dfd1015b5d 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -111,10 +111,10 @@ def _get_statistics_for_queryset(qs, annotation_factory): def _manage_inherited_tags(obj, incoming_inherited_tags, potentially_existing_tags=None): - # Backward-compat shim. Implementation lives in dojo.tag_inheritance; lazy - # import keeps dojo.models loadable before dojo.tag_inheritance (which + # Backward-compat shim. Implementation lives in dojo.tags.inheritance; lazy + # import keeps dojo.models loadable before dojo.tags.inheritance (which # transitively imports dojo.utils -> dojo.models) is ready. - from dojo.tag_inheritance import _manage_inherited_tags as _impl # noqa: PLC0415 + from dojo.tags.inheritance import _manage_inherited_tags as _impl # noqa: PLC0415 return _impl(obj, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index 47a9e71bd34..99c4eed0718 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -3,7 +3,7 @@ from dojo.celery import app from dojo.models import Product -from dojo.tag_inheritance import ( +from dojo.tags.inheritance import ( _LOCATION_PREFETCH_FOR_INHERITANCE, # noqa: F401 -- backward compat re-export _inherited_tag_names_for_location, # noqa: F401 -- backward compat re-export _sync_inheritance_for_qs, # noqa: F401 -- backward compat re-export diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 522ad54e7c0..70c58bddbee 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -329,7 +329,7 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param _populate_notifications_settings(env, globals()) TAG_PREFETCHING = env("DD_TAG_PREFETCHING") -# Tag bulk add batch size (used by dojo.tag_utils.bulk_add_tag_to_instances) +# Tag bulk add batch size (used by dojo.tags.utils.bulk_add_tag_to_instances) TAG_BULK_ADD_BATCH_SIZE = env("DD_TAG_BULK_ADD_BATCH_SIZE") diff --git a/dojo/tags/__init__.py b/dojo/tags/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dojo/tag_inheritance.py b/dojo/tags/inheritance.py similarity index 98% rename from dojo/tag_inheritance.py rename to dojo/tags/inheritance.py index 2f6752fd783..3a09f75639d 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tags/inheritance.py @@ -6,7 +6,7 @@ - ``batch_mode()`` — thread-local context manager that suppresses per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` signals. While inside a batch, the signal handlers in - ``dojo/tags_signals.py`` early-return; the calling code is responsible for + ``dojo/tags/signals.py`` early-return; the calling code is responsible for applying inheritance in bulk (e.g. via the importer's existing ``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). @@ -16,7 +16,7 @@ the full rationale). - The per-instance inheritance helpers previously scattered across - ``dojo/tags_signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` + ``dojo/tags/signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` (``_manage_inherited_tags``, ``get_products``, ``inherit_product_tags``, ``get_products_to_inherit_tags_from``, ``propagate_inheritance``, ``inherit_instance_tags``, ``inherit_linked_instance_tags``). @@ -42,13 +42,13 @@ from tagulous.models.managers import FakeTagRelatedManager # Top-level imports of dojo internals are safe here because -# ``dojo.tag_inheritance`` is loaded lazily — never during the initial +# ``dojo.tags.inheritance`` is loaded lazily — never during the initial # evaluation of ``dojo.models``. By the time anything imports this module # (signals registration, importers, the per-model ``inherit_tags()`` shim # in ``dojo.models``), the full model layer is initialised. from dojo.location.models import Location from dojo.models import Endpoint, Engagement, Finding, Product, Test -from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances +from dojo.tags.utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances from dojo.utils import get_system_setting logger = logging.getLogger(__name__) @@ -90,7 +90,7 @@ def batch_mode(): # --------------------------------------------------------------------------- # Per-instance inheritance helpers (relocated from dojo/models.py + -# dojo/tags_signals.py). Logic unchanged. +# dojo/tags/signals.py). Logic unchanged. # --------------------------------------------------------------------------- diff --git a/dojo/tags_signals.py b/dojo/tags/signals.py similarity index 97% rename from dojo/tags_signals.py rename to dojo/tags/signals.py index 48d9ebc7bc4..7830fd81875 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags/signals.py @@ -4,12 +4,12 @@ from django.db.models import signals from django.dispatch import receiver -from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.product import helpers as async_product_funcs -from dojo.tag_inheritance import ( +from dojo.tags import inheritance as tag_inheritance +from dojo.tags.inheritance import ( get_products, # noqa: F401 -- backward compat re-export get_products_to_inherit_tags_from, # noqa: F401 -- backward compat re-export inherit_instance_tags, # noqa: F401 -- backward compat re-export diff --git a/dojo/tag_utils.py b/dojo/tags/utils.py similarity index 100% rename from dojo/tag_utils.py rename to dojo/tags/utils.py diff --git a/dojo/utils_cascade_delete.py b/dojo/utils_cascade_delete.py index 992e072411c..1fb54113aa7 100644 --- a/dojo/utils_cascade_delete.py +++ b/dojo/utils_cascade_delete.py @@ -190,7 +190,7 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations # Clear M2M through tables before deleting (not discovered by _meta.related_objects). # Skip if the caller already handled M2M cleanup for this model (e.g. bulk_clear_finding_m2m). if from_model not in skip_m2m_for: - from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import + from dojo.tags.utils import bulk_remove_all_tags # noqa: PLC0415 circular import bulk_remove_all_tags(from_model, instance_pk_query) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index 7b9bff83991..7666348f25b 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -27,7 +27,7 @@ from dojo.location.status import ProductLocationStatus from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type from dojo.product.helpers import propagate_tags_on_product_sync -from dojo.tags_signals import get_products, inherit_product_tags, propagate_inheritance +from dojo.tags.signals import get_products, inherit_product_tags, propagate_inheritance from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( DojoAPITestCase, @@ -118,32 +118,32 @@ def _make_product(self, *, per_product_flag): p.enable_product_tag_inheritance = per_product_flag return p - @patch("dojo.tag_inheritance.get_system_setting", return_value=True) - @patch("dojo.tag_inheritance.get_products") + @patch("dojo.tags.inheritance.get_system_setting", return_value=True) + @patch("dojo.tags.inheritance.get_products") def test_system_setting_on_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] self.assertTrue(inherit_product_tags(MagicMock())) - @patch("dojo.tag_inheritance.get_system_setting", return_value=False) - @patch("dojo.tag_inheritance.get_products") + @patch("dojo.tags.inheritance.get_system_setting", return_value=False) + @patch("dojo.tags.inheritance.get_products") def test_per_product_flag_on_system_off_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=True)] self.assertTrue(inherit_product_tags(MagicMock())) - @patch("dojo.tag_inheritance.get_system_setting", return_value=False) - @patch("dojo.tag_inheritance.get_products") + @patch("dojo.tags.inheritance.get_system_setting", return_value=False) + @patch("dojo.tags.inheritance.get_products") def test_both_off_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] self.assertFalse(inherit_product_tags(MagicMock())) - @patch("dojo.tag_inheritance.get_system_setting", return_value=False) - @patch("dojo.tag_inheritance.get_products") + @patch("dojo.tags.inheritance.get_system_setting", return_value=False) + @patch("dojo.tags.inheritance.get_products") def test_no_products_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [] self.assertFalse(inherit_product_tags(MagicMock())) - @patch("dojo.tag_inheritance.get_system_setting", return_value=False) - @patch("dojo.tag_inheritance.get_products") + @patch("dojo.tags.inheritance.get_system_setting", return_value=False) + @patch("dojo.tags.inheritance.get_products") def test_none_entries_in_product_list_are_skipped(self, mock_get_products, mock_setting): mock_get_products.return_value = [None, self._make_product(per_product_flag=False)] self.assertFalse(inherit_product_tags(MagicMock())) @@ -177,28 +177,28 @@ def _make_product(self, tag_names): product.tags.all.return_value = [self._tag(n) for n in tag_names] return product - @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") + @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") def test_already_in_sync_returns_false(self, mock_get): """inherited_tags matches product tags and all present in tag_list → skip.""" instance = self._make_instance(["alpha", "beta"]) mock_get.return_value = [self._make_product(["alpha", "beta"])] self.assertFalse(propagate_inheritance(instance, tag_list=["alpha", "beta"])) - @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") + @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") def test_product_tags_changed_returns_true(self, mock_get): """Stored inherited_tags differ from current product tags → must propagate.""" instance = self._make_instance(["old"]) mock_get.return_value = [self._make_product(["new"])] self.assertTrue(propagate_inheritance(instance, tag_list=["old", "new"])) - @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") + @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") def test_tags_not_yet_applied_to_instance_returns_true(self, mock_get): """inherited_tags already correct but not yet reflected in tag_list → must propagate.""" instance = self._make_instance(["alpha"]) mock_get.return_value = [self._make_product(["alpha"])] self.assertTrue(propagate_inheritance(instance, tag_list=[])) - @patch("dojo.tag_inheritance.get_products_to_inherit_tags_from") + @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") def test_no_products_no_inherited_tags_returns_false(self, mock_get): """No products, no inherited tags, empty tag_list → already in sync, skip.""" instance = self._make_instance([]) @@ -427,7 +427,7 @@ def setUp(self): self.system_settings(enable_product_tag_inheritance=True) def test_location_inherits_from_multiple_products(self): - from dojo.tags_signals import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.signals import inherit_instance_tags # noqa: PLC0415 p1 = self.create_product("Product A", tags=["p1-tag"]) p2 = self.create_product("Product B", tags=["p2-tag"]) @@ -449,7 +449,7 @@ def test_location_inherits_from_multiple_products(self): self.assertIn("p2-tag", tag_names) def test_location_inherits_only_from_flagged_product_when_system_off(self): - from dojo.tags_signals import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.signals import inherit_instance_tags # noqa: PLC0415 self.system_settings(enable_product_tag_inheritance=False) diff --git a/unittests/test_tag_utils_bulk.py b/unittests/test_tag_utils_bulk.py index 3a815041fb4..5e22515dd50 100644 --- a/unittests/test_tag_utils_bulk.py +++ b/unittests/test_tag_utils_bulk.py @@ -5,7 +5,7 @@ from dojo.location.models import Location from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type -from dojo.tag_utils import bulk_add_tag_mapping, bulk_add_tags_to_instances, bulk_apply_parser_tags +from dojo.tags.utils import bulk_add_tag_mapping, bulk_add_tags_to_instances, bulk_apply_parser_tags from dojo.url.models import URL from unittests.dojo_test_case import DojoAPITestCase, versioned_fixtures From 46667c6d056edb16266d25c70d72f36351cef499 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 10:54:10 +0200 Subject: [PATCH 04/19] rename batch_mode to suppressed --- dojo/importers/default_importer.py | 2 +- dojo/importers/location_manager.py | 2 +- dojo/tags/inheritance.py | 6 +++--- dojo/tags/signals.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index a480bbcf879..9c05d85083a 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -169,7 +169,7 @@ def process_findings( # Inheritance is then applied in bulk per-batch (right before # `post_process_findings_batch` dispatch) so rules/dedup see inherited # tags on `finding.tags`. - with tag_inheritance.batch_mode(): + with tag_inheritance.suppress(): return self._process_findings_internal(parsed_findings, **kwargs) def _process_findings_internal( diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index 5c86ef3f604..2594aeb7184 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -560,7 +560,7 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: # under threaded gunicorn / Celery thread pools / ASGI threadpools: # while disconnected, every thread in the process lost sticky # enforcement. Thread-local batch state avoids that hazard. - with tag_inheritance.batch_mode(): + with tag_inheritance.suppress(): for location in locations: target_tag_names: set[str] = set() for pid in product_ids_by_location[location.id]: diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 3a09f75639d..e11219c6a6a 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -56,13 +56,13 @@ _state = threading.local() -def is_in_batch_mode() -> bool: +def is_suppressed() -> bool: """Return True when the current thread is inside an active ``batch()``.""" return bool(getattr(_state, "depth", 0)) @contextmanager -def batch_mode(): +def suppress(): """ Suppress per-instance inheritance signals for the calling thread. @@ -175,7 +175,7 @@ def inherit_instance_tags(instance): # caller (signal handler or bulk_create cleanup) need not know about # batch_mode; whoever opened the batch is responsible for the bulk # apply at exit. - if is_in_batch_mode(): + if is_suppressed(): return if inherit_product_tags(instance): # TODO: Is this change OK to make? diff --git a/dojo/tags/signals.py b/dojo/tags/signals.py index 7830fd81875..d7cffdb5026 100644 --- a/dojo/tags/signals.py +++ b/dojo/tags/signals.py @@ -44,7 +44,7 @@ def make_inherited_tags_sticky(sender, instance, action, **kwargs): # for applying inheritance in bulk; per-row signal work would defeat the # purpose. This replaces the old `signals.m2m_changed.disconnect(...)` # pattern, which was process-global and unsafe under threaded workers. - if tag_inheritance.is_in_batch_mode(): + if tag_inheritance.is_suppressed(): return if action in {"post_add", "post_remove"}: if inherit_product_tags(instance): From 99494c91925987682a400f72f99cd1cd5403172f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 10:57:40 +0200 Subject: [PATCH 05/19] perf(tags): check cached system_setting before per-product flag Tag inheritance is usually enabled system-wide first, then optionally overridden per product. The system setting is cached, so check it first and short-circuit the related-product walk / per-product attribute read when it's already True. Sites updated: - dojo/tags/inheritance.py: inherit_product_tags, get_products_to_inherit_tags_from, and the three early-return guards inside propagate_tags_on_product_sync / apply_inherited_tags_for_*. - dojo/location/models.py: Location.products_to_inherit_tags_from. - dojo/importers/location_manager.py: bulk-inherit early-return. Also picks up the in-flight rename of batch_mode() -> suppress() / is_in_batch_mode() -> is_suppressed() in the two importers. Behavior unchanged. All tag inheritance / perf / bulk unit tests pass (36 + 20 + 29). --- dojo/importers/default_reimporter.py | 2 +- dojo/importers/location_manager.py | 6 +++--- dojo/location/models.py | 12 ++++++------ dojo/tags/inheritance.py | 26 +++++++++++++------------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 83635fd6ae9..16f33bb154f 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -270,7 +270,7 @@ def process_findings( # Inheritance is then applied in bulk per-batch (right before # `post_process_findings_batch` dispatch) so rules/dedup see inherited # tags on `finding.tags`. - with tag_inheritance.batch_mode(): + with tag_inheritance.suppress(): return self._process_findings_internal(parsed_findings, **kwargs) def _process_findings_internal( diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index 2594aeb7184..535c1655b3e 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -495,10 +495,10 @@ def _bulk_inherit_tags(self, locations): if not locations: return - # Check whether tag inheritance is enabled at either the product level or system-wide; quit early if neither - product_inherit = getattr(self._product, "enable_product_tag_inheritance", False) + # Check whether tag inheritance is enabled at either the product level or system-wide; quit early if neither. + # System-wide setting is cached, so check it first to short-circuit before reading the product attribute. system_wide_inherit = bool(get_system_setting("enable_product_tag_inheritance")) - if not system_wide_inherit and not product_inherit: + if not system_wide_inherit and not getattr(self._product, "enable_product_tag_inheritance", False): return # A location can be shared across multiple products. Its inherited tags should be the union of diff --git a/dojo/location/models.py b/dojo/location/models.py index e617ad4e755..16d3262c9ff 100644 --- a/dojo/location/models.py +++ b/dojo/location/models.py @@ -267,12 +267,12 @@ def iter_related_products(self) -> list[Product]: def products_to_inherit_tags_from(self) -> list[Product]: from dojo.utils import get_system_setting # noqa: PLC0415 - system_wide_inherit = get_system_setting("enable_product_tag_inheritance") - return [ - product for product - in self.all_related_products() - if product.enable_product_tag_inheritance or system_wide_inherit - ] + # System-wide setting is cached — short-circuit before reading the + # per-product flag on every related product. + products = self.all_related_products() + if get_system_setting("enable_product_tag_inheritance"): + return products + return [product for product in products if product.enable_product_tag_inheritance] def inherit_tags(self, potentially_existing_tags): # get a copy of the tags to be inherited diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index e11219c6a6a..e87a195ef98 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -135,21 +135,21 @@ def get_products(instance): def inherit_product_tags(instance) -> bool: - products = get_products(instance) - # Save a read in the db - if any(product.enable_product_tag_inheritance for product in products if product): + # System-wide setting is cached, so check it first to short-circuit + # before walking related products. + if get_system_setting("enable_product_tag_inheritance"): return True - - return get_system_setting("enable_product_tag_inheritance") + products = get_products(instance) + return any(product.enable_product_tag_inheritance for product in products if product) def get_products_to_inherit_tags_from(instance): products = get_products(instance) - system_wide_inherit = get_system_setting("enable_product_tag_inheritance") - - return [ - product for product in products if product.enable_product_tag_inheritance or system_wide_inherit - ] + # System-wide setting is cached — short-circuit before reading the + # per-product flag on every related product. + if get_system_setting("enable_product_tag_inheritance"): + return products + return [product for product in products if product.enable_product_tag_inheritance] def propagate_inheritance(instance, tag_list=None): @@ -212,7 +212,7 @@ def propagate_tags_on_product_sync(product): # child kind) even when no inheritance work is possible. State transitions # (toggling the flag on/off) still trigger a full sweep via the m2m_changed # handler on `Product.tags.through` and the per-product flag save handler. - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + if not (get_system_setting("enable_product_tag_inheritance") or product.enable_product_tag_inheritance): return inherited_tag_names = {tag.name for tag in product.tags.all()} @@ -263,7 +263,7 @@ def apply_inherited_tags_for_endpoints(endpoints): if not endpoints: return product = endpoints[0].product - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + if not (get_system_setting("enable_product_tag_inheritance") or product.enable_product_tag_inheritance): return inherited_tag_names = {tag.name for tag in product.tags.all()} _sync_inheritance_for_qs( @@ -291,7 +291,7 @@ def apply_inherited_tags_for_findings(findings): # Single-product invariant inside one importer call. Smart upload calls # this per-product so the assumption holds there too. product = findings[0].test.engagement.product - if not (product.enable_product_tag_inheritance or get_system_setting("enable_product_tag_inheritance")): + if not (get_system_setting("enable_product_tag_inheritance") or product.enable_product_tag_inheritance): return inherited_tag_names = {tag.name for tag in product.tags.all()} finding_ids = [f.pk for f in findings] From 039c65c16c176c0189ced58703b37f69ae40087f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 11:04:29 +0200 Subject: [PATCH 06/19] refactor(tags): simplify is_tag_inheritance_enabled, drop linked-instance wrapper - is_tag_inheritance_enabled now delegates to get_products_to_inherit_tags_from (no products eligible -> not enabled). Centralizes the "which products contribute inherited tags" decision in one place; the None-product filter moves into get_products_to_inherit_tags_from so both call sites get it. - Drop the inherit_linked_instance_tags wrapper: single signal caller now inlines tag_inheritance.inherit_instance_tags(instance.location). - Trim dead backward-compat re-exports from dojo/tags/signals.py; reroute the test imports to dojo.tags.inheritance directly. Behavior unchanged. test_tag_inheritance (36) and test_tag_inheritance_perf (20) pass. --- dojo/tags/inheritance.py | 24 ++++++++---------------- dojo/tags/signals.py | 15 ++++----------- unittests/test_tag_inheritance.py | 16 ++++++++-------- 3 files changed, 20 insertions(+), 35 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index e87a195ef98..ff3634ca383 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -19,7 +19,7 @@ ``dojo/tags/signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` (``_manage_inherited_tags``, ``get_products``, ``inherit_product_tags``, ``get_products_to_inherit_tags_from``, ``propagate_inheritance``, - ``inherit_instance_tags``, ``inherit_linked_instance_tags``). + ``inherit_instance_tags``). - The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``) plus per-batch importer helpers (``apply_inherited_tags_for_findings`` / @@ -134,17 +134,8 @@ def get_products(instance): return [] -def inherit_product_tags(instance) -> bool: - # System-wide setting is cached, so check it first to short-circuit - # before walking related products. - if get_system_setting("enable_product_tag_inheritance"): - return True - products = get_products(instance) - return any(product.enable_product_tag_inheritance for product in products if product) - - def get_products_to_inherit_tags_from(instance): - products = get_products(instance) + products = [p for p in get_products(instance) if p] # System-wide setting is cached — short-circuit before reading the # per-product flag on every related product. if get_system_setting("enable_product_tag_inheritance"): @@ -152,6 +143,11 @@ def get_products_to_inherit_tags_from(instance): return [product for product in products if product.enable_product_tag_inheritance] +def is_tag_inheritance_enabled(instance) -> bool: + # delegate so we have logic centralized. no products -> no inheritance enabled. + return bool(get_products_to_inherit_tags_from(instance)) + + def propagate_inheritance(instance, tag_list=None): # Get the expected product tags if tag_list is None: @@ -177,7 +173,7 @@ def inherit_instance_tags(instance): # apply at exit. if is_suppressed(): return - if inherit_product_tags(instance): + if is_tag_inheritance_enabled(instance): # TODO: Is this change OK to make? # tag_list = instance._tags_tagulous.get_tag_list() tag_list = instance.tags.get_tag_list() @@ -185,10 +181,6 @@ def inherit_instance_tags(instance): instance.inherit_tags(tag_list) -def inherit_linked_instance_tags(instance): - inherit_instance_tags(instance.location) - - # --------------------------------------------------------------------------- # Bulk product-wide inheritance (relocated from dojo/product/helpers.py). # Logic unchanged. diff --git a/dojo/tags/signals.py b/dojo/tags/signals.py index d7cffdb5026..fd915b7bb42 100644 --- a/dojo/tags/signals.py +++ b/dojo/tags/signals.py @@ -9,14 +9,7 @@ from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.product import helpers as async_product_funcs from dojo.tags import inheritance as tag_inheritance -from dojo.tags.inheritance import ( - get_products, # noqa: F401 -- backward compat re-export - get_products_to_inherit_tags_from, # noqa: F401 -- backward compat re-export - inherit_instance_tags, # noqa: F401 -- backward compat re-export - inherit_linked_instance_tags, # noqa: F401 -- backward compat re-export - inherit_product_tags, - propagate_inheritance, -) +from dojo.tags.inheritance import is_tag_inheritance_enabled, propagate_inheritance logger = logging.getLogger(__name__) @@ -28,7 +21,7 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): with contextlib.suppress(AttributeError): running_async_process = instance.running_async_process # Check if the async process is already running to avoid calling it a second time - if not running_async_process and inherit_product_tags(instance): + if not running_async_process and is_tag_inheritance_enabled(instance): dojo_dispatch_task(async_product_funcs.propagate_tags_on_product, instance.id, countdown=5) instance.running_async_process = True @@ -47,7 +40,7 @@ def make_inherited_tags_sticky(sender, instance, action, **kwargs): if tag_inheritance.is_suppressed(): return if action in {"post_add", "post_remove"}: - if inherit_product_tags(instance): + if is_tag_inheritance_enabled(instance): tag_list = [tag.name for tag in instance.tags.all()] if propagate_inheritance(instance, tag_list=tag_list): instance.inherit_tags(tag_list) @@ -72,4 +65,4 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): @receiver(signals.post_save, sender=LocationFindingReference) @receiver(signals.post_save, sender=LocationProductReference) def inherit_tags_on_linked_instance(sender, instance, created, **kwargs): - tag_inheritance.inherit_linked_instance_tags(instance) + tag_inheritance.inherit_instance_tags(instance.location) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index 7666348f25b..d80b85f02a9 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -27,7 +27,7 @@ from dojo.location.status import ProductLocationStatus from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type from dojo.product.helpers import propagate_tags_on_product_sync -from dojo.tags.signals import get_products, inherit_product_tags, propagate_inheritance +from dojo.tags.inheritance import get_products, is_tag_inheritance_enabled, propagate_inheritance from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( DojoAPITestCase, @@ -122,31 +122,31 @@ def _make_product(self, *, per_product_flag): @patch("dojo.tags.inheritance.get_products") def test_system_setting_on_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] - self.assertTrue(inherit_product_tags(MagicMock())) + self.assertTrue(is_tag_inheritance_enabled(MagicMock())) @patch("dojo.tags.inheritance.get_system_setting", return_value=False) @patch("dojo.tags.inheritance.get_products") def test_per_product_flag_on_system_off_returns_true(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=True)] - self.assertTrue(inherit_product_tags(MagicMock())) + self.assertTrue(is_tag_inheritance_enabled(MagicMock())) @patch("dojo.tags.inheritance.get_system_setting", return_value=False) @patch("dojo.tags.inheritance.get_products") def test_both_off_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [self._make_product(per_product_flag=False)] - self.assertFalse(inherit_product_tags(MagicMock())) + self.assertFalse(is_tag_inheritance_enabled(MagicMock())) @patch("dojo.tags.inheritance.get_system_setting", return_value=False) @patch("dojo.tags.inheritance.get_products") def test_no_products_returns_false(self, mock_get_products, mock_setting): mock_get_products.return_value = [] - self.assertFalse(inherit_product_tags(MagicMock())) + self.assertFalse(is_tag_inheritance_enabled(MagicMock())) @patch("dojo.tags.inheritance.get_system_setting", return_value=False) @patch("dojo.tags.inheritance.get_products") def test_none_entries_in_product_list_are_skipped(self, mock_get_products, mock_setting): mock_get_products.return_value = [None, self._make_product(per_product_flag=False)] - self.assertFalse(inherit_product_tags(MagicMock())) + self.assertFalse(is_tag_inheritance_enabled(MagicMock())) class TestPropagateInheritanceEarlyExit(unittest.TestCase): @@ -427,7 +427,7 @@ def setUp(self): self.system_settings(enable_product_tag_inheritance=True) def test_location_inherits_from_multiple_products(self): - from dojo.tags.signals import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.inheritance import inherit_instance_tags # noqa: PLC0415 p1 = self.create_product("Product A", tags=["p1-tag"]) p2 = self.create_product("Product B", tags=["p2-tag"]) @@ -449,7 +449,7 @@ def test_location_inherits_from_multiple_products(self): self.assertIn("p2-tag", tag_names) def test_location_inherits_only_from_flagged_product_when_system_off(self): - from dojo.tags.signals import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.inheritance import inherit_instance_tags # noqa: PLC0415 self.system_settings(enable_product_tag_inheritance=False) From 184e0113f62025e27047ac8482d69a1f09f8e168 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 11:18:50 +0200 Subject: [PATCH 07/19] refactor(tags): merge propagate_inheritance into inherit_instance_tags Combine the two-step gate (propagate_inheritance returning bool, then caller writing) into a single inherit_instance_tags() with an optional force=False kwarg that bypasses the suppress() check. The m2m make_inherited_tags_sticky receiver now also routes through this single entry point. Wrap the inner instance.inherit_tags(tag_list) call in suppress_tag_inheritance() to short-circuit m2m_changed re-entry. The re-entrant signal previously did a redundant in-sync check using a stale get_tag_list() cached value; suppressing it both eliminates the recursion risk and removes per-row redundant queries: - test_baseline_zap_scan_import_v2: 470 -> 463 - test_baseline_zap_scan_import_v3: 938 -> 931 - test_create_one_finding_v2/v3: 64 -> 61 - test_create_100_findings_v2/v3: 4024 -> 3724 - test_finding_add_user_tag_v2/v3: 17 -> 16 - test_finding_remove_inherited_v2/v3: 44 -> 40 Also picks up the rename of the context manager suppress() -> suppress_tag_inheritance(). Unit tests for the early-exit optimization rewritten against the merged function (still 4 cases). --- dojo/importers/default_importer.py | 2 +- dojo/importers/default_reimporter.py | 2 +- dojo/importers/location_manager.py | 2 +- dojo/tags/inheritance.py | 66 +++++++++++++------------- dojo/tags/signals.py | 25 +++++----- unittests/test_tag_inheritance.py | 47 ++++++++++-------- unittests/test_tag_inheritance_perf.py | 20 ++++---- 7 files changed, 85 insertions(+), 79 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 9c05d85083a..3c24d7fc7ec 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -169,7 +169,7 @@ def process_findings( # Inheritance is then applied in bulk per-batch (right before # `post_process_findings_batch` dispatch) so rules/dedup see inherited # tags on `finding.tags`. - with tag_inheritance.suppress(): + with tag_inheritance.suppress_tag_inheritance(): return self._process_findings_internal(parsed_findings, **kwargs) def _process_findings_internal( diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 16f33bb154f..d09391eaf5b 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -270,7 +270,7 @@ def process_findings( # Inheritance is then applied in bulk per-batch (right before # `post_process_findings_batch` dispatch) so rules/dedup see inherited # tags on `finding.tags`. - with tag_inheritance.suppress(): + with tag_inheritance.suppress_tag_inheritance(): return self._process_findings_internal(parsed_findings, **kwargs) def _process_findings_internal( diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index 535c1655b3e..d1d0ac88c68 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -560,7 +560,7 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: # under threaded gunicorn / Celery thread pools / ASGI threadpools: # while disconnected, every thread in the process lost sticky # enforcement. Thread-local batch state avoids that hazard. - with tag_inheritance.suppress(): + with tag_inheritance.suppress_tag_inheritance(): for location in locations: target_tag_names: set[str] = set() for pid in product_ids_by_location[location.id]: diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index ff3634ca383..6fc2d150335 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -18,8 +18,7 @@ - The per-instance inheritance helpers previously scattered across ``dojo/tags/signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` (``_manage_inherited_tags``, ``get_products``, ``inherit_product_tags``, - ``get_products_to_inherit_tags_from``, ``propagate_inheritance``, - ``inherit_instance_tags``). + ``get_products_to_inherit_tags_from``, ``inherit_instance_tags``). - The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``) plus per-batch importer helpers (``apply_inherited_tags_for_findings`` / @@ -62,12 +61,12 @@ def is_suppressed() -> bool: @contextmanager -def suppress(): +def suppress_tag_inheritance(): """ Suppress per-instance inheritance signals for the calling thread. Usage: - with tag_inheritance.batch(): + with tag_inheritance.suppress_tag_inheritance(): # Bulk operations that would otherwise fire `make_inherited_tags_sticky` # or `inherit_tags_on_instance` per row. ... @@ -148,37 +147,38 @@ def is_tag_inheritance_enabled(instance) -> bool: return bool(get_products_to_inherit_tags_from(instance)) -def propagate_inheritance(instance, tag_list=None): - # Get the expected product tags - if tag_list is None: - tag_list = [] - product_inherited_tags = [ - tag.name - for product in get_products_to_inherit_tags_from(instance) - for tag in product.tags.all() - ] +def inherit_instance_tags(instance, *, force=False): + """ + Apply product-inherited tags to ``instance``. + + Unless ``force=True``, respects ``suppress_tag_inheritance()`` so bulk callers can defer + per-instance work. Skips the write when inherited_tags already match the + contributing products' tags and the instance's tag_list already contains + them. + """ + if not force and is_suppressed(): + return + products = get_products_to_inherit_tags_from(instance) + if not products: + return + tag_list = instance.tags.get_tag_list() + product_inherited_tags = [tag.name for product in products for tag in product.tags.all()] existing_inherited_tags = [tag.name for tag in instance.inherited_tags.all()] - # Check if product tags already matches inherited tags - product_tags_equals_inherited_tags = product_inherited_tags == existing_inherited_tags - # Check if product tags have already been inherited - tags_have_already_been_inherited = set(product_inherited_tags) <= set(tag_list) - return not (product_tags_equals_inherited_tags and tags_have_already_been_inherited) - - -def inherit_instance_tags(instance): - """Usually nothing to do when saving a model, except for new models?""" - # Suppress per-instance inheritance work inside an active batch. The - # caller (signal handler or bulk_create cleanup) need not know about - # batch_mode; whoever opened the batch is responsible for the bulk - # apply at exit. - if is_suppressed(): + # Skip the write if both: stored inherited_tags already match and all are + # present in the instance's tag_list (already applied). + if ( + product_inherited_tags == existing_inherited_tags + and set(product_inherited_tags) <= set(tag_list) + ): return - if is_tag_inheritance_enabled(instance): - # TODO: Is this change OK to make? - # tag_list = instance._tags_tagulous.get_tag_list() - tag_list = instance.tags.get_tag_list() - if propagate_inheritance(instance, tag_list=tag_list): - instance.inherit_tags(tag_list) + # Suppress reentrancy: the inherit_tags() write fires m2m_changed on + # the tags through-table, which would dispatch make_inherited_tags_sticky + # back into this function. We're already writing the correct state, so + # the signal has nothing to enforce. Without this guard, get_tag_list()'s + # cached value during the inner m2m_changed can be stale, defeating the + # in-sync early-exit and causing infinite signal recursion. + with suppress_tag_inheritance(): + instance.inherit_tags(tag_list) # --------------------------------------------------------------------------- diff --git a/dojo/tags/signals.py b/dojo/tags/signals.py index fd915b7bb42..6ac7f06d91e 100644 --- a/dojo/tags/signals.py +++ b/dojo/tags/signals.py @@ -9,7 +9,7 @@ from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.product import helpers as async_product_funcs from dojo.tags import inheritance as tag_inheritance -from dojo.tags.inheritance import is_tag_inheritance_enabled, propagate_inheritance +from dojo.tags.inheritance import is_tag_inheritance_enabled logger = logging.getLogger(__name__) @@ -32,18 +32,19 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): @receiver(signals.m2m_changed, sender=Finding.tags.through) @receiver(signals.m2m_changed, sender=Location.tags.through) def make_inherited_tags_sticky(sender, instance, action, **kwargs): - """Make sure inherited tags are added back in if they are removed""" - # Inside a `tag_inheritance.batch()` block the caller takes responsibility - # for applying inheritance in bulk; per-row signal work would defeat the - # purpose. This replaces the old `signals.m2m_changed.disconnect(...)` - # pattern, which was process-global and unsafe under threaded workers. - if tag_inheritance.is_suppressed(): - return + """ + Make sure inherited tags are added back in if they are removed. + + Inside a ``tag_inheritance.suppress_tag_inheritance()`` block the caller takes + responsibility for applying inheritance in bulk; per-row signal work + would defeat the purpose. This replaces the old + ``signals.m2m_changed.disconnect(...)`` pattern, which was + process-global and unsafe under threaded workers. + ``inherit_instance_tags`` itself early-returns when suppression is + active. + """ if action in {"post_add", "post_remove"}: - if is_tag_inheritance_enabled(instance): - tag_list = [tag.name for tag in instance.tags.all()] - if propagate_inheritance(instance, tag_list=tag_list): - instance.inherit_tags(tag_list) + tag_inheritance.inherit_instance_tags(instance) @receiver(signals.post_save, sender=Endpoint) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index d80b85f02a9..ecb19bb0e9d 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -27,7 +27,7 @@ from dojo.location.status import ProductLocationStatus from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type from dojo.product.helpers import propagate_tags_on_product_sync -from dojo.tags.inheritance import get_products, is_tag_inheritance_enabled, propagate_inheritance +from dojo.tags.inheritance import get_products, inherit_instance_tags, is_tag_inheritance_enabled from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( DojoAPITestCase, @@ -149,15 +149,15 @@ def test_none_entries_in_product_list_are_skipped(self, mock_get_products, mock_ self.assertFalse(is_tag_inheritance_enabled(MagicMock())) -class TestPropagateInheritanceEarlyExit(unittest.TestCase): +class TestInheritInstanceTagsEarlyExit(unittest.TestCase): """ - Unit tests for propagate_inheritance() — the optimization guard that skips redundant DB writes. + Unit tests for inherit_instance_tags() — the optimization guard that skips redundant DB writes. - Returns False ("nothing to do") only when BOTH conditions hold: - 1. product tags match what is stored in instance.inherited_tags (already recorded) - 2. those tags are already present in the instance's full tag_list (already applied) - If either condition is false, returns True and the caller proceeds to write tags. + instance.inherit_tags(...) is only invoked when EITHER: + 1. product tags differ from what is stored in instance.inherited_tags, OR + 2. some product tag is not yet present in the instance's full tag_list. + Otherwise the call is a no-op. get_products_to_inherit_tags_from and instance.inherited_tags.all() are mocked to isolate the boolean logic from DB access. """ @@ -167,9 +167,10 @@ def _tag(self, name): t.name = name return t - def _make_instance(self, inherited_names): + def _make_instance(self, inherited_names, tag_list): instance = MagicMock() instance.inherited_tags.all.return_value = [self._tag(n) for n in inherited_names] + instance.tags.get_tag_list.return_value = list(tag_list) return instance def _make_product(self, tag_names): @@ -178,32 +179,36 @@ def _make_product(self, tag_names): return product @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_already_in_sync_returns_false(self, mock_get): + def test_already_in_sync_skips_write(self, mock_get): """inherited_tags matches product tags and all present in tag_list → skip.""" - instance = self._make_instance(["alpha", "beta"]) + instance = self._make_instance(["alpha", "beta"], tag_list=["alpha", "beta"]) mock_get.return_value = [self._make_product(["alpha", "beta"])] - self.assertFalse(propagate_inheritance(instance, tag_list=["alpha", "beta"])) + inherit_instance_tags(instance) + instance.inherit_tags.assert_not_called() @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_product_tags_changed_returns_true(self, mock_get): + def test_product_tags_changed_triggers_write(self, mock_get): """Stored inherited_tags differ from current product tags → must propagate.""" - instance = self._make_instance(["old"]) + instance = self._make_instance(["old"], tag_list=["old", "new"]) mock_get.return_value = [self._make_product(["new"])] - self.assertTrue(propagate_inheritance(instance, tag_list=["old", "new"])) + inherit_instance_tags(instance) + instance.inherit_tags.assert_called_once_with(["old", "new"]) @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_tags_not_yet_applied_to_instance_returns_true(self, mock_get): + def test_tags_not_yet_applied_to_instance_triggers_write(self, mock_get): """inherited_tags already correct but not yet reflected in tag_list → must propagate.""" - instance = self._make_instance(["alpha"]) + instance = self._make_instance(["alpha"], tag_list=[]) mock_get.return_value = [self._make_product(["alpha"])] - self.assertTrue(propagate_inheritance(instance, tag_list=[])) + inherit_instance_tags(instance) + instance.inherit_tags.assert_called_once_with([]) @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_no_products_no_inherited_tags_returns_false(self, mock_get): - """No products, no inherited tags, empty tag_list → already in sync, skip.""" - instance = self._make_instance([]) + def test_no_products_skips_write(self, mock_get): + """No products → no-op, regardless of any cached state.""" + instance = self._make_instance([], tag_list=[]) mock_get.return_value = [] - self.assertFalse(propagate_inheritance(instance, tag_list=[])) + inherit_instance_tags(instance) + instance.inherit_tags.assert_not_called() # --------------------------------------------------------------------------- diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index f1aec1317a9..6637ab5264f 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -364,15 +364,15 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 53 EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 53 - EXPECTED_CREATE_ONE_FINDING_V2 = 64 - EXPECTED_CREATE_ONE_FINDING_V3 = 64 - EXPECTED_CREATE_100_FINDINGS_V2 = 4024 - EXPECTED_CREATE_100_FINDINGS_V3 = 4024 + EXPECTED_CREATE_ONE_FINDING_V2 = 61 + EXPECTED_CREATE_ONE_FINDING_V3 = 61 + EXPECTED_CREATE_100_FINDINGS_V2 = 3724 + EXPECTED_CREATE_100_FINDINGS_V3 = 3724 - EXPECTED_FINDING_ADD_USER_TAG_V2 = 17 - EXPECTED_FINDING_ADD_USER_TAG_V3 = 17 - EXPECTED_FINDING_REMOVE_INHERITED_V2 = 44 - EXPECTED_FINDING_REMOVE_INHERITED_V3 = 44 + EXPECTED_FINDING_ADD_USER_TAG_V2 = 16 + EXPECTED_FINDING_ADD_USER_TAG_V3 = 16 + EXPECTED_FINDING_REMOVE_INHERITED_V2 = 40 + EXPECTED_FINDING_REMOVE_INHERITED_V3 = 40 # V2 endpoint paths. Pre-Phase-A: 3958 add, 3740 remove. EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 91 @@ -498,7 +498,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # import path because the previous process-global signal-disconnect was # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 470 - EXPECTED_ZAP_IMPORT_V3 = 938 + EXPECTED_ZAP_IMPORT_V2 = 463 + EXPECTED_ZAP_IMPORT_V3 = 931 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 102 From fc357eb2cf3e84ccb3fea33a30a0c4f15f175d7a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 12:11:54 +0200 Subject: [PATCH 08/19] refactor(tags): diff-based _sync_inherited_tags, drop per-model wrappers Rewrite the per-row inheritance primitive as a pure diff: current_inherited = obj.inherited_tags.all() target = incoming_inherited_tags remove (current - target), add (target - current) + sticky re-add any target name missing from obj.tags Drops the "rebuild full tag_list, set() everything" approach and removes the `existing_tags_hint` parameter entirely. Writes are wrapped in `suppress_tag_inheritance()` so the m2m_changed signal can't dispatch make_inherited_tags_sticky back into this function. Other cleanup: - Rename `_manage_inherited_tags` -> `_sync_inherited_tags`. - Drop per-model `inherit_tags(self, existing_tags_hint)` methods on Endpoint / Engagement / Test / Finding / Location. The signal path (`inherit_instance_tags`) and `LocationManager._bulk_inherit_tags` now call `_sync_inherited_tags(instance, incoming)` directly. - Drop the unused `existing_tags_by_location` dict in `LocationManager._bulk_inherit_tags` (one fewer through-table read). - Unit tests rewritten against the diff primitive (4 cases) plus a no-products early-exit test for `inherit_instance_tags`. Perf baselines (test_tag_inheritance_perf): EXPECTED_ZAP_IMPORT_V2: 463 -> 422 EXPECTED_ZAP_IMPORT_V3: 931 -> 809 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3:102 -> 101 EXPECTED_CREATE_ONE_FINDING_V2/V3: 61 -> 55 EXPECTED_CREATE_100_FINDINGS_V2/V3: 3724 -> 3124 (-600) EXPECTED_FINDING_REMOVE_INHERITED_V2/V3: 40 -> 18 EXPECTED_FINDING_ADD_USER_TAG_V2/V3: 16 -> 17 (+1) --- dojo/importers/location_manager.py | 15 ++-- dojo/location/models.py | 7 +- dojo/models.py | 26 +------ dojo/tags/inheritance.py | 103 ++++++++++++++----------- unittests/test_tag_inheritance.py | 96 +++++++++++++---------- unittests/test_tag_inheritance_perf.py | 24 +++--- 6 files changed, 136 insertions(+), 135 deletions(-) diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index d1d0ac88c68..f1168a02f9f 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -12,7 +12,7 @@ from dojo.importers.base_location_manager import BaseLocationManager from dojo.location.models import AbstractLocation, Location, LocationFindingReference, LocationProductReference from dojo.location.status import FindingLocationStatus, ProductLocationStatus -from dojo.models import Product, _manage_inherited_tags +from dojo.models import Product, _sync_inherited_tags from dojo.tags import inheritance as tag_inheritance from dojo.tools.locations import LocationData from dojo.url.models import URL @@ -485,7 +485,7 @@ def type_id(x: tuple[int, AbstractLocation]) -> int: def _bulk_inherit_tags(self, locations): """ Bulk equivalent of calling inherit_instance_tags(loc) for many Locations. Actually persisting updates is handled - by a per-location call to _manage_inherited_tags(), but at least determining what the tags are is more efficient + by a per-location call to _sync_inherited_tags(), but at least determining what the tags are is more efficient (plus we can skip locations that don't need an update at all). When tag inheritance is enabled, computes the target inherited tags for each location from all related products @@ -546,11 +546,10 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: tags_by_location[l_id].add(t_name) return tags_by_location - # Gather inherited and 'regular' tags per location + # Gather inherited tags per location for the in-sync skip check below. existing_inherited_by_location: dict[int, set[str]] = _get_tags(Location.inherited_tags) - existing_tags_by_location: dict[int, set[str]] = _get_tags(Location.tags) - # Perform the bulk updates inside a `tag_inheritance.batch()` context. + # Perform the bulk updates inside a `tag_inheritance.suppress_tag_inheritance()` context. # While the batch is active, signal handlers in `dojo/tags_signals.py` # short-circuit per-row inheritance work that would otherwise fire on # every `(inherited_)tags.set()` and defeat the bulk update. @@ -574,8 +573,4 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: continue # Update tags for this location - _manage_inherited_tags( - location, - list(target_tag_names), - potentially_existing_tags=existing_tags_by_location[location.id], - ) + _sync_inherited_tags(location, list(target_tag_names)) diff --git a/dojo/location/models.py b/dojo/location/models.py index 16d3262c9ff..6c6b738655d 100644 --- a/dojo/location/models.py +++ b/dojo/location/models.py @@ -34,7 +34,7 @@ LocationQueryset, ) from dojo.location.status import FindingLocationStatus, ProductLocationStatus -from dojo.models import Dojo_User, Finding, Product, _manage_inherited_tags, copy_model_util +from dojo.models import Dojo_User, Finding, Product, copy_model_util from dojo.tools.locations import LocationAssociationData if TYPE_CHECKING: @@ -274,11 +274,6 @@ def products_to_inherit_tags_from(self) -> list[Product]: return products return [product for product in products if product.enable_product_tag_inheritance] - def inherit_tags(self, potentially_existing_tags): - # get a copy of the tags to be inherited - incoming_inherited_tags = [tag.name for product in self.products_to_inherit_tags_from() for tag in product.tags.all()] - _manage_inherited_tags(self, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) - class Meta: verbose_name = "Locations - Location" verbose_name_plural = "Locations - Locations" diff --git a/dojo/models.py b/dojo/models.py index 4dfd1015b5d..a41f5640889 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -110,12 +110,12 @@ def _get_statistics_for_queryset(qs, annotation_factory): return stats -def _manage_inherited_tags(obj, incoming_inherited_tags, potentially_existing_tags=None): +def _sync_inherited_tags(obj, incoming_inherited_tags): # Backward-compat shim. Implementation lives in dojo.tags.inheritance; lazy # import keeps dojo.models loadable before dojo.tags.inheritance (which # transitively imports dojo.utils -> dojo.models) is ready. - from dojo.tags.inheritance import _manage_inherited_tags as _impl # noqa: PLC0415 - return _impl(obj, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) + from dojo.tags.inheritance import _sync_inherited_tags as _impl # noqa: PLC0415 + return _impl(obj, incoming_inherited_tags) def copy_model_util(model_in_database, exclude_fields: list[str] | None = None): @@ -1569,11 +1569,6 @@ def delete(self, *args, **kwargs): from dojo.utils import perform_product_grading # noqa: PLC0415 circular import perform_product_grading(self.product) - def inherit_tags(self, potentially_existing_tags): - # get a copy of the tags to be inherited - incoming_inherited_tags = [tag.name for tag in self.product.tags.all()] - _manage_inherited_tags(self, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) - class CWE(models.Model): url = models.CharField(max_length=1000) @@ -2019,11 +2014,6 @@ def from_uri(uri): fragment=fragment, ) - def inherit_tags(self, potentially_existing_tags): - # get a copy of the tags to be inherited - incoming_inherited_tags = [tag.name for tag in self.product.tags.all()] - _manage_inherited_tags(self, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) - class Development_Environment(models.Model): name = models.CharField(max_length=200) @@ -2224,11 +2214,6 @@ def statistics(self): """Queries the database, no prefetching, so could be slow for lists of model instances""" return _get_statistics_for_queryset(Finding.objects.filter(test=self), _get_annotations_for_statistics) - def inherit_tags(self, potentially_existing_tags): - # get a copy of the tags to be inherited - incoming_inherited_tags = [tag.name for tag in self.engagement.product.tags.all()] - _manage_inherited_tags(self, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) - class Test_Import(TimeStampedModel): @@ -3528,11 +3513,6 @@ def vulnerability_ids(self): # Remove duplicates return list(dict.fromkeys(vulnerability_ids)) - def inherit_tags(self, potentially_existing_tags): - # get a copy of the tags to be inherited - incoming_inherited_tags = [tag.name for tag in self.test.engagement.product.tags.all()] - _manage_inherited_tags(self, incoming_inherited_tags, potentially_existing_tags=potentially_existing_tags) - @property def violates_sla(self): return (self.sla_expiration_date and self.sla_expiration_date < timezone.now().date()) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 6fc2d150335..1b183dc34e9 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -17,7 +17,7 @@ - The per-instance inheritance helpers previously scattered across ``dojo/tags/signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` - (``_manage_inherited_tags``, ``get_products``, ``inherit_product_tags``, + (``_sync_inherited_tags``, ``get_products``, ``inherit_product_tags``, ``get_products_to_inherit_tags_from``, ``inherit_instance_tags``). - The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``) @@ -93,28 +93,58 @@ def suppress_tag_inheritance(): # --------------------------------------------------------------------------- -def _manage_inherited_tags(obj, incoming_inherited_tags, potentially_existing_tags=None): - # get copies of the current tag lists - if potentially_existing_tags is None: - potentially_existing_tags = [] - current_inherited_tags = [] if isinstance(obj.inherited_tags, FakeTagRelatedManager) else [tag.name for tag in obj.inherited_tags.all()] - tag_list = potentially_existing_tags if isinstance(obj.tags, FakeTagRelatedManager) or len(potentially_existing_tags) > 0 else [tag.name for tag in obj.tags.all()] - # Clean existing tag list from the old inherited tags. This represents the tags on the object and not the product - cleaned_tag_list = [tag for tag in tag_list if tag not in current_inherited_tags] - # Add the incoming inherited tag list - if incoming_inherited_tags: - for tag in incoming_inherited_tags: - if tag not in cleaned_tag_list: - cleaned_tag_list.append(tag) - # Update the current list of inherited tags. iteratively do this because of tagulous object restraints +def _sync_inherited_tags(obj, incoming_inherited_tags): + """ + Sync ``obj.inherited_tags`` and ``obj.tags`` to match ``incoming_inherited_tags``. + + Diff-based: only the inherited names that changed are added/removed. Also + re-adds any inherited name that has been stripped from ``obj.tags`` directly + (sticky enforcement). + + Writes are wrapped in ``suppress_tag_inheritance()`` so the m2m_changed + signal fired by each ``.add()``/``.remove()`` does not dispatch + ``make_inherited_tags_sticky`` back into this function. The context + manager is reentrant so callers that already opened a batch (e.g. + ``inherit_instance_tags`` or ``LocationManager._bulk_inherit_tags``) + nest harmlessly. + """ + target = set(incoming_inherited_tags or []) + + # Unsaved instance: FakeTagRelatedManager has no .all()/.add()/.remove(). + # Set in-memory tag lists directly, merging incoming into any preset tags. + # set_tag_list() is purely in-memory — no DB write, no m2m_changed — so it + # doesn't need the suppress wrap. The `obj.tags.add(*target)` fallback + # below covers a theoretical mixed-state case (saved tags manager next to + # an unsaved inherited_tags manager) and DOES fire m2m_changed, so it + # gets wrapped. if isinstance(obj.inherited_tags, FakeTagRelatedManager): - obj.inherited_tags.set_tag_list(incoming_inherited_tags) - if incoming_inherited_tags: - obj.tags.set_tag_list(cleaned_tag_list) - else: - obj.inherited_tags.set(incoming_inherited_tags) - if incoming_inherited_tags: - obj.tags.set(cleaned_tag_list) + obj.inherited_tags.set_tag_list(list(target)) + if target: + if isinstance(obj.tags, FakeTagRelatedManager): + existing = obj.tags.get_tag_list() + obj.tags.set_tag_list(list(dict.fromkeys([*existing, *target]))) + else: + with suppress_tag_inheritance(): + obj.tags.add(*target) + return + + current_inherited = {tag.name for tag in obj.inherited_tags.all()} + current_tags = {tag.name for tag in obj.tags.all()} + to_remove = current_inherited - target + to_add = target - current_inherited + # Sticky: any target name already absent from obj.tags AND not covered by + # to_add (user-driven m2m_changed stripped it). Re-add separately. + sticky_missing = (target - current_tags) - to_add + + with suppress_tag_inheritance(): + if to_remove: + obj.inherited_tags.remove(*to_remove) + obj.tags.remove(*to_remove) + if to_add: + obj.inherited_tags.add(*to_add) + obj.tags.add(*to_add) + if sticky_missing: + obj.tags.add(*sticky_missing) def get_products(instance): @@ -151,34 +181,17 @@ def inherit_instance_tags(instance, *, force=False): """ Apply product-inherited tags to ``instance``. - Unless ``force=True``, respects ``suppress_tag_inheritance()`` so bulk callers can defer - per-instance work. Skips the write when inherited_tags already match the - contributing products' tags and the instance's tag_list already contains - them. + Unless ``force=True``, respects ``suppress_tag_inheritance()`` so bulk + callers can defer per-instance work. The underlying ``_sync_inherited_tags`` + diffs the current vs target inherited set and only writes the delta. """ if not force and is_suppressed(): return products = get_products_to_inherit_tags_from(instance) if not products: return - tag_list = instance.tags.get_tag_list() - product_inherited_tags = [tag.name for product in products for tag in product.tags.all()] - existing_inherited_tags = [tag.name for tag in instance.inherited_tags.all()] - # Skip the write if both: stored inherited_tags already match and all are - # present in the instance's tag_list (already applied). - if ( - product_inherited_tags == existing_inherited_tags - and set(product_inherited_tags) <= set(tag_list) - ): - return - # Suppress reentrancy: the inherit_tags() write fires m2m_changed on - # the tags through-table, which would dispatch make_inherited_tags_sticky - # back into this function. We're already writing the correct state, so - # the signal has nothing to enforce. Without this guard, get_tag_list()'s - # cached value during the inner m2m_changed can be stale, defeating the - # in-sync early-exit and causing infinite signal recursion. - with suppress_tag_inheritance(): - instance.inherit_tags(tag_list) + incoming_inherited_tags = [tag.name for product in products for tag in product.tags.all()] + _sync_inherited_tags(instance, incoming_inherited_tags) # --------------------------------------------------------------------------- @@ -381,7 +394,7 @@ def _sync_inheritance_for_qs(queryset, *, target_names_per_child): remove_map[name].append(child) # Apply adds. Both `tags` and `inherited_tags` get the same set of new - # inherited names — `_manage_inherited_tags` did the same. + # inherited names — `_sync_inherited_tags` did the same. if add_map: bulk_add_tag_mapping(add_map, tag_field_name="inherited_tags") bulk_add_tag_mapping(add_map, tag_field_name="tags") diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index ecb19bb0e9d..2900c1eada8 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -27,7 +27,12 @@ from dojo.location.status import ProductLocationStatus from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type from dojo.product.helpers import propagate_tags_on_product_sync -from dojo.tags.inheritance import get_products, inherit_instance_tags, is_tag_inheritance_enabled +from dojo.tags.inheritance import ( + _sync_inherited_tags, # noqa: PLC2701 -- private API tested directly + get_products, + inherit_instance_tags, + is_tag_inheritance_enabled, +) from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( DojoAPITestCase, @@ -149,17 +154,16 @@ def test_none_entries_in_product_list_are_skipped(self, mock_get_products, mock_ self.assertFalse(is_tag_inheritance_enabled(MagicMock())) -class TestInheritInstanceTagsEarlyExit(unittest.TestCase): +class TestManageInheritedTagsDiff(unittest.TestCase): """ - Unit tests for inherit_instance_tags() — the optimization guard that skips redundant DB writes. - - instance.inherit_tags(...) is only invoked when EITHER: - 1. product tags differ from what is stored in instance.inherited_tags, OR - 2. some product tag is not yet present in the instance's full tag_list. - Otherwise the call is a no-op. - get_products_to_inherit_tags_from and instance.inherited_tags.all() are mocked - to isolate the boolean logic from DB access. + Unit tests for _sync_inherited_tags() — the diff primitive. + + Verifies that the function: + - Adds inherited tags that aren't yet recorded. + - Removes inherited tags that no longer belong. + - Re-adds inherited tags missing from instance.tags (sticky enforcement). + - Does no work when target matches current state. """ def _tag(self, name): @@ -167,48 +171,62 @@ def _tag(self, name): t.name = name return t - def _make_instance(self, inherited_names, tag_list): + def _make_instance(self, inherited_names, tag_names): instance = MagicMock() + # Skip the FakeTagRelatedManager branch so we exercise the diff path. + instance.inherited_tags.__class__ = MagicMock + instance.tags.__class__ = MagicMock instance.inherited_tags.all.return_value = [self._tag(n) for n in inherited_names] - instance.tags.get_tag_list.return_value = list(tag_list) + instance.tags.all.return_value = [self._tag(n) for n in tag_names] return instance - def _make_product(self, tag_names): - product = MagicMock() - product.tags.all.return_value = [self._tag(n) for n in tag_names] - return product + def test_already_in_sync_no_writes(self): + instance = self._make_instance(["alpha", "beta"], tag_names=["alpha", "beta"]) + _sync_inherited_tags(instance, ["alpha", "beta"]) + instance.inherited_tags.add.assert_not_called() + instance.inherited_tags.remove.assert_not_called() + instance.tags.add.assert_not_called() + instance.tags.remove.assert_not_called() + + def test_target_adds_new_inherited(self): + instance = self._make_instance(["old"], tag_names=["old", "user"]) + _sync_inherited_tags(instance, ["old", "new"]) + instance.inherited_tags.add.assert_called_once_with("new") + instance.tags.add.assert_called_once_with("new") + instance.inherited_tags.remove.assert_not_called() + instance.tags.remove.assert_not_called() + + def test_target_removes_dropped_inherited(self): + instance = self._make_instance(["alpha", "beta"], tag_names=["alpha", "beta", "user"]) + _sync_inherited_tags(instance, ["alpha"]) + instance.inherited_tags.remove.assert_called_once_with("beta") + instance.tags.remove.assert_called_once_with("beta") + instance.inherited_tags.add.assert_not_called() + instance.tags.add.assert_not_called() + + def test_sticky_readds_missing_inherited(self): + # inherited_tags already records "alpha", target is "alpha", but user + # stripped it from tags via m2m_changed. Sticky enforcement re-adds it. + instance = self._make_instance(["alpha"], tag_names=["user"]) + _sync_inherited_tags(instance, ["alpha"]) + instance.inherited_tags.add.assert_not_called() + instance.inherited_tags.remove.assert_not_called() + instance.tags.remove.assert_not_called() + instance.tags.add.assert_called_once_with("alpha") - @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_already_in_sync_skips_write(self, mock_get): - """inherited_tags matches product tags and all present in tag_list → skip.""" - instance = self._make_instance(["alpha", "beta"], tag_list=["alpha", "beta"]) - mock_get.return_value = [self._make_product(["alpha", "beta"])] - inherit_instance_tags(instance) - instance.inherit_tags.assert_not_called() - @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_product_tags_changed_triggers_write(self, mock_get): - """Stored inherited_tags differ from current product tags → must propagate.""" - instance = self._make_instance(["old"], tag_list=["old", "new"]) - mock_get.return_value = [self._make_product(["new"])] - inherit_instance_tags(instance) - instance.inherit_tags.assert_called_once_with(["old", "new"]) +class TestInheritInstanceTagsEarlyExit(unittest.TestCase): - @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") - def test_tags_not_yet_applied_to_instance_triggers_write(self, mock_get): - """inherited_tags already correct but not yet reflected in tag_list → must propagate.""" - instance = self._make_instance(["alpha"], tag_list=[]) - mock_get.return_value = [self._make_product(["alpha"])] - inherit_instance_tags(instance) - instance.inherit_tags.assert_called_once_with([]) + """No-products case: inherit_instance_tags must short-circuit before touching the instance.""" @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") def test_no_products_skips_write(self, mock_get): - """No products → no-op, regardless of any cached state.""" - instance = self._make_instance([], tag_list=[]) + instance = MagicMock() mock_get.return_value = [] inherit_instance_tags(instance) instance.inherit_tags.assert_not_called() + instance.inherited_tags.add.assert_not_called() + instance.tags.add.assert_not_called() # --------------------------------------------------------------------------- diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 6637ab5264f..76e843d5042 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -364,15 +364,15 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 53 EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 53 - EXPECTED_CREATE_ONE_FINDING_V2 = 61 - EXPECTED_CREATE_ONE_FINDING_V3 = 61 - EXPECTED_CREATE_100_FINDINGS_V2 = 3724 - EXPECTED_CREATE_100_FINDINGS_V3 = 3724 + EXPECTED_CREATE_ONE_FINDING_V2 = 55 + EXPECTED_CREATE_ONE_FINDING_V3 = 55 + EXPECTED_CREATE_100_FINDINGS_V2 = 3124 + EXPECTED_CREATE_100_FINDINGS_V3 = 3124 - EXPECTED_FINDING_ADD_USER_TAG_V2 = 16 - EXPECTED_FINDING_ADD_USER_TAG_V3 = 16 - EXPECTED_FINDING_REMOVE_INHERITED_V2 = 40 - EXPECTED_FINDING_REMOVE_INHERITED_V3 = 40 + EXPECTED_FINDING_ADD_USER_TAG_V2 = 17 + EXPECTED_FINDING_ADD_USER_TAG_V3 = 17 + EXPECTED_FINDING_REMOVE_INHERITED_V2 = 18 + EXPECTED_FINDING_REMOVE_INHERITED_V3 = 18 # V2 endpoint paths. Pre-Phase-A: 3958 add, 3740 remove. EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 91 @@ -395,7 +395,7 @@ class TagInheritanceImportPerfBaselines(DojoAPITestCase): Pinned query-count baselines for the importer hot path. Real production tag-inheritance cost lives in scan import / reimport: the - importer creates findings + endpoints/locations, then `_manage_inherited_tags` + importer creates findings + endpoints/locations, then `_sync_inherited_tags` runs per row. Phase A (bulk product-side propagation + post_save gated on create) doesn't touch this loop because the importer's hot path is creation-driven. Phase B's `tag_inheritance.batch()` context manager @@ -498,7 +498,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # import path because the previous process-global signal-disconnect was # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 463 - EXPECTED_ZAP_IMPORT_V3 = 931 + EXPECTED_ZAP_IMPORT_V2 = 422 + EXPECTED_ZAP_IMPORT_V3 = 809 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 102 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 101 From 7936ffd86e6727f8bdbded50ab7b8e394c439e73 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 12:22:45 +0200 Subject: [PATCH 09/19] add comments --- dojo/tags/inheritance.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 1b183dc34e9..247d833f5e9 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -124,6 +124,7 @@ def _sync_inherited_tags(obj, incoming_inherited_tags): existing = obj.tags.get_tag_list() obj.tags.set_tag_list(list(dict.fromkeys([*existing, *target]))) else: + # avoid reentrancy: the `add(*target)` write fires m2m_changed with suppress_tag_inheritance(): obj.tags.add(*target) return @@ -136,6 +137,7 @@ def _sync_inherited_tags(obj, incoming_inherited_tags): # to_add (user-driven m2m_changed stripped it). Re-add separately. sticky_missing = (target - current_tags) - to_add + # avoid reentrancy: the `remove(*to_remove)` / `add(*to_add)` / `add(*sticky_missing)` writes fire m2m_changed with suppress_tag_inheritance(): if to_remove: obj.inherited_tags.remove(*to_remove) From ad932dee3729e5d2a1059e639e3dc26020231196 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 15:28:40 +0200 Subject: [PATCH 10/19] refactor(tags): consolidate location inheritance, drop dojo/product/helpers.py - New `apply_inherited_tags_for_locations(locations, *, product)` mirrors the endpoint/finding helpers. Replaces `LocationManager._bulk_inherit_tags`; callsite delegates. Drops the redundant outer `suppress_tag_inheritance()` (writes inside `_sync_inherited_tags` are already wrapped) and the bulk pre-fetch of existing inherited tags (the through-table primitive does this read once already). - `_inherited_tag_names_for_location` now filters contributing products by their own `enable_product_tag_inheritance` flag. Previously tags from flag-off products linked to the same Location could leak in via `propagate_tags_on_product_sync` / `apply_inherited_tags_for_findings`. - Move `propagate_tags_on_product` Celery task into `dojo.tags.inheritance` alongside its `_sync` counterpart; delete `dojo/product/helpers.py`. Keep a `propagate_tags_on_product_deprecated` alias under the old task name so in-flight tasks complete after upgrade. - Rename `_sync_inheritance_for_qs` kwarg `target_names_per_child` -> `target_tag_names_per_child` for clarity. - Update import sites + perf baseline V3 import: 809 -> 445 queries. --- dojo/importers/default_importer.py | 2 +- dojo/importers/default_reimporter.py | 2 +- dojo/importers/endpoint_manager.py | 2 +- dojo/importers/location_manager.py | 108 +------------------- dojo/product/helpers.py | 22 ---- dojo/tags/inheritance.py | 134 +++++++++++++++++++++---- dojo/tags/signals.py | 18 +--- unittests/test_tag_inheritance.py | 2 +- unittests/test_tag_inheritance_perf.py | 8 +- 9 files changed, 128 insertions(+), 170 deletions(-) delete mode 100644 dojo/product/helpers.py diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 3c24d7fc7ec..bc78a6b9592 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -18,8 +18,8 @@ Test_Import, ) from dojo.notifications.helper import async_create_notification -from dojo.product.helpers import apply_inherited_tags_for_findings from dojo.tags import inheritance as tag_inheritance +from dojo.tags.inheritance import apply_inherited_tags_for_findings from dojo.tags.utils import bulk_apply_parser_tags from dojo.utils import get_full_url, perform_product_grading from dojo.validators import clean_tags diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index d09391eaf5b..222a142cbe0 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -24,8 +24,8 @@ Test, Test_Import, ) -from dojo.product.helpers import apply_inherited_tags_for_findings from dojo.tags import inheritance as tag_inheritance +from dojo.tags.inheritance import apply_inherited_tags_for_findings from dojo.tags.utils import bulk_apply_parser_tags from dojo.utils import perform_product_grading from dojo.validators import clean_tags diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index edf541cc762..f3af161f159 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -14,7 +14,7 @@ Finding, Product, ) -from dojo.product.helpers import apply_inherited_tags_for_endpoints +from dojo.tags.inheritance import apply_inherited_tags_for_endpoints logger = logging.getLogger(__name__) diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index f1168a02f9f..81b1a1a495d 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -12,16 +12,12 @@ from dojo.importers.base_location_manager import BaseLocationManager from dojo.location.models import AbstractLocation, Location, LocationFindingReference, LocationProductReference from dojo.location.status import FindingLocationStatus, ProductLocationStatus -from dojo.models import Product, _sync_inherited_tags from dojo.tags import inheritance as tag_inheritance from dojo.tools.locations import LocationData from dojo.url.models import URL -from dojo.utils import get_system_setting if TYPE_CHECKING: - from tagulous.models import TagField - - from dojo.models import Dojo_User, Finding + from dojo.models import Dojo_User, Finding, Product logger = logging.getLogger(__name__) @@ -215,7 +211,10 @@ def _persist_locations(self) -> None: ) # Trigger bulk tag inheritance - self._bulk_inherit_tags(loc.location for loc in saved) + tag_inheritance.apply_inherited_tags_for_locations( + [loc.location for loc in saved], + product=self._product, + ) # Clear accumulators self._locations_by_finding.clear() @@ -477,100 +476,3 @@ def type_id(x: tuple[int, AbstractLocation]) -> int: # Restore the original input ordering saved.sort(key=itemgetter(0)) return [loc for _, loc in saved] - - # ------------------------------------------------------------------ - # Tag inheritance - # ------------------------------------------------------------------ - - def _bulk_inherit_tags(self, locations): - """ - Bulk equivalent of calling inherit_instance_tags(loc) for many Locations. Actually persisting updates is handled - by a per-location call to _sync_inherited_tags(), but at least determining what the tags are is more efficient - (plus we can skip locations that don't need an update at all). - - When tag inheritance is enabled, computes the target inherited tags for each location from all related products - and updates only locations that are out of sync. - """ - locations = list(locations) - if not locations: - return - - # Check whether tag inheritance is enabled at either the product level or system-wide; quit early if neither. - # System-wide setting is cached, so check it first to short-circuit before reading the product attribute. - system_wide_inherit = bool(get_system_setting("enable_product_tag_inheritance")) - if not system_wide_inherit and not getattr(self._product, "enable_product_tag_inheritance", False): - return - - # A location can be shared across multiple products. Its inherited tags should be the union of - # tags from ALL contributing products, not just the one running this import. - location_ids = [loc.id for loc in locations] - product_ids_by_location: dict[int, set[int]] = {loc.id: set() for loc in locations} - - # Find associations through LocationProductReference entries - for loc_id, prod_id in LocationProductReference.objects.filter( - location_id__in=location_ids, - ).values_list("location_id", "product_id"): - product_ids_by_location[loc_id].add(prod_id) - - # Find associations through LocationFindingReference entries and the finding.test.engagement.product chain. - # This shouldn't add anything new, but just in case. - for loc_id, prod_id in ( - LocationFindingReference.objects - .filter(location_id__in=location_ids) - .values_list("location_id", "finding__test__engagement__product_id") - ): - product_ids_by_location[loc_id].add(prod_id) - - # Fetch all products that will contribute to tag inheritance, and their tags - all_product_ids = {pid for pids in product_ids_by_location.values() for pid in pids} - product_qs = Product.objects.filter(id__in=all_product_ids).prefetch_related("tags") - if not system_wide_inherit: - # Product-level inheritance only - product_qs = product_qs.filter(enable_product_tag_inheritance=True) - # Materialize into a dict for ease of use - products: dict[int, Product] = {p.id: p for p in product_qs} - # Get distinct tags, per-product - tags_by_product: dict[int, set[str]] = { - pid: {t.name for t in p.tags.all()} - for pid, p in products.items() - } - - # Helper method for getting all tags from the given TagField - def _get_tags(tags_field: TagField) -> dict[int, set[str]]: - through_model = tags_field.through - fk_name = tags_field.field.m2m_reverse_field_name() - tags_by_location: dict[int, set[str]] = {loc.id: set() for loc in locations} - for l_id, t_name in through_model.objects.filter( - location_id__in=location_ids, - ).values_list("location_id", f"{fk_name}__name"): - tags_by_location[l_id].add(t_name) - return tags_by_location - - # Gather inherited tags per location for the in-sync skip check below. - existing_inherited_by_location: dict[int, set[str]] = _get_tags(Location.inherited_tags) - - # Perform the bulk updates inside a `tag_inheritance.suppress_tag_inheritance()` context. - # While the batch is active, signal handlers in `dojo/tags_signals.py` - # short-circuit per-row inheritance work that would otherwise fire on - # every `(inherited_)tags.set()` and defeat the bulk update. - # - # This replaces a previous `signals.m2m_changed.disconnect(...)` / - # `connect(...)` dance which was process-global and therefore unsafe - # under threaded gunicorn / Celery thread pools / ASGI threadpools: - # while disconnected, every thread in the process lost sticky - # enforcement. Thread-local batch state avoids that hazard. - with tag_inheritance.suppress_tag_inheritance(): - for location in locations: - target_tag_names: set[str] = set() - for pid in product_ids_by_location[location.id]: - # product_ids_by_location may contain products that shouldn't to contribute to tag inheritance (we - # didn't filter either location ref lookups to check), so do a last-minute check here - if pid in products: - target_tag_names |= tags_by_product[pid] - - if target_tag_names == existing_inherited_by_location[location.id]: - # The existing set matches the expected set, so nothing more to do for this location - continue - - # Update tags for this location - _sync_inherited_tags(location, list(target_tag_names)) diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py deleted file mode 100644 index 99c4eed0718..00000000000 --- a/dojo/product/helpers.py +++ /dev/null @@ -1,22 +0,0 @@ -import contextlib -import logging - -from dojo.celery import app -from dojo.models import Product -from dojo.tags.inheritance import ( - _LOCATION_PREFETCH_FOR_INHERITANCE, # noqa: F401 -- backward compat re-export - _inherited_tag_names_for_location, # noqa: F401 -- backward compat re-export - _sync_inheritance_for_qs, # noqa: F401 -- backward compat re-export - apply_inherited_tags_for_endpoints, # noqa: F401 -- backward compat re-export - apply_inherited_tags_for_findings, # noqa: F401 -- backward compat re-export - propagate_tags_on_product_sync, -) - -logger = logging.getLogger(__name__) - - -@app.task -def propagate_tags_on_product(product_id, *args, **kwargs): - with contextlib.suppress(Product.DoesNotExist): - product = Product.objects.get(id=product_id) - propagate_tags_on_product_sync(product) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 247d833f5e9..157da8aa06d 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -16,11 +16,12 @@ the full rationale). - The per-instance inheritance helpers previously scattered across - ``dojo/tags/signals.py``, ``dojo/models.py``, and ``dojo/product/helpers.py`` + ``dojo/tags/signals.py`` and ``dojo/models.py`` (``_sync_inherited_tags``, ``get_products``, ``inherit_product_tags``, ``get_products_to_inherit_tags_from``, ``inherit_instance_tags``). -- The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``) +- The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``), + its Celery entrypoint (``propagate_tags_on_product``), plus per-batch importer helpers (``apply_inherited_tags_for_findings`` / ``apply_inherited_tags_for_endpoints``) and their shared ``_sync_inheritance_for_qs`` primitive. @@ -30,11 +31,10 @@ """ from __future__ import annotations -import contextlib import logging import threading from collections import defaultdict -from contextlib import contextmanager +from contextlib import contextmanager, suppress from django.conf import settings from django.db.models import Q @@ -45,6 +45,7 @@ # evaluation of ``dojo.models``. By the time anything imports this module # (signals registration, importers, the per-model ``inherit_tags()`` shim # in ``dojo.models``), the full model layer is initialised. +from dojo.celery import app from dojo.location.models import Location from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.tags.utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances @@ -56,7 +57,7 @@ def is_suppressed() -> bool: - """Return True when the current thread is inside an active ``batch()``.""" + """Return True when the current thread is inside an active ``suppress_tag_inheritance()``.""" return bool(getattr(_state, "depth", 0)) @@ -83,7 +84,7 @@ def suppress_tag_inheritance(): _state.depth -= 1 if _state.depth <= 0: # Clean up the attribute so leak-free thread reuse stays simple. - with contextlib.suppress(AttributeError): + with suppress(AttributeError): del _state.depth @@ -197,8 +198,7 @@ def inherit_instance_tags(instance, *, force=False): # --------------------------------------------------------------------------- -# Bulk product-wide inheritance (relocated from dojo/product/helpers.py). -# Logic unchanged. +# Bulk product-wide inheritance # --------------------------------------------------------------------------- @@ -226,17 +226,17 @@ def propagate_tags_on_product_sync(product): logger.debug("Propagating tags from %s to all engagements", product) _sync_inheritance_for_qs( Engagement.objects.filter(product=product), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) logger.debug("Propagating tags from %s to all tests", product) _sync_inheritance_for_qs( Test.objects.filter(engagement__product=product), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) logger.debug("Propagating tags from %s to all findings", product) _sync_inheritance_for_qs( Finding.objects.filter(test__engagement__product=product), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) if settings.V3_FEATURE_LOCATIONS: logger.debug("Propagating tags from %s to all locations", product) @@ -248,16 +248,31 @@ def propagate_tags_on_product_sync(product): # is the union of every related product's tags. Compute per-location. _sync_inheritance_for_qs( location_qs, - target_names_per_child=_inherited_tag_names_for_location, + target_tag_names_per_child=_inherited_tag_names_for_location, ) else: logger.debug("Propagating tags from %s to all endpoints", product) _sync_inheritance_for_qs( Endpoint.objects.filter(product=product), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) +@app.task(name="dojo.product.helpers.propagate_tags_on_product") +def propagate_tags_on_product_deprecated(product_id, *args, **kwargs): + # kept to make sure tasks are still processed if someone didn't do a clean shutdown before upgrading + logger.warning("propagate_tags_on_product_deprecated is deprecated and will be removed in a future version. Use propagate_tags_on_product instead.") + propagate_tags_on_product(product_id, *args, **kwargs) + + +@app.task(name="dojo.product.helpers.propagate_tags_on_product") +def propagate_tags_on_product(product_id, *args, **kwargs): + """Load Product by id and run ``propagate_tags_on_product_sync`` (Celery worker).""" + with suppress(Product.DoesNotExist): + product = Product.objects.get(id=product_id) + propagate_tags_on_product_sync(product) + + def apply_inherited_tags_for_endpoints(endpoints): """ Bulk inheritance for a list of Endpoints, e.g. those just created via @@ -275,7 +290,7 @@ def apply_inherited_tags_for_endpoints(endpoints): inherited_tag_names = {tag.name for tag in product.tags.all()} _sync_inheritance_for_qs( Endpoint.objects.filter(id__in=[e.pk for e in endpoints]), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) @@ -305,17 +320,17 @@ def apply_inherited_tags_for_findings(findings): _sync_inheritance_for_qs( Finding.objects.filter(id__in=finding_ids), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) if settings.V3_FEATURE_LOCATIONS: _sync_inheritance_for_qs( Location.objects.filter(findings__finding_id__in=finding_ids).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE), - target_names_per_child=_inherited_tag_names_for_location, + target_tag_names_per_child=_inherited_tag_names_for_location, ) else: _sync_inheritance_for_qs( Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct(), - target_names_per_child=lambda _child: inherited_tag_names, + target_tag_names_per_child=lambda _child: inherited_tag_names, ) @@ -327,33 +342,108 @@ def _inherited_tag_names_for_location(location): Product), a Location can be attached to multiple Products — directly via `LocationProductReference` or indirectly via `LocationFindingReference` -> Finding -> Test -> Engagement -> Product. The target inherited set is - therefore the UNION of every related Product's tags. + therefore the UNION of every related Product's tags, restricted to + Products whose own `enable_product_tag_inheritance` flag is on (or where + the system-wide setting is on). - Used as the `target_names_per_child` callback for `_sync_inheritance_for_qs` + Used as the `target_tag_names_per_child` callback for `_sync_inheritance_for_qs` on Location querysets; it must be called per Location because each Location has its own set of related Products. Uses `iter_related_products()` so that an upstream `prefetch_related(...)` reduces per-call cost to 0 queries. """ + system_wide = bool(get_system_setting("enable_product_tag_inheritance")) names: set[str] = set() for related_product in location.iter_related_products(): if related_product is None: continue + if not system_wide and not related_product.enable_product_tag_inheritance: + continue names.update(tag.name for tag in related_product.tags.all()) return names +def apply_inherited_tags_for_locations(locations, *, product): + """ + Per-batch bulk inheritance for Locations touched during an import. + + A Location can be linked to multiple Products via `LocationProductReference` + (direct) or `LocationFindingReference` -> Finding -> Test -> Engagement -> + Product (indirect). Target inherited set is the union of every contributing + Product's tags, filtered by each Product's `enable_product_tag_inheritance` + flag (skipped entirely when the system-wide setting is on). + + Gated on the importing `product`: when neither the system setting nor the + importing product's flag is on, this is a no-op. Tags from other products + propagate via their own `Product.tags.through` m2m_changed handler when + they change, so skipping here is safe. + + Uses values_list-based ref-table lookups (4 small queries) rather than + `prefetch_related(_LOCATION_PREFETCH_FOR_INHERITANCE)` to keep the + importer hot path lean. + """ + locations = list(locations) + if not locations: + return + system_wide = bool(get_system_setting("enable_product_tag_inheritance")) + if not system_wide and not getattr(product, "enable_product_tag_inheritance", False): + return + + from dojo.location.models import ( # noqa: PLC0415 + LocationFindingReference, + LocationProductReference, + ) + + location_ids = [loc.id for loc in locations] + product_ids_by_location: dict[int, set[int]] = {loc.id: set() for loc in locations} + + for loc_id, prod_id in LocationProductReference.objects.filter( + location_id__in=location_ids, + ).values_list("location_id", "product_id"): + product_ids_by_location[loc_id].add(prod_id) + + # LocationFindingReference -> Finding -> Test -> Engagement -> Product. + # Shouldn't add anything new (LocationProductReference is created alongside), + # but covers edge cases where only the finding ref exists. + for loc_id, prod_id in ( + LocationFindingReference.objects + .filter(location_id__in=location_ids) + .values_list("location_id", "finding__test__engagement__product_id") + ): + product_ids_by_location[loc_id].add(prod_id) + + all_product_ids = {pid for pids in product_ids_by_location.values() for pid in pids} + product_qs = Product.objects.filter(id__in=all_product_ids).prefetch_related("tags") + if not system_wide: + product_qs = product_qs.filter(enable_product_tag_inheritance=True) + tags_by_product: dict[int, set[str]] = { + p.id: {t.name for t in p.tags.all()} for p in product_qs + } + + def _target_for_location(loc): + names: set[str] = set() + for pid in product_ids_by_location[loc.id]: + # product_ids_by_location may contain products that shouldn't contribute + # (ref lookups weren't flag-filtered); check membership in tags_by_product. + tags = tags_by_product.get(pid) + if tags: + names |= tags + return names + + _sync_inheritance_for_qs(locations, target_tag_names_per_child=_target_for_location) + + _LOCATION_PREFETCH_FOR_INHERITANCE = ( "products__product__tags", "findings__finding__test__engagement__product__tags", ) -def _sync_inheritance_for_qs(queryset, *, target_names_per_child): +def _sync_inheritance_for_qs(queryset, *, target_tag_names_per_child): """ Sync inherited_tags + tags for every child in `queryset` to its target tag set. - target_names_per_child: callable(child) -> set[str]. + target_tag_names_per_child: callable(child) -> set[str]. Issues bulk SQL: one through-table read for current inherited_tags, then bulk add/remove on `tags` and `inherited_tags` fields. @@ -388,7 +478,7 @@ def _sync_inheritance_for_qs(queryset, *, target_names_per_child): add_map: dict[str, list] = defaultdict(list) remove_map: dict[str, list] = defaultdict(list) for child in children: - target = target_names_per_child(child) + target = target_tag_names_per_child(child) old = old_inherited_by_child.get(child.pk, set()) for name in target - old: add_map[name].append(child) diff --git a/dojo/tags/signals.py b/dojo/tags/signals.py index 6ac7f06d91e..07b895ed3c3 100644 --- a/dojo/tags/signals.py +++ b/dojo/tags/signals.py @@ -7,9 +7,7 @@ from dojo.celery_dispatch import dojo_dispatch_task from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test -from dojo.product import helpers as async_product_funcs from dojo.tags import inheritance as tag_inheritance -from dojo.tags.inheritance import is_tag_inheritance_enabled logger = logging.getLogger(__name__) @@ -21,8 +19,8 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): with contextlib.suppress(AttributeError): running_async_process = instance.running_async_process # Check if the async process is already running to avoid calling it a second time - if not running_async_process and is_tag_inheritance_enabled(instance): - dojo_dispatch_task(async_product_funcs.propagate_tags_on_product, instance.id, countdown=5) + if not running_async_process and tag_inheritance.is_tag_inheritance_enabled(instance): + dojo_dispatch_task(tag_inheritance.propagate_tags_on_product, instance.id, countdown=5) instance.running_async_process = True @@ -32,17 +30,7 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): @receiver(signals.m2m_changed, sender=Finding.tags.through) @receiver(signals.m2m_changed, sender=Location.tags.through) def make_inherited_tags_sticky(sender, instance, action, **kwargs): - """ - Make sure inherited tags are added back in if they are removed. - - Inside a ``tag_inheritance.suppress_tag_inheritance()`` block the caller takes - responsibility for applying inheritance in bulk; per-row signal work - would defeat the purpose. This replaces the old - ``signals.m2m_changed.disconnect(...)`` pattern, which was - process-global and unsafe under threaded workers. - ``inherit_instance_tags`` itself early-returns when suppression is - active. - """ + """Make sure inherited tags are added back in if they are removed.""" if action in {"post_add", "post_remove"}: tag_inheritance.inherit_instance_tags(instance) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index 2900c1eada8..870fa5a6983 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -26,12 +26,12 @@ from dojo.location.models import Location, LocationProductReference from dojo.location.status import ProductLocationStatus from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type -from dojo.product.helpers import propagate_tags_on_product_sync from dojo.tags.inheritance import ( _sync_inherited_tags, # noqa: PLC2701 -- private API tested directly get_products, inherit_instance_tags, is_tag_inheritance_enabled, + propagate_tags_on_product_sync, ) from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 76e843d5042..dd212ea9f0c 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -23,7 +23,7 @@ from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type -from dojo.product.helpers import propagate_tags_on_product_sync +from dojo.tags.inheritance import propagate_tags_on_product_sync from unittests.dojo_test_case import ( DojoAPITestCase, DojoTestCase, @@ -398,7 +398,7 @@ class TagInheritanceImportPerfBaselines(DojoAPITestCase): importer creates findings + endpoints/locations, then `_sync_inherited_tags` runs per row. Phase A (bulk product-side propagation + post_save gated on create) doesn't touch this loop because the importer's hot path is - creation-driven. Phase B's `tag_inheritance.batch()` context manager + creation-driven. Phase B's `tag_inheritance.suppress_tag_inheritance()` context manager targets it. Two scenarios: @@ -430,7 +430,7 @@ def test_baseline_zap_scan_import_v2(self): Captures total query count for: scan parse + finding creation + endpoint attachment + per-row inherit_tags signal chain. Production hot path. - Phase A leaves this number ~unchanged; Phase B's `tag_inheritance.batch()` + Phase A leaves this number ~unchanged; Phase B's `tag_inheritance.suppress_tag_inheritance()` targets it. """ with self.assertNumQueries(self.EXPECTED_ZAP_IMPORT_V2): @@ -499,6 +499,6 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. EXPECTED_ZAP_IMPORT_V2 = 422 - EXPECTED_ZAP_IMPORT_V3 = 809 + EXPECTED_ZAP_IMPORT_V3 = 445 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 101 From 5a715a07c870438e61700a436d588fa18393818f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 15:29:23 +0200 Subject: [PATCH 11/19] comments --- dojo/tags/inheritance.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 157da8aa06d..ca21b4cc8e0 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -213,14 +213,9 @@ def propagate_tags_on_product_sync(product): Product's current tags, and applies adds/removes via the bulk tag helpers. Both `tags` and `inherited_tags` fields are kept in sync. """ - # Skip the full child sweep when inheritance is disabled both system-wide - # and on this product. Without this gate the importer hot path pays ~9 - # queries per scan (one product-tags read + one list/through-table read per - # child kind) even when no inheritance work is possible. State transitions - # (toggling the flag on/off) still trigger a full sweep via the m2m_changed - # handler on `Product.tags.through` and the per-product flag save handler. if not (get_system_setting("enable_product_tag_inheritance") or product.enable_product_tag_inheritance): return + inherited_tag_names = {tag.name for tag in product.tags.all()} logger.debug("Propagating tags from %s to all engagements", product) From 854be22ca096d3f9ab894076751a4688d97be73c Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 15:30:24 +0200 Subject: [PATCH 12/19] comments --- dojo/tags/inheritance.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index ca21b4cc8e0..f2b067b61e0 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -1,34 +1,3 @@ -""" -Tag inheritance — central coordination module. - -Provides: - -- ``batch_mode()`` — thread-local context manager that suppresses - per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` - signals. While inside a batch, the signal handlers in - ``dojo/tags/signals.py`` early-return; the calling code is responsible for - applying inheritance in bulk (e.g. via the importer's existing - ``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). - - This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)`` - in importer hot loops, which was process-global and unsafe under threaded - gunicorn / Celery thread pools / ASGI threadpools (see PR description for - the full rationale). - -- The per-instance inheritance helpers previously scattered across - ``dojo/tags/signals.py`` and ``dojo/models.py`` - (``_sync_inherited_tags``, ``get_products``, ``inherit_product_tags``, - ``get_products_to_inherit_tags_from``, ``inherit_instance_tags``). - -- The bulk product-wide inheritance sync (``propagate_tags_on_product_sync``), - its Celery entrypoint (``propagate_tags_on_product``), - plus per-batch importer helpers (``apply_inherited_tags_for_findings`` / - ``apply_inherited_tags_for_endpoints``) and their shared ``_sync_inheritance_for_qs`` - primitive. - -Model imports are deferred to function bodies to keep this module loadable -before ``dojo.models`` finishes initialising. -""" from __future__ import annotations import logging @@ -88,12 +57,6 @@ def suppress_tag_inheritance(): del _state.depth -# --------------------------------------------------------------------------- -# Per-instance inheritance helpers (relocated from dojo/models.py + -# dojo/tags/signals.py). Logic unchanged. -# --------------------------------------------------------------------------- - - def _sync_inherited_tags(obj, incoming_inherited_tags): """ Sync ``obj.inherited_tags`` and ``obj.tags`` to match ``incoming_inherited_tags``. From 777001630f3e1dd0ccc4395c261757e8cb6d1ac2 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 15:45:36 +0200 Subject: [PATCH 13/19] refactor(tags): rename inherit_instance_tags -> auto_inherit_product_tags, move to signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames `inherit_instance_tags` to `auto_inherit_product_tags` and relocates it from `dojo.tags.inheritance` to `dojo.tags.signals`. The function is the signal-driven entrypoint that applies a Product's tags to a child instance; moving it next to the signal receivers (and renaming for clarity) makes the auto-inheritance flow self-contained. Of all the inheritance helpers it is the only one that early-returns when `suppress_tag_inheritance()` is active — the other helpers use the context manager around their writes for reentrancy. That gate semantic is exclusive to the signal path, so colocation fits. Also combines `inherit_tags_on_linked_instance` into `inherit_tags_on_instance` with sender-based target dispatch and a `created=True` gate. Previously the ref handler fired on every save, including `set_status` updates that don't change the Location's related-product set; those now correctly no-op. Drops the unused `force=` parameter on the function while renaming. --- dojo/tags/inheritance.py | 20 +--------- dojo/tags/signals.py | 62 ++++++++++++++++++++++++------- unittests/test_tag_inheritance.py | 18 ++++----- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index f2b067b61e0..bb1d6782ef2 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -69,7 +69,8 @@ def _sync_inherited_tags(obj, incoming_inherited_tags): signal fired by each ``.add()``/``.remove()`` does not dispatch ``make_inherited_tags_sticky`` back into this function. The context manager is reentrant so callers that already opened a batch (e.g. - ``inherit_instance_tags`` or ``LocationManager._bulk_inherit_tags``) + ``auto_inherit_product_tags`` in ``dojo.tags.signals``, or the importer's + bulk path) nest harmlessly. """ target = set(incoming_inherited_tags or []) @@ -143,23 +144,6 @@ def is_tag_inheritance_enabled(instance) -> bool: return bool(get_products_to_inherit_tags_from(instance)) -def inherit_instance_tags(instance, *, force=False): - """ - Apply product-inherited tags to ``instance``. - - Unless ``force=True``, respects ``suppress_tag_inheritance()`` so bulk - callers can defer per-instance work. The underlying ``_sync_inherited_tags`` - diffs the current vs target inherited set and only writes the delta. - """ - if not force and is_suppressed(): - return - products = get_products_to_inherit_tags_from(instance) - if not products: - return - incoming_inherited_tags = [tag.name for product in products for tag in product.tags.all()] - _sync_inherited_tags(instance, incoming_inherited_tags) - - # --------------------------------------------------------------------------- # Bulk product-wide inheritance # --------------------------------------------------------------------------- diff --git a/dojo/tags/signals.py b/dojo/tags/signals.py index 07b895ed3c3..6b8acaec59b 100644 --- a/dojo/tags/signals.py +++ b/dojo/tags/signals.py @@ -8,17 +8,49 @@ from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.tags import inheritance as tag_inheritance +from dojo.tags.inheritance import ( + _sync_inherited_tags, + get_products_to_inherit_tags_from, + is_suppressed, +) logger = logging.getLogger(__name__) +def auto_inherit_product_tags(instance): + """ + Apply product-inherited tags to ``instance`` from the auto-inheritance + signal path. + + Skipped while a ``suppress_tag_inheritance()`` context is active so bulk + callers (e.g. the importer hot loop) can defer per-instance work and run + inheritance once at batch time. The underlying ``_sync_inherited_tags`` + diffs the current vs target inherited set and only writes the delta. + """ + if is_suppressed(): + return + products = get_products_to_inherit_tags_from(instance) + if not products: + return + incoming_inherited_tags = [tag.name for product in products for tag in product.tags.all()] + _sync_inherited_tags(instance, incoming_inherited_tags) + + @receiver(signals.m2m_changed, sender=Product.tags.through) def product_tags_post_add_remove(sender, instance, action, **kwargs): if action in {"post_add", "post_remove"}: + # `running_async_process` is an in-memory dedup flag on the Product + # instance. `tags.set([...])` fires m2m_changed twice on the SAME + # instance — once `post_remove` for dropped tags, once `post_add` for + # added tags — and we only want one `propagate_tags_on_product` task + # per Python-level operation. Not persisted: scope is exactly the + # lifetime of this in-memory instance. Two separate `Product.objects + # .get(id=X).tags.add(...)` calls still dispatch twice; the + # downstream task is idempotent (diff-based sync, no-op when nothing + # changed) so duplicates waste a slot but don't corrupt state. running_async_process = False with contextlib.suppress(AttributeError): running_async_process = instance.running_async_process - # Check if the async process is already running to avoid calling it a second time if not running_async_process and tag_inheritance.is_tag_inheritance_enabled(instance): dojo_dispatch_task(tag_inheritance.propagate_tags_on_product, instance.id, countdown=5) instance.running_async_process = True @@ -32,7 +64,7 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): def make_inherited_tags_sticky(sender, instance, action, **kwargs): """Make sure inherited tags are added back in if they are removed.""" if action in {"post_add", "post_remove"}: - tag_inheritance.inherit_instance_tags(instance) + auto_inherit_product_tags(instance) @receiver(signals.post_save, sender=Endpoint) @@ -40,18 +72,20 @@ def make_inherited_tags_sticky(sender, instance, action, **kwargs): @receiver(signals.post_save, sender=Test) @receiver(signals.post_save, sender=Finding) @receiver(signals.post_save, sender=Location) +@receiver(signals.post_save, sender=LocationFindingReference) +@receiver(signals.post_save, sender=LocationProductReference) def inherit_tags_on_instance(sender, instance, created, **kwargs): - # Only inherit on creation. The previous behavior fired on every save - # (create OR update), repeatedly re-applying inherited tags to children - # whose tag state had not changed. Sticky enforcement on user-driven - # tag edits is handled by `make_inherited_tags_sticky` (m2m_changed). - # `inherit_instance_tags` itself early-returns when a batch is active. + # Only inherit on creation. Previously fired on every save (create OR + # update), repeatedly re-applying inherited tags to children whose tag + # state had not changed. Sticky enforcement on user-driven tag edits is + # handled by `make_inherited_tags_sticky` (m2m_changed). + # `auto_inherit_product_tags` itself early-returns when suppressed. + # + # For LocationFindingReference / LocationProductReference, the new link + # means the referenced Location may have a different set of related + # Products, so re-sync the Location's inherited tags. Ref status updates + # via `set_status` don't change the related-product set and are skipped. if not created: return - tag_inheritance.inherit_instance_tags(instance) - - -@receiver(signals.post_save, sender=LocationFindingReference) -@receiver(signals.post_save, sender=LocationProductReference) -def inherit_tags_on_linked_instance(sender, instance, created, **kwargs): - tag_inheritance.inherit_instance_tags(instance.location) + target = instance.location if isinstance(instance, LocationFindingReference | LocationProductReference) else instance + auto_inherit_product_tags(target) diff --git a/unittests/test_tag_inheritance.py b/unittests/test_tag_inheritance.py index 870fa5a6983..964094fa8ed 100644 --- a/unittests/test_tag_inheritance.py +++ b/unittests/test_tag_inheritance.py @@ -29,10 +29,10 @@ from dojo.tags.inheritance import ( _sync_inherited_tags, # noqa: PLC2701 -- private API tested directly get_products, - inherit_instance_tags, is_tag_inheritance_enabled, propagate_tags_on_product_sync, ) +from dojo.tags.signals import auto_inherit_product_tags from dojo.tools.locations import LocationData from unittests.dojo_test_case import ( DojoAPITestCase, @@ -217,13 +217,13 @@ def test_sticky_readds_missing_inherited(self): class TestInheritInstanceTagsEarlyExit(unittest.TestCase): - """No-products case: inherit_instance_tags must short-circuit before touching the instance.""" + """No-products case: auto_inherit_product_tags must short-circuit before touching the instance.""" - @patch("dojo.tags.inheritance.get_products_to_inherit_tags_from") + @patch("dojo.tags.signals.get_products_to_inherit_tags_from") def test_no_products_skips_write(self, mock_get): instance = MagicMock() mock_get.return_value = [] - inherit_instance_tags(instance) + auto_inherit_product_tags(instance) instance.inherit_tags.assert_not_called() instance.inherited_tags.add.assert_not_called() instance.tags.add.assert_not_called() @@ -440,7 +440,7 @@ class TestLocationMultipleProductInheritance(DojoTestCase): linked to many products via LocationProductReference. These tests verify that all_related_products() is used correctly and tags are merged from every linked product, and that the per-product flag filters correctly when the system setting is off. - inherit_instance_tags() is called directly rather than relying on signal chaining. + auto_inherit_product_tags() is called directly rather than relying on signal chaining. Skipped when V3_FEATURE_LOCATIONS is disabled. """ @@ -450,7 +450,7 @@ def setUp(self): self.system_settings(enable_product_tag_inheritance=True) def test_location_inherits_from_multiple_products(self): - from dojo.tags.inheritance import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.signals import auto_inherit_product_tags # noqa: PLC0415 p1 = self.create_product("Product A", tags=["p1-tag"]) p2 = self.create_product("Product B", tags=["p2-tag"]) @@ -464,7 +464,7 @@ def test_location_inherits_from_multiple_products(self): location=location, product=p2, status=ProductLocationStatus.Active, ) - inherit_instance_tags(location) + auto_inherit_product_tags(location) location.refresh_from_db() tag_names = sorted(t.name for t in location.tags.all()) @@ -472,7 +472,7 @@ def test_location_inherits_from_multiple_products(self): self.assertIn("p2-tag", tag_names) def test_location_inherits_only_from_flagged_product_when_system_off(self): - from dojo.tags.inheritance import inherit_instance_tags # noqa: PLC0415 + from dojo.tags.signals import auto_inherit_product_tags # noqa: PLC0415 self.system_settings(enable_product_tag_inheritance=False) @@ -490,7 +490,7 @@ def test_location_inherits_only_from_flagged_product_when_system_off(self): location=location, product=p_no, status=ProductLocationStatus.Active, ) - inherit_instance_tags(location) + auto_inherit_product_tags(location) location.refresh_from_db() tag_names = sorted(t.name for t in location.tags.all()) From 62d8887659567f6cbb37bf1a0fccd7aef904da9e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 15:56:14 +0200 Subject: [PATCH 14/19] make tag accumulator mandatory param --- dojo/importers/default_reimporter.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 222a142cbe0..f9dfa83c315 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -968,7 +968,7 @@ def finding_post_processing( finding_from_report: Finding, *, is_matched_finding: bool = False, - tag_accumulator: list | None = None, + tag_accumulator: list, ) -> Finding: """ Save all associated objects to the finding after it has been saved @@ -990,15 +990,10 @@ def finding_post_processing( finding_from_report.unsaved_tags = merged_tags if finding_from_report.unsaved_tags: cleaned_tags = clean_tags(finding_from_report.unsaved_tags) - if tag_accumulator is not None: - if isinstance(cleaned_tags, list): - tag_accumulator.append((finding, cleaned_tags)) - elif isinstance(cleaned_tags, str): - tag_accumulator.append((finding, [cleaned_tags])) - elif isinstance(cleaned_tags, list): - finding.tags.add(*cleaned_tags) + if isinstance(cleaned_tags, list): + tag_accumulator.append((finding, cleaned_tags)) elif isinstance(cleaned_tags, str): - finding.tags.add(cleaned_tags) + tag_accumulator.append((finding, [cleaned_tags])) # Process any files if finding_from_report.unsaved_files: finding.unsaved_files = finding_from_report.unsaved_files From aa0c1a5dee510e6babaf47447feb8a6cf69f8d7b Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 16:05:08 +0200 Subject: [PATCH 15/19] resolve duplicate task name --- dojo/tags/inheritance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index bb1d6782ef2..13b3e25129d 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -207,7 +207,7 @@ def propagate_tags_on_product_deprecated(product_id, *args, **kwargs): propagate_tags_on_product(product_id, *args, **kwargs) -@app.task(name="dojo.product.helpers.propagate_tags_on_product") +@app.task def propagate_tags_on_product(product_id, *args, **kwargs): """Load Product by id and run ``propagate_tags_on_product_sync`` (Celery worker).""" with suppress(Product.DoesNotExist): From 2994efd42e05e696561bab9c665e63c0b4cfb405 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 16:28:38 +0200 Subject: [PATCH 16/19] perf(tags): pk-based _sync_inheritance_for_ids; skip full-row fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces `_sync_inheritance_for_qs(queryset, target_tag_names_per_child=callable)` with `_sync_inheritance_for_ids(model_class, child_ids, target_tag_names)`. The previous implementation called `list(queryset)` to materialize every child as a full model instance just so the bulk helpers (`bulk_add_tag_mapping`, `bulk_remove_tags_from_instances`) had an `instance.pk` and `instance.__class__` to work with. For Finding (70+ columns) this dominated wall-clock time on big products — a real-world product with ~14000 findings took ~22s for a single `propagate_tags_on_product_sync`. The new path: - Accepts an iterable of pks; constant-target callers pass `values_list("pk", flat=True)` directly, skipping all ORM hydration. - Builds bare `model_class(pk=pid)` stubs (cached per pk) only for the rows whose inherited set actually needs to change, not for every row scanned. - Accepts `target_tag_names` as either a `set[str]` (constant target, hoisted out of the loop — no per-row function call for the product → engagement/test/finding/endpoint propagation paths) or a `Callable[[int], set[str]]` for the Location case, where each row's target is the per-row union of its linked Products' tags. - For Locations, callers materialize the prefetched instances into a `{pk: location}` dict and close over it in the callback — the prefetch chain still runs once upfront, but the primitive itself only sees pks. - Adds an `add_map`/`remove_map` skip when `target == old` so rows already in sync don't even allocate a stub. Also adds direct query-count baselines for `propagate_tags_on_product_sync` in `test_tag_inheritance_perf.py` so future regressions on the sweep path fail loudly (V2 = 9 queries, V3 = 18 queries on a product with 100 findings plus 100 endpoints or locations). ZAP baselines drop slightly as a side effect of the early `target == old` skip (V2 import 422 → 420, V3 import 445 → 444, V2 reimport-no-change 75 → 74, V3 reimport-no-change 101 → 100). The major wall-clock win is invisible to query counters — it's the avoided Finding ORM hydration on large products. --- dojo/tags/inheritance.py | 189 ++++++++++++++++++------- unittests/test_tag_inheritance_perf.py | 54 ++++++- 2 files changed, 187 insertions(+), 56 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index 13b3e25129d..adbb3407e6c 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -4,6 +4,12 @@ import threading from collections import defaultdict from contextlib import contextmanager, suppress +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from django.db.models import Model from django.conf import settings from django.db.models import Q @@ -166,37 +172,47 @@ def propagate_tags_on_product_sync(product): inherited_tag_names = {tag.name for tag in product.tags.all()} logger.debug("Propagating tags from %s to all engagements", product) - _sync_inheritance_for_qs( - Engagement.objects.filter(product=product), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Engagement, + Engagement.objects.filter(product=product).values_list("pk", flat=True), + target_tag_names=inherited_tag_names, ) logger.debug("Propagating tags from %s to all tests", product) - _sync_inheritance_for_qs( - Test.objects.filter(engagement__product=product), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Test, + Test.objects.filter(engagement__product=product).values_list("pk", flat=True), + target_tag_names=inherited_tag_names, ) logger.debug("Propagating tags from %s to all findings", product) - _sync_inheritance_for_qs( - Finding.objects.filter(test__engagement__product=product), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Finding, + Finding.objects.filter(test__engagement__product=product).values_list("pk", flat=True), + target_tag_names=inherited_tag_names, ) if settings.V3_FEATURE_LOCATIONS: logger.debug("Propagating tags from %s to all locations", product) - location_qs = Location.objects.filter( - Q(products__product=product) - | Q(findings__finding__test__engagement__product=product), - ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) # Locations can be linked to multiple products, so the inherited target - # is the union of every related product's tags. Compute per-location. - _sync_inheritance_for_qs( - location_qs, - target_tag_names_per_child=_inherited_tag_names_for_location, + # is the union of every related product's tags. Materialize the full + # Locations (with the related-product prefetch chain) into a pk-keyed + # dict so the per-pk callback can look up each Location's instance. + locations_by_pk = { + loc.pk: loc + for loc in Location.objects.filter( + Q(products__product=product) + | Q(findings__finding__test__engagement__product=product), + ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) + } + _sync_inheritance_for_ids( + Location, + locations_by_pk.keys(), + target_tag_names=lambda pk: _inherited_tag_names_for_location(locations_by_pk[pk]), ) else: logger.debug("Propagating tags from %s to all endpoints", product) - _sync_inheritance_for_qs( - Endpoint.objects.filter(product=product), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Endpoint, + Endpoint.objects.filter(product=product).values_list("pk", flat=True), + target_tag_names=inherited_tag_names, ) @@ -230,9 +246,10 @@ def apply_inherited_tags_for_endpoints(endpoints): if not (get_system_setting("enable_product_tag_inheritance") or product.enable_product_tag_inheritance): return inherited_tag_names = {tag.name for tag in product.tags.all()} - _sync_inheritance_for_qs( - Endpoint.objects.filter(id__in=[e.pk for e in endpoints]), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Endpoint, + [e.pk for e in endpoints], + target_tag_names=inherited_tag_names, ) @@ -260,19 +277,28 @@ def apply_inherited_tags_for_findings(findings): inherited_tag_names = {tag.name for tag in product.tags.all()} finding_ids = [f.pk for f in findings] - _sync_inheritance_for_qs( - Finding.objects.filter(id__in=finding_ids), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Finding, + finding_ids, + target_tag_names=inherited_tag_names, ) if settings.V3_FEATURE_LOCATIONS: - _sync_inheritance_for_qs( - Location.objects.filter(findings__finding_id__in=finding_ids).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE), - target_tag_names_per_child=_inherited_tag_names_for_location, + locations_by_pk = { + loc.pk: loc + for loc in Location.objects.filter( + findings__finding_id__in=finding_ids, + ).distinct().prefetch_related(*_LOCATION_PREFETCH_FOR_INHERITANCE) + } + _sync_inheritance_for_ids( + Location, + locations_by_pk.keys(), + target_tag_names=lambda pk: _inherited_tag_names_for_location(locations_by_pk[pk]), ) else: - _sync_inheritance_for_qs( - Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct(), - target_tag_names_per_child=lambda _child: inherited_tag_names, + _sync_inheritance_for_ids( + Endpoint, + Endpoint.objects.filter(status_endpoint__finding_id__in=finding_ids).distinct().values_list("pk", flat=True), + target_tag_names=inherited_tag_names, ) @@ -288,7 +314,7 @@ def _inherited_tag_names_for_location(location): Products whose own `enable_product_tag_inheritance` flag is on (or where the system-wide setting is on). - Used as the `target_tag_names_per_child` callback for `_sync_inheritance_for_qs` + Used as the `target_tag_names` callback for `_sync_inheritance_for_ids` on Location querysets; it must be called per Location because each Location has its own set of related Products. Uses `iter_related_products()` so that an upstream `prefetch_related(...)` reduces per-call cost to 0 @@ -362,9 +388,9 @@ def apply_inherited_tags_for_locations(locations, *, product): p.id: {t.name for t in p.tags.all()} for p in product_qs } - def _target_for_location(loc): + def _target_for_location_pk(pk): names: set[str] = set() - for pid in product_ids_by_location[loc.id]: + for pid in product_ids_by_location[pk]: # product_ids_by_location may contain products that shouldn't contribute # (ref lookups weren't flag-filtered); check membership in tags_by_product. tags = tags_by_product.get(pid) @@ -372,7 +398,11 @@ def _target_for_location(loc): names |= tags return names - _sync_inheritance_for_qs(locations, target_tag_names_per_child=_target_for_location) + _sync_inheritance_for_ids( + Location, + [loc.id for loc in locations], + target_tag_names=_target_for_location_pk, + ) _LOCATION_PREFETCH_FOR_INHERITANCE = ( @@ -381,20 +411,61 @@ def _target_for_location(loc): ) -def _sync_inheritance_for_qs(queryset, *, target_tag_names_per_child): +def _sync_inheritance_for_ids( + model_class: type[Model], + child_ids: Iterable[int], + *, + target_tag_names: set[str] | Callable[[int], set[str]], +) -> None: """ - Sync inherited_tags + tags for every child in `queryset` to its target tag set. - - target_tag_names_per_child: callable(child) -> set[str]. + Sync ``inherited_tags`` and ``tags`` for every child pk to its target tag set. + + Parameters + ---------- + model_class + The child model class (``Finding``, ``Engagement``, ``Endpoint``, + ``Location``, …). Used to resolve the ``inherited_tags`` field's + through-table and to build minimal pk-only stubs for the bulk helpers. + child_ids + Iterable of primary keys for ``model_class``. Pass a ``values_list("pk", + flat=True)`` queryset directly to avoid materializing model instances + — fetching full rows for 14000 findings was the bottleneck (~22s per + product-tag toggle) that motivated the pk-based design. + target_tag_names + The desired inherited-tag-name set for each child, in one of two forms: + + - ``set[str]`` — **constant target**. All children share the same + inherited set (Product → Engagement/Test/Finding/Endpoint + propagation, where every child has the same one parent product). + The value is hoisted out of the per-pk loop so there is no per-row + function-call overhead. + - ``Callable[[int], set[str]]`` — **per-pk target**. Looks up the + target set for each pk. Used for ``Location``, which can be linked + to multiple Products via ``LocationProductReference`` / + ``LocationFindingReference``, so the inherited set is the per-row + union of every linked Product's tags. Callers typically build a + ``{pk: location}`` dict with the relevant ``prefetch_related`` chain + and close over it inside the callback. + + Implementation notes + -------------------- + + + Avoids materializing children as full model instances. The previous + ``list(queryset)`` path fetched all 70+ columns per Finding row, which + dominated wall-clock time on large products. ``bulk_add_tag_mapping`` / + ``bulk_remove_tags_from_instances`` only ever read ``instance.pk`` and + ``instance.__class__``, so a bare ``model_class(pk=pid)`` stub is enough. + + Issues bulk SQL: one through-table read for the current ``inherited_tags`` + rows, then one INSERT per added tag-name and one DELETE per removed + tag-name (each batched if needed by the helpers). - Issues bulk SQL: one through-table read for current inherited_tags, then - bulk add/remove on `tags` and `inherited_tags` fields. """ - children = list(queryset) - if not children: + child_ids = list(child_ids) + if not child_ids: return - model_class = type(children[0]) inherited_field = model_class._meta.get_field("inherited_tags") inherited_through = inherited_field.remote_field.through inherited_tag_model = inherited_field.related_model @@ -406,7 +477,6 @@ def _sync_inheritance_for_qs(queryset, *, target_tag_names_per_child): source_field_name = field.name break - child_ids = [c.pk for c in children] # One query: pull every (child_id, tag_name) pair from the inherited_tags through table. existing_pairs = inherited_through.objects.filter( **{f"{source_field_name}__in": child_ids}, @@ -416,16 +486,31 @@ def _sync_inheritance_for_qs(queryset, *, target_tag_names_per_child): for child_id, tag_name in existing_pairs: old_inherited_by_child[child_id].add(tag_name) - # Compute per-child diff and bucket by tag name. + # Per-pk stub instances reused across tag buckets (bulk helpers only read + # .pk and .__class__). + stubs: dict[int, object] = {} + + def _stub(pk): + s = stubs.get(pk) + if s is None: + s = model_class(pk=pk) + stubs[pk] = s + return s + + constant_target: set[str] | None = None if callable(target_tag_names) else target_tag_names + + # Compute per-pk diff and bucket by tag name. add_map: dict[str, list] = defaultdict(list) remove_map: dict[str, list] = defaultdict(list) - for child in children: - target = target_tag_names_per_child(child) - old = old_inherited_by_child.get(child.pk, set()) + for pk in child_ids: + target = constant_target if constant_target is not None else target_tag_names(pk) + old = old_inherited_by_child.get(pk, set()) + if target == old: + continue for name in target - old: - add_map[name].append(child) + add_map[name].append(_stub(pk)) for name in old - target: - remove_map[name].append(child) + remove_map[name].append(_stub(pk)) # Apply adds. Both `tags` and `inherited_tags` get the same set of new # inherited names — `_sync_inherited_tags` did the same. diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index dd212ea9f0c..f292c53df8d 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -214,6 +214,24 @@ def _do_finding_add_user_tag(self, name: str, expected: int) -> None: self.assertIn("user-only", finding_tag_names) self.assertIn("inherited", finding_tag_names) # still sticky + def _do_propagate_sync_only(self, name: str, expected: int, *, with_endpoints: bool, with_locations: bool) -> None: + """ + Measure `propagate_tags_on_product_sync(product)` in isolation — no tag change. + + Captures the raw sweep cost for a product with a realistic mix of children: + N findings + (V2) N endpoints or (V3) N locations. Should be roughly idempotent + (no add/remove to apply) so the number reflects diff-detection overhead. + """ + product = _make_product_with_findings(name, n_findings=100, tags=["t1", "t2"]) + if with_endpoints: + _make_endpoints(product, n=100) + if with_locations: + _make_locations(product, n=100) + with self.assertNumQueries(expected): + propagate_tags_on_product_sync(product) + finding = Finding.objects.filter(test__engagement__product=product).first() + self.assertEqual({"t1", "t2"}, {t.name for t in finding.tags.all()}) + def _do_finding_remove_inherited(self, name: str, expected: int) -> None: product = _make_product_with_findings(name, n_findings=1, tags=["inherited"]) finding = Finding.objects.filter(test__engagement__product=product).first() @@ -282,6 +300,29 @@ def test_baseline_finding_remove_inherited_tag_sticky_re_adds_v2(self): def test_baseline_finding_remove_inherited_tag_sticky_re_adds_v3(self): self._do_finding_remove_inherited("perf-sticky-rm-v3", self.EXPECTED_FINDING_REMOVE_INHERITED_V3) + # ------------------------------------------------------------------ + # propagate_tags_on_product_sync direct invocation (no tag change). + # Measures the raw sweep cost over a product's children. + # ------------------------------------------------------------------ + + @override_settings(V3_FEATURE_LOCATIONS=False) + def test_baseline_propagate_tags_on_product_sync_v2(self): + self._do_propagate_sync_only( + "perf-sync-v2", + self.EXPECTED_PROPAGATE_SYNC_V2, + with_endpoints=True, + with_locations=False, + ) + + @override_settings(V3_FEATURE_LOCATIONS=True) + def test_baseline_propagate_tags_on_product_sync_v3(self): + self._do_propagate_sync_only( + "perf-sync-v3", + self.EXPECTED_PROPAGATE_SYNC_V3, + with_endpoints=False, + with_locations=True, + ) + # ------------------------------------------------------------------ # V2: propagation to Endpoints (skipped under V3_FEATURE_LOCATIONS) # ------------------------------------------------------------------ @@ -382,6 +423,11 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 125 EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 75 + # propagate_tags_on_product_sync direct invocation (no tag change). + # Product with 100 findings + 100 endpoints (V2) or + 100 locations (V3). + EXPECTED_PROPAGATE_SYNC_V2 = 9 + EXPECTED_PROPAGATE_SYNC_V3 = 18 + @override_settings( CELERY_TASK_ALWAYS_EAGER=True, @@ -498,7 +544,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # import path because the previous process-global signal-disconnect was # narrower in scope (Location.tags.through only). Net-positive trade for # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 422 - EXPECTED_ZAP_IMPORT_V3 = 445 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 75 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 101 + EXPECTED_ZAP_IMPORT_V2 = 420 + EXPECTED_ZAP_IMPORT_V3 = 444 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 74 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 100 From bf0912b2bd0a3ed0a0676fa588e6621f9d7c3f07 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 16:31:26 +0200 Subject: [PATCH 17/19] rename --- dojo/tags/inheritance.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dojo/tags/inheritance.py b/dojo/tags/inheritance.py index adbb3407e6c..206298e6282 100644 --- a/dojo/tags/inheritance.py +++ b/dojo/tags/inheritance.py @@ -380,6 +380,7 @@ def apply_inherited_tags_for_locations(locations, *, product): ): product_ids_by_location[loc_id].add(prod_id) + # prefetch all tags for all linked products up front so we can pass a callable to the sync function all_product_ids = {pid for pids in product_ids_by_location.values() for pid in pids} product_qs = Product.objects.filter(id__in=all_product_ids).prefetch_related("tags") if not system_wide: @@ -388,7 +389,7 @@ def apply_inherited_tags_for_locations(locations, *, product): p.id: {t.name for t in p.tags.all()} for p in product_qs } - def _target_for_location_pk(pk): + def _target_tag_names_for_location_pk(pk): names: set[str] = set() for pid in product_ids_by_location[pk]: # product_ids_by_location may contain products that shouldn't contribute @@ -401,7 +402,7 @@ def _target_for_location_pk(pk): _sync_inheritance_for_ids( Location, [loc.id for loc in locations], - target_tag_names=_target_for_location_pk, + target_tag_names=_target_tag_names_for_location_pk, ) From b571e357f27fac314b69ee44e82e550a65d728a3 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 16:43:37 +0200 Subject: [PATCH 18/19] perf(tags): skip redundant inheritance on no-change reimport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two gates eliminate ~14 wasted queries on a reimport that creates no new findings or location refs (the common "scheduled rescan, nothing changed" path): 1. `DefaultReImporter.process_findings` now tracks newly-created findings in `new_findings_in_batch` (populated in the else branch of the matched/unmatched dispatch) and passes ONLY those to `apply_inherited_tags_for_findings`. Matched/existing findings already had inheritance applied at their original creation, so re-running the through-table read + Location prefetch chain on them is pure overhead. 2. `LocationManager._persist_locations` now skips `apply_inherited_tags_for_locations` when no new `LocationProductReference` rows were created. New `LocationFindingReference`s only add findings within `self._product`, so they can't change a Location's Product membership; only a new product ref can. When `all_product_refs` is empty, the Location's inherited target set is unchanged and the helper would do a costly no-op read for nothing. Net effect on the pinned ZAP reimport-no-change baselines: - V2: 75 → 69 (matches the pre-Phase-A baseline of 69) - V3: 101 → 81 (beats the pre-Phase-A baseline of 87) --- dojo/importers/default_reimporter.py | 17 ++++++++++++++--- dojo/importers/location_manager.py | 17 ++++++++++++----- unittests/test_tag_inheritance_perf.py | 4 ++-- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index f9dfa83c315..9940cb337b8 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -317,6 +317,11 @@ def _process_findings_internal( batch_finding_ids: list[int] = [] batch_findings: list[Finding] = [] + # Findings that were newly created (else branch below) — pass these to + # `apply_inherited_tags_for_findings` instead of `batch_findings` so + # matched/existing findings (which already have correct inherited tags) + # don't trigger a redundant through-table read on no-change reimports. + new_findings_in_batch: list[Finding] = [] findings_with_parser_tags: list[tuple] = [] # Batch size for deduplication/post-processing (only new findings) dedupe_batch_max_size = getattr(settings, "IMPORT_REIMPORT_DEDUPE_BATCH_SIZE", 1000) @@ -399,6 +404,8 @@ def _process_findings_internal( candidates_by_uid, candidates_by_key, ) + if finding: + new_findings_in_batch.append(finding) # This condition __appears__ to always be true, but am afraid to remove it if finding: @@ -437,10 +444,14 @@ def _process_findings_internal( findings_with_parser_tags.clear() # Apply import-time tags before post-processing so rules/deduplication see them. self.apply_import_tags_for_batch(batch_findings) - # Apply inherited Product tags to this batch's findings (and - # their endpoints/locations) BEFORE post_process_findings_batch + # Apply inherited Product tags to NEWLY CREATED findings only + # (and their endpoints/locations) BEFORE post_process_findings_batch # dispatches, so rules/dedup see inherited tags on .tags. - apply_inherited_tags_for_findings(batch_findings) + # Matched/existing findings already have inheritance applied from + # their original creation; re-running it on no-change reimports + # would be ~8 wasted queries per batch. + apply_inherited_tags_for_findings(new_findings_in_batch) + new_findings_in_batch.clear() batch_findings.clear() finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index 81b1a1a495d..a2e08e5272e 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -210,11 +210,18 @@ def _persist_locations(self) -> None: all_product_refs, batch_size=1000, ignore_conflicts=True, ) - # Trigger bulk tag inheritance - tag_inheritance.apply_inherited_tags_for_locations( - [loc.location for loc in saved], - product=self._product, - ) + # Trigger bulk tag inheritance only when the Location's product + # membership actually changed. New product refs are the only thing + # that can add a Product to a Location's inherited-tags target set + # (new finding refs are always to findings in `self._product`, so + # they don't introduce a new Product); skipping when `all_product_refs` + # is empty avoids the through-table read on no-change reimports. + if all_product_refs: + new_ref_location_ids = {ref.location_id for ref in all_product_refs} + tag_inheritance.apply_inherited_tags_for_locations( + [loc.location for loc in saved if loc.location_id in new_ref_location_ids], + product=self._product, + ) # Clear accumulators self._locations_by_finding.clear() diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index f292c53df8d..2caa8f6b4c3 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -546,5 +546,5 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # eliminating the threading bug; full Phase B reductions land in Stage 2. EXPECTED_ZAP_IMPORT_V2 = 420 EXPECTED_ZAP_IMPORT_V3 = 444 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 74 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 100 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 81 From 3de44417756f699ca6640b2c43874be80a095fa3 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 15 May 2026 16:47:17 +0200 Subject: [PATCH 19/19] test(tags): add reimport-with-new-findings perf baseline Imports the 10-finding ZAP subset first, then reimports the 19-finding full report so 9 findings are created during reimport while 10 are matched. Pins V2 = 169 queries, V3 = 198 queries. Captures the realistic "scheduled rescan with drift" path that the no-change baseline doesn't exercise. --- unittests/test_tag_inheritance_perf.py | 43 ++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 2caa8f6b4c3..9cbf221c038 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -468,6 +468,11 @@ def setUp(self): self.product = self.create_product("Tag Perf Import Product", tags=["inherit", "these"]) self.engagement = self.create_engagement("Tag Perf Import Engagement", self.product) self.scan_path = get_unit_tests_scans_path("zap") / "dvwa_baseline_dojo.xml" + # Subset of the full report (10 findings vs 19) used to exercise the + # reimport-with-new-findings code path: initial import uses the subset, + # then reimport uses the full report so 9 findings get created during + # reimport while 10 match existing ones. + self.scan_path_subset = get_unit_tests_scans_path("zap") / "dvwa_baseline_dojo_subset.xml" @override_settings(V3_FEATURE_LOCATIONS=False) def test_baseline_zap_scan_import_v2(self): @@ -534,6 +539,42 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): finding = Finding.objects.filter(test_id=test_id).first() self.assertEqual({"inherit", "these"}, {t.name for t in finding.tags.all()}) + @override_settings(V3_FEATURE_LOCATIONS=False) + def test_baseline_zap_scan_reimport_with_new_findings_v2(self): + """ + V2: import 10-finding subset, then reimport 19-finding full report. + + 9 findings are NEW (must run inheritance), 10 are matched (skip). + Exercises the realistic "scheduled rescan with drift" path where a + reimport actually creates findings. + """ + response = self.import_scan_with_params( + self.scan_path_subset, + engagement=self.engagement.id, + ) + test_id = response["test"] + + with self.assertNumQueries(self.EXPECTED_ZAP_REIMPORT_WITH_NEW_V2): + self.reimport_scan_with_params(test_id, str(self.scan_path)) + + finding = Finding.objects.filter(test_id=test_id).first() + self.assertEqual({"inherit", "these"}, {t.name for t in finding.tags.all()}) + + @override_settings(V3_FEATURE_LOCATIONS=True) + def test_baseline_zap_scan_reimport_with_new_findings_v3(self): + """V3: same as V2 but Location-backed.""" + response = self.import_scan_with_params( + self.scan_path_subset, + engagement=self.engagement.id, + ) + test_id = response["test"] + + with self.assertNumQueries(self.EXPECTED_ZAP_REIMPORT_WITH_NEW_V3): + self.reimport_scan_with_params(test_id, str(self.scan_path)) + + finding = Finding.objects.filter(test_id=test_id).first() + self.assertEqual({"inherit", "these"}, {t.name for t in finding.tags.all()}) + # Pinned baselines per mode. Each test forces its own V3_FEATURE_LOCATIONS # via @override_settings so all four import paths run in a single suite # invocation regardless of the ambient `DD_V3_FEATURE_LOCATIONS` env var. @@ -548,3 +589,5 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): EXPECTED_ZAP_IMPORT_V3 = 444 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 81 + EXPECTED_ZAP_REIMPORT_WITH_NEW_V2 = 169 + EXPECTED_ZAP_REIMPORT_WITH_NEW_V3 = 198