Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8513855
perf(tags): batch_mode + per-batch bulk inheritance during import (Ph…
valentijnscholten May 14, 2026
a15790e
Merge remote-tracking branch 'upstream/dev' into perf/tag-inheritance…
valentijnscholten May 15, 2026
455871b
refactor(tags): centralize inheritance helpers in dojo/tag_inheritance
valentijnscholten May 15, 2026
34a32fa
refactor(tags): group tag modules under dojo/tags/
valentijnscholten May 15, 2026
46667c6
rename batch_mode to suppressed
valentijnscholten May 15, 2026
99494c9
perf(tags): check cached system_setting before per-product flag
valentijnscholten May 15, 2026
039c65c
refactor(tags): simplify is_tag_inheritance_enabled, drop linked-inst…
valentijnscholten May 15, 2026
184e011
refactor(tags): merge propagate_inheritance into inherit_instance_tags
valentijnscholten May 15, 2026
fc357eb
refactor(tags): diff-based _sync_inherited_tags, drop per-model wrappers
valentijnscholten May 15, 2026
7936ffd
add comments
valentijnscholten May 15, 2026
ad932de
refactor(tags): consolidate location inheritance, drop dojo/product/h…
valentijnscholten May 15, 2026
5a715a0
comments
valentijnscholten May 15, 2026
854be22
comments
valentijnscholten May 15, 2026
7770016
refactor(tags): rename inherit_instance_tags -> auto_inherit_product_…
valentijnscholten May 15, 2026
62d8887
make tag accumulator mandatory param
valentijnscholten May 15, 2026
aa0c1a5
resolve duplicate task name
valentijnscholten May 15, 2026
2994efd
perf(tags): pk-based _sync_inheritance_for_ids; skip full-row fetch
valentijnscholten May 15, 2026
bf0912b
rename
valentijnscholten May 15, 2026
b571e35
perf(tags): skip redundant inheritance on no-change reimport
valentijnscholten May 15, 2026
3de4441
test(tags): add reimport-with-new-findings perf baseline
valentijnscholten May 15, 2026
39e3257
Merge remote-tracking branch 'upstream/dev' into perf/tag-inheritance…
valentijnscholten May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dojo/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def ready(self):
import dojo.product_type.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.risk_acceptance.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.sla_config.helpers # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.tags_signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.tags.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.test.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.tool_product.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
import dojo.url.signals # noqa: PLC0415, F401 raised: AppRegistryNotReady
Expand Down
2 changes: 1 addition & 1 deletion dojo/finding/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ def bulk_clear_finding_m2m(finding_qs):
FileUpload.delete() fires and removes files from disk storage.
Tags are handled via bulk_remove_all_tags to maintain tag counts.
"""
from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import
from dojo.tags.utils import bulk_remove_all_tags # noqa: PLC0415 circular import

finding_ids = finding_qs.values_list("id", flat=True)

Expand Down
2 changes: 1 addition & 1 deletion dojo/finding/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
User,
)
from dojo.notifications.helper import create_notification
from dojo.tag_utils import bulk_add_tags_to_instances
from dojo.tags.utils import bulk_add_tags_to_instances
from dojo.test.queries import get_authorized_tests
from dojo.tools import tool_issue_updater
from dojo.utils import (
Expand Down
2 changes: 1 addition & 1 deletion dojo/importers/base_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
Test_Type,
)
from dojo.notifications.helper import create_notification
from dojo.tag_utils import bulk_add_tags_to_instances
from dojo.tags.utils import bulk_add_tags_to_instances
from dojo.tools.factory import get_parser
from dojo.tools.parser_test import ParserTest
from dojo.utils import max_safe
Expand Down
21 changes: 20 additions & 1 deletion dojo/importers/default_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
Test_Import,
)
from dojo.notifications.helper import async_create_notification
from dojo.tag_utils import bulk_apply_parser_tags
from dojo.tags import inheritance as tag_inheritance
from dojo.tags.inheritance import apply_inherited_tags_for_findings
from dojo.tags.utils import bulk_apply_parser_tags
from dojo.utils import get_full_url, perform_product_grading
from dojo.validators import clean_tags

Expand Down Expand Up @@ -161,6 +163,19 @@ def process_findings(
self,
parsed_findings: list[Finding],
**kwargs: dict,
) -> list[Finding]:
# Whole hot loop runs under `batch_mode()`: per-row inheritance signals
# for the findings/endpoints/locations created below are suppressed.
# Inheritance is then applied in bulk per-batch (right before
# `post_process_findings_batch` dispatch) so rules/dedup see inherited
# tags on `finding.tags`.
with tag_inheritance.suppress_tag_inheritance():
return self._process_findings_internal(parsed_findings, **kwargs)

def _process_findings_internal(
self,
parsed_findings: list[Finding],
**kwargs: dict,
) -> list[Finding]:
# Batched post-processing (no chord): dispatch a task per 1000 findings or on final finding
batch_finding_ids: list[int] = []
Expand Down Expand Up @@ -266,6 +281,10 @@ def process_findings(
findings_with_parser_tags.clear()
# Apply import-time tags before post-processing so rules/deduplication see them.
self.apply_import_tags_for_batch(batch_findings)
# Apply inherited Product tags to this batch's findings (and
# their endpoints/locations) BEFORE post_process_findings_batch
# dispatches, so rules/dedup see inherited tags on .tags.
apply_inherited_tags_for_findings(batch_findings)
batch_findings.clear()
finding_ids_batch = list(batch_finding_ids)
batch_finding_ids.clear()
Expand Down
45 changes: 35 additions & 10 deletions dojo/importers/default_reimporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
Test,
Test_Import,
)
from dojo.tag_utils import bulk_apply_parser_tags
from dojo.tags import inheritance as tag_inheritance
from dojo.tags.inheritance import apply_inherited_tags_for_findings
from dojo.tags.utils import bulk_apply_parser_tags
from dojo.utils import perform_product_grading
from dojo.validators import clean_tags

Expand Down Expand Up @@ -263,6 +265,19 @@ def process_findings(
the finding may be appended to a new or existing group based upon user selection
at import time
"""
# Whole hot loop runs under `batch_mode()`: per-row inheritance signals
# for the findings/endpoints/locations created below are suppressed.
# Inheritance is then applied in bulk per-batch (right before
# `post_process_findings_batch` dispatch) so rules/dedup see inherited
# tags on `finding.tags`.
with tag_inheritance.suppress_tag_inheritance():
return self._process_findings_internal(parsed_findings, **kwargs)

def _process_findings_internal(
self,
parsed_findings: list[Finding],
**kwargs: dict,
) -> tuple[list[Finding], list[Finding], list[Finding], list[Finding]]:
self.deduplication_algorithm = self.determine_deduplication_algorithm()
# Only process findings with the same service value (or None)
# Even though the service values is used in the hash_code calculation,
Expand Down Expand Up @@ -302,6 +317,11 @@ def process_findings(

batch_finding_ids: list[int] = []
batch_findings: list[Finding] = []
# Findings that were newly created (else branch below) — pass these to
# `apply_inherited_tags_for_findings` instead of `batch_findings` so
# matched/existing findings (which already have correct inherited tags)
# don't trigger a redundant through-table read on no-change reimports.
new_findings_in_batch: list[Finding] = []
findings_with_parser_tags: list[tuple] = []
# Batch size for deduplication/post-processing (only new findings)
dedupe_batch_max_size = getattr(settings, "IMPORT_REIMPORT_DEDUPE_BATCH_SIZE", 1000)
Expand Down Expand Up @@ -384,6 +404,8 @@ def process_findings(
candidates_by_uid,
candidates_by_key,
)
if finding:
new_findings_in_batch.append(finding)

# This condition __appears__ to always be true, but am afraid to remove it
if finding:
Expand Down Expand Up @@ -422,6 +444,14 @@ def process_findings(
findings_with_parser_tags.clear()
# Apply import-time tags before post-processing so rules/deduplication see them.
self.apply_import_tags_for_batch(batch_findings)
# Apply inherited Product tags to NEWLY CREATED findings only
# (and their endpoints/locations) BEFORE post_process_findings_batch
# dispatches, so rules/dedup see inherited tags on .tags.
# Matched/existing findings already have inheritance applied from
# their original creation; re-running it on no-change reimports
# would be ~8 wasted queries per batch.
apply_inherited_tags_for_findings(new_findings_in_batch)
new_findings_in_batch.clear()
batch_findings.clear()
finding_ids_batch = list(batch_finding_ids)
batch_finding_ids.clear()
Expand Down Expand Up @@ -949,7 +979,7 @@ def finding_post_processing(
finding_from_report: Finding,
*,
is_matched_finding: bool = False,
tag_accumulator: list | None = None,
tag_accumulator: list,
) -> Finding:
"""
Save all associated objects to the finding after it has been saved
Expand All @@ -971,15 +1001,10 @@ def finding_post_processing(
finding_from_report.unsaved_tags = merged_tags
if finding_from_report.unsaved_tags:
cleaned_tags = clean_tags(finding_from_report.unsaved_tags)
if tag_accumulator is not None:
if isinstance(cleaned_tags, list):
tag_accumulator.append((finding, cleaned_tags))
elif isinstance(cleaned_tags, str):
tag_accumulator.append((finding, [cleaned_tags]))
elif isinstance(cleaned_tags, list):
finding.tags.add(*cleaned_tags)
if isinstance(cleaned_tags, list):
tag_accumulator.append((finding, cleaned_tags))
elif isinstance(cleaned_tags, str):
finding.tags.add(cleaned_tags)
tag_accumulator.append((finding, [cleaned_tags]))
# Process any files
if finding_from_report.unsaved_files:
finding.unsaved_files = finding_from_report.unsaved_files
Expand Down
16 changes: 11 additions & 5 deletions dojo/importers/endpoint_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
Finding,
Product,
)
from dojo.tags_signals import inherit_instance_tags
from dojo.tags.inheritance import apply_inherited_tags_for_endpoints

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -231,10 +231,16 @@ def get_or_create_endpoints(self) -> tuple[dict[EndpointUniqueKey, Endpoint], li
if to_create:
created = Endpoint.objects.bulk_create(to_create, batch_size=1000)
endpoints_by_key.update(zip(to_create_keys, created, strict=True))
# bulk_create bypasses post_save signals, so manually trigger tag inheritance
# this is not ideal, but we need to take a separate look at the tag inheritance feature itself later
for ep in created:
inherit_instance_tags(ep)
# bulk_create bypasses post_save so per-row inheritance signals never
# fire here. The importer hot path already covers these endpoints via
# the per-batch `apply_inherited_tags_for_findings` sweep (it picks
# them up through `Endpoint.status_finding.finding`), so this call is
# redundant for the importer. We keep a bulk call anyway as a defensive
# measure: if anything outside the importer ever bulk-creates endpoints
# through this manager, they still receive their inherited Product tags
# instead of silently missing them. The bulk helper costs ~2 queries
# when there's nothing to apply, vs N per-row signal fires.
apply_inherited_tags_for_endpoints(created)

self._endpoints_to_create.clear()
return endpoints_by_key, created
Expand Down
124 changes: 14 additions & 110 deletions dojo/importers/location_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,15 @@
from django.db import transaction
from django.utils import timezone

from dojo import tag_inheritance
from dojo.importers.base_location_manager import BaseLocationManager
from dojo.location.models import AbstractLocation, Location, LocationFindingReference, LocationProductReference
from dojo.location.status import FindingLocationStatus, ProductLocationStatus
from dojo.models import Product, _manage_inherited_tags
from dojo.tags import inheritance as tag_inheritance
from dojo.tools.locations import LocationData
from dojo.url.models import URL
from dojo.utils import get_system_setting

if TYPE_CHECKING:
from tagulous.models import TagField

from dojo.models import Dojo_User, Finding
from dojo.models import Dojo_User, Finding, Product

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -214,8 +210,18 @@ def _persist_locations(self) -> None:
all_product_refs, batch_size=1000, ignore_conflicts=True,
)

# Trigger bulk tag inheritance
self._bulk_inherit_tags(loc.location for loc in saved)
# Trigger bulk tag inheritance only when the Location's product
# membership actually changed. New product refs are the only thing
# that can add a Product to a Location's inherited-tags target set
# (new finding refs are always to findings in `self._product`, so
# they don't introduce a new Product); skipping when `all_product_refs`
# is empty avoids the through-table read on no-change reimports.
if all_product_refs:
new_ref_location_ids = {ref.location_id for ref in all_product_refs}
tag_inheritance.apply_inherited_tags_for_locations(
[loc.location for loc in saved if loc.location_id in new_ref_location_ids],
product=self._product,
)

# Clear accumulators
self._locations_by_finding.clear()
Expand Down Expand Up @@ -477,105 +483,3 @@ def type_id(x: tuple[int, AbstractLocation]) -> int:
# Restore the original input ordering
saved.sort(key=itemgetter(0))
return [loc for _, loc in saved]

# ------------------------------------------------------------------
# Tag inheritance
# ------------------------------------------------------------------

def _bulk_inherit_tags(self, locations):
"""
Bulk equivalent of calling inherit_instance_tags(loc) for many Locations. Actually persisting updates is handled
by a per-location call to _manage_inherited_tags(), but at least determining what the tags are is more efficient
(plus we can skip locations that don't need an update at all).

When tag inheritance is enabled, computes the target inherited tags for each location from all related products
and updates only locations that are out of sync.
"""
locations = list(locations)
if not locations:
return

# Check whether tag inheritance is enabled at either the product level or system-wide; quit early if neither
product_inherit = getattr(self._product, "enable_product_tag_inheritance", False)
system_wide_inherit = bool(get_system_setting("enable_product_tag_inheritance"))
if not system_wide_inherit and not product_inherit:
return

# A location can be shared across multiple products. Its inherited tags should be the union of
# tags from ALL contributing products, not just the one running this import.
location_ids = [loc.id for loc in locations]
product_ids_by_location: dict[int, set[int]] = {loc.id: set() for loc in locations}

# Find associations through LocationProductReference entries
for loc_id, prod_id in LocationProductReference.objects.filter(
location_id__in=location_ids,
).values_list("location_id", "product_id"):
product_ids_by_location[loc_id].add(prod_id)

# Find associations through LocationFindingReference entries and the finding.test.engagement.product chain.
# This shouldn't add anything new, but just in case.
for loc_id, prod_id in (
LocationFindingReference.objects
.filter(location_id__in=location_ids)
.values_list("location_id", "finding__test__engagement__product_id")
):
product_ids_by_location[loc_id].add(prod_id)

# Fetch all products that will contribute to tag inheritance, and their tags
all_product_ids = {pid for pids in product_ids_by_location.values() for pid in pids}
product_qs = Product.objects.filter(id__in=all_product_ids).prefetch_related("tags")
if not system_wide_inherit:
# Product-level inheritance only
product_qs = product_qs.filter(enable_product_tag_inheritance=True)
# Materialize into a dict for ease of use
products: dict[int, Product] = {p.id: p for p in product_qs}
# Get distinct tags, per-product
tags_by_product: dict[int, set[str]] = {
pid: {t.name for t in p.tags.all()}
for pid, p in products.items()
}

# Helper method for getting all tags from the given TagField
def _get_tags(tags_field: TagField) -> dict[int, set[str]]:
through_model = tags_field.through
fk_name = tags_field.field.m2m_reverse_field_name()
tags_by_location: dict[int, set[str]] = {loc.id: set() for loc in locations}
for l_id, t_name in through_model.objects.filter(
location_id__in=location_ids,
).values_list("location_id", f"{fk_name}__name"):
tags_by_location[l_id].add(t_name)
return tags_by_location

# Gather inherited and 'regular' tags per location
existing_inherited_by_location: dict[int, set[str]] = _get_tags(Location.inherited_tags)
existing_tags_by_location: dict[int, set[str]] = _get_tags(Location.tags)

# Perform the bulk updates inside a `tag_inheritance.batch()` context.
# While the batch is active, signal handlers in `dojo/tags_signals.py`
# short-circuit per-row inheritance work that would otherwise fire on
# every `(inherited_)tags.set()` and defeat the bulk update.
#
# This replaces a previous `signals.m2m_changed.disconnect(...)` /
# `connect(...)` dance which was process-global and therefore unsafe
# under threaded gunicorn / Celery thread pools / ASGI threadpools:
# while disconnected, every thread in the process lost sticky
# enforcement. Thread-local batch state avoids that hazard.
with tag_inheritance.batch_mode():
for location in locations:
target_tag_names: set[str] = set()
for pid in product_ids_by_location[location.id]:
# product_ids_by_location may contain products that shouldn't contribute to tag inheritance (we
# didn't filter either location ref lookups to check), so do a last-minute check here
if pid in products:
target_tag_names |= tags_by_product[pid]

if target_tag_names == existing_inherited_by_location[location.id]:
# The existing set matches the expected set, so nothing more to do for this location
continue

# Update tags for this location
_manage_inherited_tags(
location,
list(target_tag_names),
potentially_existing_tags=existing_tags_by_location[location.id],
)
Loading
Loading