diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 24d6b17aa..2c57e2fa5 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -11,6 +11,7 @@ import traceback from typing import TYPE_CHECKING, Any, Generator +from .condition import Not from .errors import DataJointError, LostConnectionError from .expression import AndList, QueryExpression @@ -401,7 +402,12 @@ def _populate_direct( """ from tqdm import tqdm - keys = (self._jobs_to_do(restrictions) - self.proj()).keys() + # Disable semantic_check on the antijoin: when self has FK-inherited + # PK attributes, self.proj() may carry attribute lineages that don't + # match key_source's (same attribute, different source-table tag). + # The set-difference itself doesn't care about lineage — we just want + # rows in key_source that aren't yet in self. + keys = self._jobs_to_do(restrictions).restrict(Not(self.proj()), semantic_check=False).keys() logger.debug("Found %d keys to populate" % len(keys)) @@ -702,7 +708,8 @@ def progress(self, *restrictions: Any, display: bool = False) -> tuple[int, int] if not common_attrs: # No common attributes - fall back to two-query method total = len(todo) - remaining = len(todo - self.proj()) + # Same lineage caveat as in _populate_direct — disable semantic_check. + remaining = len(todo.restrict(Not(self.proj()), semantic_check=False)) else: # Build a single query that computes both total and remaining # Using LEFT JOIN with COUNT(DISTINCT) to handle 1:many relationships diff --git a/tests/integration/test_autopopulate.py b/tests/integration/test_autopopulate.py index 02ba69d6b..e3913e120 100644 --- a/tests/integration/test_autopopulate.py +++ b/tests/integration/test_autopopulate.py @@ -236,6 +236,64 @@ def make(self, key): test_schema.drop(prompt=False) +def test_populate_antijoin_fk_inherited_pk(prefix, connection_test): + """Regression test: populate antijoin on a table whose PK is fully FK-inherited. + + Reproduces the lineage-mismatch failure that hits ``Imported`` or + ``Computed`` tables whose primary key consists entirely of attributes + inherited via a foreign key, with no own-table PK attributes. + + Without the ``semantic_check=False`` on the populate antijoin, the + subtraction ``key_source - self.proj()`` raises:: + + DataJointError: Cannot join on attribute 'spec_id': different lineages + (schema.spec.spec_id vs None). Use .proj() to rename one of the attributes. + + The set-difference doesn't actually need lineage matching — it just + asks which key_source rows aren't yet in ``self``. + """ + test_schema = dj.Schema(f"{prefix}_antijoin_fk_pk", connection=connection_test) + + @test_schema + class Spec(dj.Manual): + definition = """ + spec_id : int32 + --- + label : varchar(30) + """ + + @test_schema + class Item(dj.Imported): + definition = """ + -> Spec + --- + payload : varchar(60) + """ + + def make(self, key): + label = (Spec & key).fetch1("label") + self.insert1(dict(key, payload=f"made:{label}")) + + try: + Spec.insert([(1, "alpha"), (2, "beta"), (3, "gamma")]) + + # Before the fix this raised DataJointError on the antijoin. + Item.populate(max_calls=2) + assert len(Item) == 2 + + remaining, total = Item.progress() + assert total == 3 + assert remaining == 1 + + Item.populate() + assert len(Item) == 3 + remaining, total = Item.progress() + assert remaining == 0 + assert total == 3 + finally: + test_schema.drop(prompt=False) + + def test_load_dependencies(prefix, connection_test): schema = dj.Schema(f"{prefix}_load_dependencies_populate", connection=connection_test)