Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/datajoint/autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import traceback
from typing import TYPE_CHECKING, Any, Generator

from .condition import Not
from .errors import DataJointError, LostConnectionError
from .expression import AndList, QueryExpression

Expand Down Expand Up @@ -401,7 +402,12 @@ def _populate_direct(
"""
from tqdm import tqdm

keys = (self._jobs_to_do(restrictions) - self.proj()).keys()
# Disable semantic_check on the antijoin: when self has FK-inherited
# PK attributes, self.proj() may carry attribute lineages that don't
# match key_source's (same attribute, different source-table tag).
# The set-difference itself doesn't care about lineage — we just want
# rows in key_source that aren't yet in self.
keys = self._jobs_to_do(restrictions).restrict(Not(self.proj()), semantic_check=False).keys()

logger.debug("Found %d keys to populate" % len(keys))

Expand Down Expand Up @@ -702,7 +708,8 @@ def progress(self, *restrictions: Any, display: bool = False) -> tuple[int, int]
if not common_attrs:
# No common attributes - fall back to two-query method
total = len(todo)
remaining = len(todo - self.proj())
# Same lineage caveat as in _populate_direct — disable semantic_check.
remaining = len(todo.restrict(Not(self.proj()), semantic_check=False))
else:
# Build a single query that computes both total and remaining
# Using LEFT JOIN with COUNT(DISTINCT) to handle 1:many relationships
Expand Down
58 changes: 58 additions & 0 deletions tests/integration/test_autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,64 @@ def make(self, key):
test_schema.drop(prompt=False)


def test_populate_antijoin_fk_inherited_pk(prefix, connection_test):
    """Populate a table whose entire primary key is inherited through a foreign key.

    Regression coverage for the lineage-mismatch failure seen on ``Imported``
    or ``Computed`` tables that declare no primary-key attributes of their
    own, only those pulled in via ``->``.

    When the populate antijoin runs with semantic checking enabled, the
    subtraction ``key_source - self.proj()`` fails with::

        DataJointError: Cannot join on attribute 'spec_id': different lineages
        (schema.spec.spec_id vs None). Use .proj() to rename one of the attributes.

    Lineage matching is irrelevant to that set difference: the only question
    is which ``key_source`` rows are still missing from ``self``. The test
    therefore exercises both ``populate`` and ``progress`` on such a table.
    """
    schema = dj.Schema(f"{prefix}_antijoin_fk_pk", connection=connection_test)

    @schema
    class Spec(dj.Manual):
        definition = """
        spec_id : int32
        ---
        label : varchar(30)
        """

    @schema
    class Item(dj.Imported):
        definition = """
        -> Spec
        ---
        payload : varchar(60)
        """

        def make(self, key):
            # Derive the payload from the parent row so each insert is traceable.
            source_label = (Spec & key).fetch1("label")
            row = dict(key, payload=f"made:{source_label}")
            self.insert1(row)

    try:
        Spec.insert([(1, "alpha"), (2, "beta"), (3, "gamma")])

        # Partial populate: prior to the fix, the antijoin raised DataJointError here.
        Item.populate(max_calls=2)
        assert len(Item) == 2

        pending, overall = Item.progress()
        assert (overall, pending) == (3, 1)

        # Finish the remaining key and confirm nothing is left to do.
        Item.populate()
        assert len(Item) == 3
        pending, overall = Item.progress()
        assert (pending, overall) == (0, 3)
    finally:
        # Always remove the scratch schema, even when an assertion fails.
        schema.drop(prompt=False)


def test_load_dependencies(prefix, connection_test):
schema = dj.Schema(f"{prefix}_load_dependencies_populate", connection=connection_test)

Expand Down
Loading