diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 41b6da63f..c059ea927 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -46,16 +46,33 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: continue is_system = field.is_system_field type_name = field.sql_type.name if field.sql_type else "unknown" + # A relationship is either a declared foreign key or a Relationship-typed + # field; use the same condition to tag it and to extract its target, so + # the two never disagree. + is_relationship = ( + field.is_foreign_key + or getattr(field, "field_display_type", None) == "Relationship" + ) + ref_entity_table: str | None = None + ref_field_name: str | None = None + if is_relationship: + ref_entity = getattr(field, "reference_entity", None) + ref_entity_table = getattr(ref_entity, "name", None) + ref_field = getattr(field, "reference_field", None) + ref_definition = getattr(ref_field, "definition", None) + ref_field_name = getattr(ref_definition, "name", None) fs = FieldSchema( name=field.name, display_name=field.display_name, type=type_name, description=field.description, - is_foreign_key=field.is_foreign_key, + is_foreign_key=is_relationship, is_required=field.is_required, is_unique=field.is_unique, nullable=not field.is_required, is_system_field=is_system, + ref_entity_table=ref_entity_table, + ref_field_name=ref_field_name, ) field_schemas.append(fs) @@ -178,6 +195,8 @@ def format_sql_context(ctx: SQLContext) -> str: lines.append("## All available Data Fabric Entities") lines.append("") + entity_tables = {ec.entity_schema.entity_name for ec in ctx.entity_contexts} + for entity_ctx in ctx.entity_contexts: entity = entity_ctx.entity_schema lines.append( @@ -194,6 +213,39 @@ def format_sql_context(ctx: 
SQLContext) -> str: lines.append("") + # Relationship fields store the related record's Id; spell out the join + # so the model doesn't compare the FK column to a human-readable value. + # Only surface relationships whose target is another entity in this set: + # dangling targets are unqueryable and un-aliased self-joins are unsupported. + relationships = [ + field + for field in entity.fields + if field.is_relationship and field.ref_entity_table in (entity_tables - {entity.entity_name}) + ] + if relationships: + lines.append(f"**Relationships for {entity.entity_name}:**") + lines.append( + f"_Join on the related entity's Id. Use LEFT JOIN to keep all {entity.entity_name} " + "rows (relationship may be unset); INNER JOIN when the related record must exist or " + "you filter on it. Project the specific related column you need — not `*`._" + ) + lines.append("") + for field in relationships: + join = ( + f"LEFT JOIN {field.ref_entity_table} " + f"ON {field.ref_entity_table}.{field.ref_join_key} = {entity.entity_name}.{field.name}" + ) + repr_hint = ( + f", representative field `{field.ref_entity_table}.{field.ref_field_name}`" + if field.ref_field_name + else "" + ) + lines.append( + f"- `{entity.entity_name}.{field.name}` → `{field.ref_entity_table}` " + f"(`{join}`{repr_hint})" + ) + lines.append("") + lines.append(f"**Query Patterns for {entity.entity_name}:**") lines.append("") lines.append("| User Intent | SQL Pattern |") diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py index 6c1226dad..4dc078881 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py @@ -157,10 +157,15 @@ ### 2. 
Multi-Entity Joins (≤4 adapters) - INNER JOIN chains via entity model (up to 4 tables) +- Equi-joins only (ON left.col = right.col) +- LEFT JOIN is allowed ONLY for relationship (foreign-key) joins on the related + entity's Id (see "Relationship fields" guidance) — use it for optional + relationships to keep parent rows - Shared intermediates **Examples:** - SELECT o.id, c.name FROM Order o INNER JOIN Customer c ON o.customer_id = c.id +- SELECT o.id, a.Name FROM Order o LEFT JOIN Account a ON a.Id = o.account -- relationship join, keeps orders with no account - Fields spanning 3-4 adapters with proper INNER JOIN chains ### 3. Predicate Distribution & Pushdown @@ -253,7 +258,7 @@ - Common Table Expressions (WITH/CTE) - Window functions (ROW_NUMBER, RANK, PARTITION BY) - Self-joins -- LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN (only INNER JOIN supported) +- RIGHT JOIN, FULL OUTER JOIN (general joins must be INNER; LEFT JOIN only for relationship/foreign-key joins on Id) - CROSS JOIN **Examples:** @@ -275,7 +280,7 @@ ### 4. ADVANCED_JOINS - More than 4 tables in JOIN chain -- LEFT JOIN +- LEFT JOIN for non-relationship joins (LEFT JOIN is allowed ONLY to join a relationship/foreign-key field to its related entity on Id) - RIGHT JOIN - FULL OUTER JOIN - CROSS JOIN @@ -283,9 +288,9 @@ - Non-equi joins (theta joins) **Examples:** -- SELECT * FROM t1 RIGHT JOIN t2 -- ❌ -- SELECT * FROM t1, t2 -- ❌ (implicit CROSS JOIN) -- SELECT * FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join) +- SELECT c.id FROM t1 c RIGHT JOIN t2 d ON d.id = c.fk -- ❌ +- SELECT c.id FROM t1 c, t2 d -- ❌ (implicit CROSS JOIN) +- SELECT e1.id FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join) ### 5. UNSUPPORTED_FUNCTIONS - Date/time manipulation functions (DATE_ADD, DATE_SUB, DATEDIFF) @@ -336,7 +341,7 @@ 1. **ALWAYS use explicit column names** - Never use SELECT * 2. **Use COUNT(column_name)** - Never use COUNT(*) -3. 
**Only INNER JOIN** - No LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN, or CROSS JOIN +3. **INNER JOIN by default; LEFT JOIN only for relationships** - General joins must be INNER JOIN (equi-join). LEFT JOIN is permitted ONLY to join a relationship (foreign-key) field to its related entity on Id — use it for optional relationships to keep parent rows, INNER JOIN when the related row must exist. No RIGHT JOIN, FULL OUTER JOIN, CROSS JOIN, or self-joins 4. **Maximum 4 tables** - No more than 4 tables in a JOIN chain 5. **No subqueries** - No subqueries in any clause 6. **No CTEs** - No WITH clauses diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/models.py b/src/uipath_langchain/agent/tools/datafabric_tool/models.py index a68334a2b..fbd0cbd59 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/models.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/models.py @@ -18,6 +18,12 @@ class FieldSchema(BaseModel): is_unique: bool = False nullable: bool = True is_system_field: bool = False + # For relationship (foreign-key) fields: the related entity's SQL table and + # the column to join on. The field itself stores the related record's Id, so + # the join is always ``related.<ref_join_key> = <entity>.<field>``. 
+ ref_entity_table: str | None = None + ref_join_key: str = "Id" + ref_field_name: str | None = None @property def display_type(self) -> str: @@ -25,12 +31,19 @@ def display_type(self) -> str: modifiers = [] if self.is_required: modifiers.append("required") + if self.is_foreign_key: + modifiers.append("fk") if self.is_system_field: modifiers.append("system") if modifiers: return f"{self.type}, {', '.join(modifiers)}" return self.type + @property + def is_relationship(self) -> bool: + """True when this field references another entity that can be joined.""" + return self.is_foreign_key and self.ref_entity_table is not None + @property def is_numeric(self) -> bool: return self.type.lower() in NUMERIC_TYPES diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py index e3e1e1c8e..c0ccd5afb 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py @@ -133,6 +133,34 @@ BAD: SELECT name FROM t1 WHERE id IN (SELECT fk FROM t2 WHERE x = 1) GOOD: SELECT t1.name FROM t1 INNER JOIN t2 ON t1.id = t2.fk WHERE t2.x = 1 +RELATIONSHIP FIELDS (foreign keys): +- A relationship field (marked ``fk`` in the schema) stores the RELATED \ +record's Id (a GUID) — not its name, label, or any other attribute. Comparing \ +such a column to a human-readable value (e.g. ``WHERE Account = 'Acme'``) will \ +never match. +- To filter on, or return, the related entity's attributes, JOIN the related \ +entity on its Id and project the specific column(s) you need: + SELECT parent.<col>, related.<col> FROM parent LEFT JOIN related ON related.Id = parent.<relationship_field> + then put your filter/selection on ``related``'s columns \ +(e.g. ``WHERE related.Name = 'Acme'``). The exact join and the related entity's \ +representative field are listed under "Relationships for <entity>" in the entity \ +schemas above. 
+- Choose the join type by intent (the schema tags a relationship field \ +``required`` or not): + - LEFT JOIN when the relationship is optional (not ``required``) and you want \ +every parent row, including those where it is unset (the related columns come \ +back NULL). Use this when the question is about the parent entity and only \ +enriches it with related data. + - INNER JOIN when the relationship is marked ``required`` (the related record \ +always exists, so no parent rows are dropped), when the related record must \ +otherwise exist, or when you filter on the related entity's columns \ +(e.g. "orders whose account region is APAC"). +- If you only need the related record's identifier itself, select the \ +relationship field directly — no JOIN. +- Only equi-joins on the related entity's Id are supported \ +(``JOIN related ON related.Id = parent.<relationship_field>``); the related entity's \ +schema is one of the entities listed above. + ERROR RECOVERY (structured error taxonomy): If ``execute_sql`` returns an ``error`` field, classify it and apply the \ targeted fix: diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 560e049d1..7a6e90749 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -19,21 +19,36 @@ def _fake_field(**overrides): is_unique=False, is_hidden_field=False, is_system_field=False, + field_display_type=None, + reference_entity=None, + reference_field=None, ) defaults.update(overrides) return SimpleNamespace(**defaults) -def _fake_entity(*fields, **overrides): - return SimpleNamespace( +def _fake_fk_field(name="account", ref_table="Account", ref_field="Id", **overrides): + return _fake_field( + name=name, + display_name=name.title(), + description=f"Reference to {ref_table}", + is_foreign_key=True, + field_display_type="Relationship", + reference_entity=SimpleNamespace(name=ref_table), + 
reference_field=SimpleNamespace(definition=SimpleNamespace(name=ref_field)), + **overrides, + ) + + +def _fake_entity(*fields, name="Ticket", **overrides): + defaults = dict( id="entity-1", - name="Ticket", display_name="Ticket", description="Support tickets", record_count=10, - fields=list(fields), - **overrides, ) + defaults.update(overrides) + return SimpleNamespace(name=name, fields=list(fields), **defaults) def test_build_renders_ecp_aware_prompt_strategy(): @@ -60,6 +75,83 @@ def test_build_includes_domain_guidance_in_rendered_prompt(): assert "Use business-friendly ticket language." in prompt +def test_relationship_field_renders_join_when_target_entity_present(): + order = _fake_entity( + _fake_field(), + _fake_fk_field(ref_field="Name"), + name="Order", + display_name="Order", + ) + account = _fake_entity( + _fake_field(name="Name"), name="Account", display_name="Account" + ) + + prompt = build([order, account]) + + # The FK column is tagged; the join is spelled out against the target Id as a + # LEFT JOIN (keeps parent rows), and the representative field is surfaced. + assert "| account | varchar, fk |" in prompt + assert "**Relationships for Order:**" in prompt + assert "LEFT JOIN Account ON Account.Id = Order.account" in prompt + assert "representative field `Account.Name`" in prompt + + +def test_relationship_detected_via_display_type_without_is_foreign_key(): + # A Relationship-typed field with is_foreign_key unset must still be tagged + # fk and rendered in the Relationships section. 
+ relationship_field = _fake_field( + name="account", + display_name="Account", + field_display_type="Relationship", + reference_entity=SimpleNamespace(name="Account"), + reference_field=SimpleNamespace(definition=SimpleNamespace(name="Name")), + ) + order = _fake_entity(relationship_field, name="Order", display_name="Order") + account = _fake_entity( + _fake_field(name="Name"), name="Account", display_name="Account" + ) + + prompt = build([order, account]) + + assert "| account | varchar, fk |" in prompt + assert "LEFT JOIN Account ON Account.Id = Order.account" in prompt + + +def test_v1_prompt_documents_left_vs_inner_join_intent(): + prompt = build([_fake_entity(_fake_field())]) + + # The relationship guidance explains when to use LEFT vs INNER. + assert "LEFT JOIN" in prompt + assert "INNER JOIN" in prompt + + +def test_relationship_subsection_absent_when_no_foreign_keys(): + prompt = build([_fake_entity(_fake_field())]) + + # The rendered per-entity header (distinct from the static prompt guidance + # that mentions "Relationships for
<entity>") must not appear. + assert "**Relationships for Ticket:**" not in prompt + + +def test_relationship_omitted_when_target_entity_not_in_set(): + # Order references Account, but Account is not part of the entity set, so a + # join would be unusable — the relationship line must be suppressed. + order = _fake_entity( + _fake_field(), _fake_fk_field(), name="Order", display_name="Order" + ) + + prompt = build([order]) + + assert "**Relationships for Order:**" not in prompt + assert "LEFT JOIN Account ON Account.Id = Order.account" not in prompt + + +def test_v1_prompt_documents_relationship_fields(): + prompt = build([_fake_entity(_fake_field())]) + + assert "RELATIONSHIP FIELDS" in prompt + + def _system_field(name, type_name="datetimeoffset", **overrides): """A fake auto-added system/audit field (Id, CreateTime, ...).""" return _fake_field(