From 7515c083ea71b15148fd2ac949e34b8cf2270cb3 Mon Sep 17 00:00:00 2001 From: milind-jain-uipath Date: Thu, 2 Jul 2026 02:42:55 +0530 Subject: [PATCH 1/4] feat(datafabric-tool): ground relationship-field joins in text-to-SQL prompt Surface entity relationship (foreign-key) fields to the Data Fabric SQL sub-graph so it can join related entities. A relationship field stores the related record's Id; the prompt now instructs the model to join on related.Id = parent. and project explicit related columns. - FieldSchema carries the related entity's SQL table, join key (Id), and reference field; foreign-key fields are tagged "fk"; add is_relationship. - build_entity_context populates these from the SDK field metadata (reference_entity, reference_field, field_display_type). - The rendered schema adds a per-entity "Relationships" subsection with the join expression, gated to related entities present in the set. - The v1 prompt adds a RELATIONSHIP FIELDS section: LEFT JOIN for optional relationships, INNER JOIN when the related record must exist or is filtered. - SQL_CONSTRAINTS permits LEFT JOIN only for relationship/foreign-key joins on Id; general joins remain INNER-only. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../datafabric_prompt_builder.py | 45 +++++++++++ .../datafabric_tool/datafabric_prompts.py | 17 ++-- .../agent/tools/datafabric_tool/models.py | 13 ++++ .../agent/tools/datafabric_tool/prompts/v1.py | 25 ++++++ .../tools/test_datafabric_prompt_builder.py | 78 +++++++++++++++++-- 5 files changed, 164 insertions(+), 14 deletions(-) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 8154caf5e..ffb8e6524 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -36,6 +36,14 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: if field.is_hidden_field or field.is_system_field: continue type_name = field.sql_type.name if field.sql_type else "unknown" + ref_entity_table: str | None = None + ref_field_name: str | None = None + if field.is_foreign_key or field.field_display_type == "Relationship": + ref_entity = getattr(field, "reference_entity", None) + ref_entity_table = getattr(ref_entity, "name", None) + ref_field = getattr(field, "reference_field", None) + ref_definition = getattr(ref_field, "definition", None) + ref_field_name = getattr(ref_definition, "name", None) fs = FieldSchema( name=field.name, display_name=field.display_name, @@ -45,6 +53,8 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: is_required=field.is_required, is_unique=field.is_unique, nullable=not field.is_required, + ref_entity_table=ref_entity_table, + ref_field_name=ref_field_name, ) field_schemas.append(fs) @@ -164,6 +174,8 @@ def format_sql_context(ctx: SQLContext) -> str: lines.append("## All available Data Fabric Entities") lines.append("") + entity_tables = {ec.entity_schema.entity_name for ec in ctx.entity_contexts} + for entity_ctx in ctx.entity_contexts: entity = 
entity_ctx.entity_schema lines.append( @@ -180,6 +192,39 @@ def format_sql_context(ctx: SQLContext) -> str: lines.append("") + # Relationship fields store the related record's Id; spell out the join + # so the model doesn't compare the FK column to a human-readable value. + # Only surface relationships whose target entity is in this set (and thus + # queryable) — a dangling reference would produce an unusable join. + relationships = [ + field + for field in entity.fields + if field.is_relationship and field.ref_entity_table in entity_tables + ] + if relationships: + lines.append(f"**Relationships for {entity.entity_name}:**") + lines.append( + f"_Join on the related entity's Id. Use LEFT JOIN to keep all {entity.entity_name} " + "rows (relationship may be unset); INNER JOIN when the related record must exist or " + "you filter on it. Project the specific related column you need — not `*`._" + ) + lines.append("") + for field in relationships: + join = ( + f"LEFT JOIN {field.ref_entity_table} " + f"ON {field.ref_entity_table}.{field.ref_join_key} = {entity.entity_name}.{field.name}" + ) + repr_hint = ( + f", representative field `{field.ref_entity_table}.{field.ref_field_name}`" + if field.ref_field_name + else "" + ) + lines.append( + f"- `{entity.entity_name}.{field.name}` → `{field.ref_entity_table}` " + f"(`{join}`{repr_hint})" + ) + lines.append("") + lines.append(f"**Query Patterns for {entity.entity_name}:**") lines.append("") lines.append("| User Intent | SQL Pattern |") diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py index 6c1226dad..4dc078881 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompts.py @@ -157,10 +157,15 @@ ### 2. 
Multi-Entity Joins (≤4 adapters) - INNER JOIN chains via entity model (up to 4 tables) +- Equi-joins only (ON left.col = right.col) +- LEFT JOIN is allowed ONLY for relationship (foreign-key) joins on the related + entity's Id (see "Relationship fields" guidance) — use it for optional + relationships to keep parent rows - Shared intermediates **Examples:** - SELECT o.id, c.name FROM Order o INNER JOIN Customer c ON o.customer_id = c.id +- SELECT o.id, a.Name FROM Order o LEFT JOIN Account a ON a.Id = o.account -- relationship join, keeps orders with no account - Fields spanning 3-4 adapters with proper INNER JOIN chains ### 3. Predicate Distribution & Pushdown @@ -253,7 +258,7 @@ - Common Table Expressions (WITH/CTE) - Window functions (ROW_NUMBER, RANK, PARTITION BY) - Self-joins -- LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN (only INNER JOIN supported) +- RIGHT JOIN, FULL OUTER JOIN (general joins must be INNER; LEFT JOIN only for relationship/foreign-key joins on Id) - CROSS JOIN **Examples:** @@ -275,7 +280,7 @@ ### 4. ADVANCED_JOINS - More than 4 tables in JOIN chain -- LEFT JOIN +- LEFT JOIN for non-relationship joins (LEFT JOIN is allowed ONLY to join a relationship/foreign-key field to its related entity on Id) - RIGHT JOIN - FULL OUTER JOIN - CROSS JOIN @@ -283,9 +288,9 @@ - Non-equi joins (theta joins) **Examples:** -- SELECT * FROM t1 RIGHT JOIN t2 -- ❌ -- SELECT * FROM t1, t2 -- ❌ (implicit CROSS JOIN) -- SELECT * FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join) +- SELECT c.id FROM t1 c RIGHT JOIN t2 d ON d.id = c.fk -- ❌ +- SELECT c.id FROM t1 c, t2 d -- ❌ (implicit CROSS JOIN) +- SELECT e1.id FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join) ### 5. UNSUPPORTED_FUNCTIONS - Date/time manipulation functions (DATE_ADD, DATE_SUB, DATEDIFF) @@ -336,7 +341,7 @@ 1. **ALWAYS use explicit column names** - Never use SELECT * 2. **Use COUNT(column_name)** - Never use COUNT(*) -3. 
**Only INNER JOIN** - No LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN, or CROSS JOIN +3. **INNER JOIN by default; LEFT JOIN only for relationships** - General joins must be INNER JOIN (equi-join). LEFT JOIN is permitted ONLY to join a relationship (foreign-key) field to its related entity on Id — use it for optional relationships to keep parent rows, INNER JOIN when the related row must exist. No RIGHT JOIN, FULL OUTER JOIN, CROSS JOIN, or self-joins 4. **Maximum 4 tables** - No more than 4 tables in a JOIN chain 5. **No subqueries** - No subqueries in any clause 6. **No CTEs** - No WITH clauses diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/models.py b/src/uipath_langchain/agent/tools/datafabric_tool/models.py index 09f4436ee..886cdeee0 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/models.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/models.py @@ -17,6 +17,12 @@ class FieldSchema(BaseModel): is_required: bool = False is_unique: bool = False nullable: bool = True + # For relationship (foreign-key) fields: the related entity's SQL table and + # the column to join on. The field itself stores the related record's Id, so + # the join is always ``related.<ref_join_key> = <entity>.<field>``. 
+ ref_entity_table: str | None = None + ref_join_key: str = "Id" + ref_field_name: str | None = None @property def display_type(self) -> str: @@ -24,10 +30,17 @@ def display_type(self) -> str: modifiers = [] if self.is_required: modifiers.append("required") + if self.is_foreign_key: + modifiers.append("fk") if modifiers: return f"{self.type}, {', '.join(modifiers)}" return self.type + @property + def is_relationship(self) -> bool: + """True when this field references another entity that can be joined.""" + return self.is_foreign_key and self.ref_entity_table is not None + @property def is_numeric(self) -> bool: return self.type.lower() in NUMERIC_TYPES diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py index 539f2df39..c16754849 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py @@ -128,6 +128,31 @@ BAD: SELECT name FROM t1 WHERE id IN (SELECT fk FROM t2 WHERE x = 1) GOOD: SELECT t1.name FROM t1 INNER JOIN t2 ON t1.id = t2.fk WHERE t2.x = 1 +RELATIONSHIP FIELDS (foreign keys): +- A relationship field (marked ``fk`` in the schema) stores the RELATED \ +record's Id (a GUID) — not its name, label, or any other attribute. Comparing \ +such a column to a human-readable value (e.g. ``WHERE Account = 'Acme'``) will \ +never match. +- To filter on, or return, the related entity's attributes, JOIN the related \ +entity on its Id and project the SPECIFIC column(s) you need — never \ +``SELECT parent.*``: + SELECT parent.<col>, related.<col> FROM parent LEFT JOIN related ON related.Id = parent.<relationship_field> + then put your filter/selection on ``related``'s columns \ +(e.g. ``WHERE related.Name = 'Acme'``). The exact join and the related entity's \ +representative field are listed under "Relationships for <entity>" in the entity \ +schemas above. 
+- Choose the join type by intent: + - LEFT JOIN when you want every parent row, including those whose \ +relationship is unset (the related columns come back NULL). Prefer this when the \ +question is about the parent entity and only enriches it with related data. + - INNER JOIN when the related record must exist, or when you filter on the \ +related entity's columns (e.g. "orders whose account region is APAC"). +- If you only need the related record's identifier itself, select the \ +relationship field directly — no JOIN. +- Only equi-joins on the related entity's Id are supported \ +(``JOIN related ON related.Id = parent.``); the related entity's \ +schema is one of the entities listed above. + ERROR RECOVERY (structured error taxonomy): If ``execute_sql`` returns an ``error`` field, classify it and apply the \ targeted fix: diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 47034b144..73ea27537 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -3,9 +3,8 @@ from uipath_langchain.agent.tools.datafabric_tool.datafabric_prompt_builder import build -def _fake_field(**overrides): - return SimpleNamespace( - name="status", +def _fake_field(name="status", **overrides): + defaults = dict( display_name="Status", sql_type=SimpleNamespace(name="varchar"), description="The canonical workflow status", @@ -19,20 +18,36 @@ def _fake_field(**overrides): is_unique=False, is_hidden_field=False, is_system_field=False, + field_display_type=None, + reference_entity=None, + reference_field=None, + ) + defaults.update(overrides) + return SimpleNamespace(name=name, **defaults) + + +def _fake_fk_field(name="account", ref_table="Account", ref_field="Id", **overrides): + return _fake_field( + name=name, + display_name=name.title(), + description=f"Reference to {ref_table}", + is_foreign_key=True, + field_display_type="Relationship", + 
reference_entity=SimpleNamespace(name=ref_table), + reference_field=SimpleNamespace(definition=SimpleNamespace(name=ref_field)), **overrides, ) -def _fake_entity(*fields, **overrides): - return SimpleNamespace( +def _fake_entity(*fields, name="Ticket", **overrides): + defaults = dict( id="entity-1", - name="Ticket", display_name="Ticket", description="Support tickets", record_count=10, - fields=list(fields), - **overrides, ) + defaults.update(overrides) + return SimpleNamespace(name=name, fields=list(fields), **defaults) def test_build_renders_ecp_aware_prompt_strategy(): @@ -57,3 +72,50 @@ def test_build_includes_domain_guidance_in_rendered_prompt(): assert "## Domain Guidance" in prompt assert "Use business-friendly ticket language." in prompt + + +def test_relationship_field_renders_join_when_target_entity_present(): + order = _fake_entity(_fake_field(), _fake_fk_field(ref_field="Name"), name="Order", display_name="Order") + account = _fake_entity(_fake_field(name="Name"), name="Account", display_name="Account") + + prompt = build([order, account]) + + # The FK column is tagged; the join is spelled out against the target Id as a + # LEFT JOIN (keeps parent rows), and the representative field is surfaced. + assert "| account | varchar, fk |" in prompt + assert "**Relationships for Order:**" in prompt + assert "LEFT JOIN Account ON Account.Id = Order.account" in prompt + assert "representative field `Account.Name`" in prompt + + +def test_v1_prompt_documents_left_vs_inner_join_intent(): + prompt = build([_fake_entity(_fake_field())]) + + # The relationship guidance explains when to use LEFT vs INNER. + assert "LEFT JOIN" in prompt + assert "INNER JOIN" in prompt + + +def test_relationship_subsection_absent_when_no_foreign_keys(): + prompt = build([_fake_entity(_fake_field())]) + + # The rendered per-entity header (distinct from the static prompt guidance + # that mentions "Relationships for
") must not appear. + assert "**Relationships for Ticket:**" not in prompt + + +def test_relationship_omitted_when_target_entity_not_in_set(): + # Order references Account, but Account is not part of the entity set, so a + # join would be unusable — the relationship line must be suppressed. + order = _fake_entity(_fake_field(), _fake_fk_field(), name="Order", display_name="Order") + + prompt = build([order]) + + assert "**Relationships for Order:**" not in prompt + assert "INNER JOIN Account" not in prompt + + +def test_v1_prompt_documents_relationship_fields(): + prompt = build([_fake_entity(_fake_field())]) + + assert "RELATIONSHIP FIELDS" in prompt From f69f837b32969931bb9cb58f5a920d26e417dac6 Mon Sep 17 00:00:00 2001 From: milind-jain-uipath Date: Fri, 3 Jul 2026 03:00:34 +0530 Subject: [PATCH 2/4] style(datafabric-tool): apply ruff format to prompt builder tests Co-Authored-By: Claude Opus 4.8 (1M context) --- .../agent/tools/test_datafabric_prompt_builder.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 73ea27537..b131c35a1 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -75,8 +75,15 @@ def test_build_includes_domain_guidance_in_rendered_prompt(): def test_relationship_field_renders_join_when_target_entity_present(): - order = _fake_entity(_fake_field(), _fake_fk_field(ref_field="Name"), name="Order", display_name="Order") - account = _fake_entity(_fake_field(name="Name"), name="Account", display_name="Account") + order = _fake_entity( + _fake_field(), + _fake_fk_field(ref_field="Name"), + name="Order", + display_name="Order", + ) + account = _fake_entity( + _fake_field(name="Name"), name="Account", display_name="Account" + ) prompt = build([order, account]) @@ -107,7 +114,9 @@ def 
test_relationship_subsection_absent_when_no_foreign_keys(): def test_relationship_omitted_when_target_entity_not_in_set(): # Order references Account, but Account is not part of the entity set, so a # join would be unusable — the relationship line must be suppressed. - order = _fake_entity(_fake_field(), _fake_fk_field(), name="Order", display_name="Order") + order = _fake_entity( + _fake_field(), _fake_fk_field(), name="Order", display_name="Order" + ) prompt = build([order]) From df939523b36ec2c9f5f03e8de419fb3a36c31e07 Mon Sep 17 00:00:00 2001 From: milind-jain-uipath Date: Fri, 3 Jul 2026 03:14:03 +0530 Subject: [PATCH 3/4] docs(datafabric-tool): tighten relationship-join prompt guidance - Drop the redundant "never SELECT parent.*" aside (SQL_CONSTRAINTS already forbids SELECT *). - Tie join-type choice to the relationship field's required flag: a required field -> INNER JOIN is safe (related record always exists); optional -> LEFT JOIN to keep parent rows where it is unset. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../agent/tools/datafabric_tool/prompts/v1.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py index c16754849..4c37a8ae8 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py @@ -134,19 +134,22 @@ such a column to a human-readable value (e.g. ``WHERE Account = 'Acme'``) will \ never match. - To filter on, or return, the related entity's attributes, JOIN the related \ -entity on its Id and project the SPECIFIC column(s) you need — never \ -``SELECT parent.*``: +entity on its Id and project the specific column(s) you need: SELECT parent., related. FROM parent LEFT JOIN related ON related.Id = parent. then put your filter/selection on ``related``'s columns \ (e.g. 
``WHERE related.Name = 'Acme'``). The exact join and the related entity's \ representative field are listed under "Relationships for <entity>
" in the entity \ schemas above. -- Choose the join type by intent: - - LEFT JOIN when you want every parent row, including those whose \ -relationship is unset (the related columns come back NULL). Prefer this when the \ -question is about the parent entity and only enriches it with related data. - - INNER JOIN when the related record must exist, or when you filter on the \ -related entity's columns (e.g. "orders whose account region is APAC"). +- Choose the join type by intent (the schema tags a relationship field \ +``required`` or not): + - LEFT JOIN when the relationship is optional (not ``required``) and you want \ +every parent row, including those where it is unset (the related columns come \ +back NULL). Use this when the question is about the parent entity and only \ +enriches it with related data. + - INNER JOIN when the relationship is marked ``required`` (the related record \ +always exists, so no parent rows are dropped), when the related record must \ +otherwise exist, or when you filter on the related entity's columns \ +(e.g. "orders whose account region is APAC"). - If you only need the related record's identifier itself, select the \ relationship field directly — no JOIN. - Only equi-joins on the related entity's Id are supported \ From 312b81ce34d3fe4ec8cd13c0746f5c4946817202 Mon Sep 17 00:00:00 2001 From: milind-jain-uipath Date: Fri, 3 Jul 2026 03:27:18 +0530 Subject: [PATCH 4/4] fix(datafabric-tool): tag Relationship-typed fields consistently Detect a relationship as (is_foreign_key OR fieldDisplayType == "Relationship") and use that single condition both to set is_foreign_key on the FieldSchema and to extract the reference target, so a Relationship-typed field without the is_foreign_key flag is still tagged fk and rendered in the Relationships section. Read field_display_type via getattr for safety. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../datafabric_prompt_builder.py | 11 ++++++++-- .../tools/test_datafabric_prompt_builder.py | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index ffb8e6524..fcbaf016b 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -36,9 +36,16 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: if field.is_hidden_field or field.is_system_field: continue type_name = field.sql_type.name if field.sql_type else "unknown" + # A relationship is either a declared foreign key or a Relationship-typed + # field; use the same condition to tag it and to extract its target, so + # the two never disagree. + is_relationship = ( + field.is_foreign_key + or getattr(field, "field_display_type", None) == "Relationship" + ) ref_entity_table: str | None = None ref_field_name: str | None = None - if field.is_foreign_key or field.field_display_type == "Relationship": + if is_relationship: ref_entity = getattr(field, "reference_entity", None) ref_entity_table = getattr(ref_entity, "name", None) ref_field = getattr(field, "reference_field", None) @@ -49,7 +56,7 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: display_name=field.display_name, type=type_name, description=field.description, - is_foreign_key=field.is_foreign_key, + is_foreign_key=is_relationship, is_required=field.is_required, is_unique=field.is_unique, nullable=not field.is_required, diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index b131c35a1..98dcae3fb 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ 
b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -95,6 +95,27 @@ def test_relationship_field_renders_join_when_target_entity_present(): assert "representative field `Account.Name`" in prompt +def test_relationship_detected_via_display_type_without_is_foreign_key(): + # A Relationship-typed field with is_foreign_key unset must still be tagged + # fk and rendered in the Relationships section. + relationship_field = _fake_field( + name="account", + display_name="Account", + field_display_type="Relationship", + reference_entity=SimpleNamespace(name="Account"), + reference_field=SimpleNamespace(definition=SimpleNamespace(name="Name")), + ) + order = _fake_entity(relationship_field, name="Order", display_name="Order") + account = _fake_entity( + _fake_field(name="Name"), name="Account", display_name="Account" + ) + + prompt = build([order, account]) + + assert "| account | varchar, fk |" in prompt + assert "LEFT JOIN Account ON Account.Id = Order.account" in prompt + + def test_v1_prompt_documents_left_vs_inner_join_intent(): prompt = build([_fake_entity(_fake_field())])