Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,32 @@
if field.is_hidden_field or field.is_system_field:
continue
type_name = field.sql_type.name if field.sql_type else "unknown"
# A relationship is either a declared foreign key or a Relationship-typed
# field; use the same condition to tag it and to extract its target, so
# the two never disagree.
is_relationship = (
field.is_foreign_key
or getattr(field, "field_display_type", None) == "Relationship"
)
ref_entity_table: str | None = None
ref_field_name: str | None = None
if is_relationship:
ref_entity = getattr(field, "reference_entity", None)
Comment on lines 38 to +49
ref_entity_table = getattr(ref_entity, "name", None)
ref_field = getattr(field, "reference_field", None)
ref_definition = getattr(ref_field, "definition", None)
ref_field_name = getattr(ref_definition, "name", None)
fs = FieldSchema(
name=field.name,
display_name=field.display_name,
type=type_name,
description=field.description,
is_foreign_key=field.is_foreign_key,
is_foreign_key=is_relationship,
is_required=field.is_required,
is_unique=field.is_unique,
nullable=not field.is_required,
ref_entity_table=ref_entity_table,
ref_field_name=ref_field_name,
)
field_schemas.append(fs)

Expand Down Expand Up @@ -133,7 +150,7 @@
)


def format_sql_context(ctx: SQLContext) -> str:

Check failure on line 153 in src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 21 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=UiPath_uipath-langchain-python&issues=AZ8fjFVth8zrEkD9XXIW&open=AZ8fjFVth8zrEkD9XXIW&pullRequest=962
"""Format a SQLContext as text for system prompt injection."""
lines: list[str] = []

Expand Down Expand Up @@ -164,6 +181,8 @@
lines.append("## All available Data Fabric Entities")
lines.append("")

entity_tables = {ec.entity_schema.entity_name for ec in ctx.entity_contexts}

for entity_ctx in ctx.entity_contexts:
entity = entity_ctx.entity_schema
lines.append(
Expand All @@ -180,6 +199,39 @@

lines.append("")

# Relationship fields store the related record's Id; spell out the join
# so the model doesn't compare the FK column to a human-readable value.
# Only surface relationships whose target entity is in this set (and thus
# queryable) — a dangling reference would produce an unusable join.
relationships = [
field
for field in entity.fields
if field.is_relationship and field.ref_entity_table in entity_tables
]
if relationships:
lines.append(f"**Relationships for {entity.entity_name}:**")
lines.append(
f"_Join on the related entity's Id. Use LEFT JOIN to keep all {entity.entity_name} "
"rows (relationship may be unset); INNER JOIN when the related record must exist or "
"you filter on it. Project the specific related column you need — not `*`._"
)
lines.append("")
for field in relationships:
join = (
f"LEFT JOIN {field.ref_entity_table} "
f"ON {field.ref_entity_table}.{field.ref_join_key} = {entity.entity_name}.{field.name}"
)
repr_hint = (
f", representative field `{field.ref_entity_table}.{field.ref_field_name}`"
if field.ref_field_name
else ""
)
lines.append(
f"- `{entity.entity_name}.{field.name}` → `{field.ref_entity_table}` "
f"(`{join}`{repr_hint})"
)
lines.append("")

lines.append(f"**Query Patterns for {entity.entity_name}:**")
lines.append("")
lines.append("| User Intent | SQL Pattern |")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,15 @@

### 2. Multi-Entity Joins (≤4 adapters)
- INNER JOIN chains via entity model (up to 4 tables)
- Equi-joins only (ON left.col = right.col)
- LEFT JOIN is allowed ONLY for relationship (foreign-key) joins on the related
entity's Id (see "Relationship fields" guidance) — use it for optional
relationships to keep parent rows
Comment on lines +161 to +163

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep v0 from forbidding relationship LEFT JOINs

When callers render this builder with prompt_version="v0", the legacy SQL_EXPERT_SYSTEM_PROMPT from this same module is emitted before these relaxed constraints and still lists LEFT JOIN under unsupported join constructs. That leaves the v0 prompt simultaneously forbidding LEFT JOINs and allowing/recommending them for optional relationship fields, so optional relationship questions in that compatibility mode can still be steered away from the only join type the new relationship section is trying to ground. Update the legacy prompt or gate the new relationship/LEFT JOIN guidance by prompt version.

Useful? React with 👍 / 👎.

- Shared intermediates

**Examples:**
- SELECT o.id, c.name FROM Order o INNER JOIN Customer c ON o.customer_id = c.id
- SELECT o.id, a.Name FROM Order o LEFT JOIN Account a ON a.Id = o.account -- relationship join, keeps orders with no account
- Fields spanning 3-4 adapters with proper INNER JOIN chains

### 3. Predicate Distribution & Pushdown
Expand Down Expand Up @@ -253,7 +258,7 @@
- Common Table Expressions (WITH/CTE)
- Window functions (ROW_NUMBER, RANK, PARTITION BY)
- Self-joins
- LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN (only INNER JOIN supported)
- RIGHT JOIN, FULL OUTER JOIN (general joins must be INNER; LEFT JOIN only for relationship/foreign-key joins on Id)
- CROSS JOIN

**Examples:**
Expand All @@ -275,17 +280,17 @@

### 4. ADVANCED_JOINS
- More than 4 tables in JOIN chain
- LEFT JOIN
- LEFT JOIN for non-relationship joins (LEFT JOIN is allowed ONLY to join a relationship/foreign-key field to its related entity on Id)
- RIGHT JOIN
- FULL OUTER JOIN
- CROSS JOIN
- Self-joins
- Non-equi joins (theta joins)

**Examples:**
- SELECT * FROM t1 RIGHT JOIN t2 -- ❌
- SELECT * FROM t1, t2 -- ❌ (implicit CROSS JOIN)
- SELECT * FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join)
- SELECT c.id FROM t1 c RIGHT JOIN t2 d ON d.id = c.fk -- ❌
- SELECT c.id FROM t1 c, t2 d -- ❌ (implicit CROSS JOIN)
- SELECT e1.id FROM Employee e1 JOIN Employee e2 ON e1.manager_id = e2.id -- ❌ (self-join)

### 5. UNSUPPORTED_FUNCTIONS
- Date/time manipulation functions (DATE_ADD, DATE_SUB, DATEDIFF)
Expand Down Expand Up @@ -336,7 +341,7 @@

1. **ALWAYS use explicit column names** - Never use SELECT *
2. **Use COUNT(column_name)** - Never use COUNT(*)
3. **Only INNER JOIN** - No LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN, or CROSS JOIN
3. **INNER JOIN by default; LEFT JOIN only for relationships** - General joins must be INNER JOIN (equi-join). LEFT JOIN is permitted ONLY to join a relationship (foreign-key) field to its related entity on Id — use it for optional relationships to keep parent rows, INNER JOIN when the related row must exist. No RIGHT JOIN, FULL OUTER JOIN, CROSS JOIN, or self-joins
4. **Maximum 4 tables** - No more than 4 tables in a JOIN chain
5. **No subqueries** - No subqueries in any clause
6. **No CTEs** - No WITH clauses
Expand Down
13 changes: 13 additions & 0 deletions src/uipath_langchain/agent/tools/datafabric_tool/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,30 @@ class FieldSchema(BaseModel):
is_required: bool = False
is_unique: bool = False
nullable: bool = True
# For relationship (foreign-key) fields: the related entity's SQL table and
# the column to join on. The field itself stores the related record's Id, so
# the join is always ``related.<ref_join_key> = <this table>.<name>``.
ref_entity_table: str | None = None
ref_join_key: str = "Id"
ref_field_name: str | None = None

@property
def display_type(self) -> str:
    """Return the SQL type, suffixed with any modifier tags, for markdown display.

    The tags are ``required`` and ``fk`` (in that order); when neither
    applies the bare type string is returned unchanged.
    """
    candidate_tags = (
        ("required", self.is_required),
        ("fk", self.is_foreign_key),
    )
    tags = [label for label, enabled in candidate_tags if enabled]
    if not tags:
        return self.type
    return f"{self.type}, {', '.join(tags)}"

@property
def is_relationship(self) -> bool:
    """Whether this field is a foreign key with a known target table.

    Both conditions must hold: the field is flagged ``is_foreign_key`` and
    ``ref_entity_table`` has been populated.
    """
    flagged_as_fk = self.is_foreign_key
    # Short-circuits exactly like the plain ``and``: the target table is only
    # consulted when the field is flagged as a foreign key.
    return flagged_as_fk and self.ref_entity_table is not None

@property
def is_numeric(self) -> bool:
    """Whether the lower-cased type name is a member of ``NUMERIC_TYPES``."""
    normalized_type = self.type.lower()
    return normalized_type in NUMERIC_TYPES
Expand Down
28 changes: 28 additions & 0 deletions src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,34 @@
BAD: SELECT name FROM t1 WHERE id IN (SELECT fk FROM t2 WHERE x = 1)
GOOD: SELECT t1.name FROM t1 INNER JOIN t2 ON t1.id = t2.fk WHERE t2.x = 1

RELATIONSHIP FIELDS (foreign keys):
- A relationship field (marked ``fk`` in the schema) stores the RELATED \
record's Id (a GUID) — not its name, label, or any other attribute. Comparing \
such a column to a human-readable value (e.g. ``WHERE Account = 'Acme'``) will \
never match.
- To filter on, or return, the related entity's attributes, JOIN the related \
entity on its Id and project the specific column(s) you need:
SELECT parent.<cols>, related.<field> FROM parent LEFT JOIN related ON related.Id = parent.<relField>
then put your filter/selection on ``related``'s columns \
(e.g. ``WHERE related.Name = 'Acme'``). The exact join and the related entity's \
representative field are listed under "Relationships for <table>" in the entity \
schemas above.
- Choose the join type by intent (the schema tags a relationship field \
``required`` or not):
- LEFT JOIN when the relationship is optional (not ``required``) and you want \
every parent row, including those where it is unset (the related columns come \
back NULL). Use this when the question is about the parent entity and only \
enriches it with related data.
- INNER JOIN when the relationship is marked ``required`` (the related record \
always exists, so no parent rows are dropped), when the related record must \
otherwise exist, or when you filter on the related entity's columns \
(e.g. "orders whose account region is APAC").
- If you only need the related record's identifier itself, select the \
relationship field directly — no JOIN.
- Only equi-joins on the related entity's Id are supported \
(``JOIN related ON related.Id = parent.<relField>``); the related entity's \
schema is one of the entities listed above.

ERROR RECOVERY (structured error taxonomy):
If ``execute_sql`` returns an ``error`` field, classify it and apply the \
targeted fix:
Expand Down
108 changes: 100 additions & 8 deletions tests/agent/tools/test_datafabric_prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
from uipath_langchain.agent.tools.datafabric_tool.datafabric_prompt_builder import build


def _fake_field(**overrides):
return SimpleNamespace(
name="status",
def _fake_field(name="status", **overrides):
defaults = dict(
display_name="Status",
sql_type=SimpleNamespace(name="varchar"),
description="The canonical workflow status",
Expand All @@ -19,20 +18,36 @@ def _fake_field(**overrides):
is_unique=False,
is_hidden_field=False,
is_system_field=False,
field_display_type=None,
reference_entity=None,
reference_field=None,
)
defaults.update(overrides)
return SimpleNamespace(name=name, **defaults)


def _fake_fk_field(name="account", ref_table="Account", ref_field="Id", **overrides):
    """Build a fake Relationship-typed foreign-key field for tests.

    ``ref_table`` becomes the name of the referenced entity and ``ref_field``
    the name of the referenced field's definition. Any extra keyword
    arguments are forwarded to ``_fake_field``.
    """
    target_entity = SimpleNamespace(name=ref_table)
    target_field = SimpleNamespace(definition=SimpleNamespace(name=ref_field))
    return _fake_field(
        name=name,
        display_name=name.title(),
        description=f"Reference to {ref_table}",
        is_foreign_key=True,
        field_display_type="Relationship",
        reference_entity=target_entity,
        reference_field=target_field,
        **overrides,
    )


def _fake_entity(*fields, **overrides):
return SimpleNamespace(
def _fake_entity(*fields, name="Ticket", **overrides):
defaults = dict(
id="entity-1",
name="Ticket",
display_name="Ticket",
description="Support tickets",
record_count=10,
fields=list(fields),
**overrides,
)
defaults.update(overrides)
return SimpleNamespace(name=name, fields=list(fields), **defaults)


def test_build_renders_ecp_aware_prompt_strategy():
Expand All @@ -57,3 +72,80 @@ def test_build_includes_domain_guidance_in_rendered_prompt():

assert "## Domain Guidance" in prompt
assert "Use business-friendly ticket language." in prompt


def test_relationship_field_renders_join_when_target_entity_present():
    """An FK whose target entity is in the set is tagged and gets a join line."""
    order_entity = _fake_entity(
        _fake_field(),
        _fake_fk_field(ref_field="Name"),
        name="Order",
        display_name="Order",
    )
    account_entity = _fake_entity(
        _fake_field(name="Name"), name="Account", display_name="Account"
    )

    rendered = build([order_entity, account_entity])

    # In order: the FK column is tagged, the per-entity header is present, the
    # join is spelled out against the target Id as a LEFT JOIN (keeps parent
    # rows), and the representative field is surfaced.
    expected_fragments = (
        "| account | varchar, fk |",
        "**Relationships for Order:**",
        "LEFT JOIN Account ON Account.Id = Order.account",
        "representative field `Account.Name`",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_relationship_detected_via_display_type_without_is_foreign_key():
    """A Relationship-typed field is treated as an FK even if not flagged as one.

    ``is_foreign_key`` is left at the ``_fake_field`` default here; only
    ``field_display_type`` marks the field as a relationship.
    """
    target_entity = SimpleNamespace(name="Account")
    target_field = SimpleNamespace(definition=SimpleNamespace(name="Name"))
    untagged_relationship = _fake_field(
        name="account",
        display_name="Account",
        field_display_type="Relationship",
        reference_entity=target_entity,
        reference_field=target_field,
    )
    order_entity = _fake_entity(
        untagged_relationship, name="Order", display_name="Order"
    )
    account_entity = _fake_entity(
        _fake_field(name="Name"), name="Account", display_name="Account"
    )

    rendered = build([order_entity, account_entity])

    # Still tagged fk in the column table and rendered in the Relationships section.
    for fragment in (
        "| account | varchar, fk |",
        "LEFT JOIN Account ON Account.Id = Order.account",
    ):
        assert fragment in rendered


def test_v1_prompt_documents_left_vs_inner_join_intent():
    """The v1 relationship guidance must say when to pick LEFT vs INNER JOIN.

    Bare ``"LEFT JOIN"`` / ``"INNER JOIN"`` substring checks are too loose:
    ``INNER JOIN`` already appears in unrelated examples of the prompt, so the
    test would keep passing even if the intent guidance were removed. Anchor
    on the opening words of the two guidance bullets instead.
    """
    prompt = build([_fake_entity(_fake_field())])

    assert "LEFT JOIN when the relationship is optional" in prompt
    assert "INNER JOIN when the relationship is marked" in prompt


def test_relationship_subsection_absent_when_no_foreign_keys():
    """An entity without FK fields must not get a Relationships subsection."""
    plain_entity = _fake_entity(_fake_field())
    rendered = build([plain_entity])

    # Match the rendered per-entity header specifically; the static prompt
    # guidance mentions "Relationships for <table>" and must not count.
    per_entity_header = "**Relationships for Ticket:**"
    assert per_entity_header not in rendered


def test_relationship_omitted_when_target_entity_not_in_set():
    """A relationship whose target entity is not in the set is suppressed.

    Order references Account, but Account is not part of the entity set, so a
    join would be unusable and neither the subsection header nor the join
    line may be rendered.
    """
    order = _fake_entity(
        _fake_field(), _fake_fk_field(), name="Order", display_name="Order"
    )

    prompt = build([order])

    assert "**Relationships for Order:**" not in prompt
    # The builder renders relationship joins as LEFT JOIN, so that is the
    # string that must be absent. Checking only "INNER JOIN Account" could
    # never detect a leaked relationship line.
    assert "LEFT JOIN Account ON Account.Id = Order.account" not in prompt
    assert "INNER JOIN Account" not in prompt


def test_v1_prompt_documents_relationship_fields():
    """The rendered prompt contains the RELATIONSHIP FIELDS guidance heading."""
    plain_entity = _fake_entity(_fake_field())
    rendered = build([plain_entity])

    section_heading = "RELATIONSHIP FIELDS"
    assert section_heading in rendered
Loading