Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c6e73eb
feat(datafabric): add fetch_ontology tool to DF inner SQL agent
sankalp-uipath Jun 16, 2026
b67e170
Merge branch 'main' into feat/datafabric-ontology-fetch-tool
sankalp-uipath Jun 16, 2026
da19087
feat(datafabric): resolve ontology from agent.json binding (name + fo…
sankalp-uipath Jun 17, 2026
4c22b8f
refactor(datafabric): fetch ontology via SDK EntitiesService.get_onto…
sankalp-uipath Jun 17, 2026
68f7cbf
feat(datafabric): support multiple ontologies per context (ontologySet)
sankalp-uipath Jun 17, 2026
ab77d65
Merge remote-tracking branch 'origin/main' into feat/datafabric-ontol…
sankalp-uipath Jun 17, 2026
40acdec
fix(datafabric): end loop on any successful SQL; drop env-var ontolog…
sankalp-uipath Jun 22, 2026
7a5bb69
test(datafabric): cover ontology fetch tool, subgraph routing, and fa…
sankalp-uipath Jun 22, 2026
04f79c5
fix(datafabric): return only terminal tool msgs on END; drop ToolMess…
sankalp-uipath Jun 22, 2026
0ed6210
perf(datafabric): fetch configured ontologies concurrently (asyncio.g…
sankalp-uipath Jun 22, 2026
e9c4cfb
feat(datafabric): resolve ontologies via ontology_refs
sankalp-uipath Jun 23, 2026
be5ef26
Merge branch 'main' into feat/datafabric-ontology-fetch-tool
sankalp-uipath Jun 23, 2026
1fd7a30
chore: consume uipath dev build (#1728) to unblock CI
sankalp-uipath Jun 23, 2026
a871a0a
chore: revert temp dev-build pin; fix datafabric test mypy
sankalp-uipath Jun 23, 2026
dfdd3d6
Merge branch 'main' into feat/datafabric-ontology-fetch-tool
sankalp-uipath Jun 23, 2026
a07adb9
Merge branch 'main' into feat/datafabric-ontology-fetch-tool
sankalp-uipath Jun 24, 2026
54db78f
refactor(datafabric): resolve ontologies from nested ontologySet
sankalp-uipath Jun 25, 2026
941f3ff
refactor(datafabric): gather ontologies from datafabricontology context
sankalp-uipath Jun 25, 2026
86e5912
feat(datafabric): gate fetch_ontology behind DataFabricOntologyEnable…
sankalp-uipath Jun 29, 2026
826f036
test(datafabric): drop ontology referenceKey fixture
sankalp-uipath Jun 30, 2026
e57d1b0
refactor(datafabric): gate ontology flag at every entry; share flag c…
sankalp-uipath Jun 30, 2026
2f41f40
Merge remote-tracking branch 'origin/main' into feat/datafabric-ontol…
sankalp-uipath Jun 30, 2026
7fab6d5
refactor(datafabric): address review nits (split bind test, single-st…
sankalp-uipath Jun 30, 2026
a35807b
refactor(datafabric): inject ontology into system prompt, drop fetch_…
sankalp-uipath Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions src/uipath_langchain/agent/tools/context_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,17 +158,39 @@
) -> StructuredTool | BaseTool | None:
tool_name = sanitize_tool_name(resource.name)

# An ontology context is not a standalone tool — it only grounds the Data
# Fabric entity tool, which gathers it via resolve_context_ontologies.
if resource.context_type == AgentContextType.DATA_FABRIC_ONTOLOGY:
return None
Comment on lines +161 to +164

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is not a standalone tool at runtime, I think it is confusing to model it as a top level resource at design time. So far, all "resource nodes" in a lowcode agent (either standalone or part of flow), are independently executable and show up in traces. This is now a different paradigm, it is an optional helper tool that will be part of another tool's subgraph.

That being said this only applies to how it's modeled today. If we indeed plan to expand ontology support in the future such that they will actually allow queries (via something like SPARQL statements for instance); then it will be better for future proofing to define them top level (at least in the package mapping). We can figure out a less confusing design time experience for now

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we plan to expand ontology support to make it a primary design experience, i.e., the user will select the ontologies and the entities will then be resolved internally; hence the decision to make it a top-level resource as part of iterative development.


if resource.context_type == AgentContextType.DATA_FABRIC_ENTITY_SET:
if llm is None:
raise ValueError("Data Fabric entity set tools require an LLM instance")
from .datafabric_tool import create_datafabric_query_tool
from .datafabric_tool.datafabric_tool import BASE_SYSTEM_PROMPT
from uipath.core.feature_flags import FeatureFlags

from .datafabric_tool import (
create_datafabric_query_tool,
resolve_context_ontologies,
)
from .datafabric_tool.datafabric_tool import (
BASE_SYSTEM_PROMPT,
DATAFABRIC_ONTOLOGY_FF,
)

# Feature-gated at the entry: only gather ontologies when the flag is on,
# so with it off the feature is fully inert (no resolution, no prompt
# change) and the agent runs the original entities-only path.
ontologies = (
resolve_context_ontologies(agent.resources if agent else [])

Check warning on line 184 in src/uipath_langchain/agent/tools/context_tool.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Extract this nested conditional expression into an independent statement.

See more on https://sonarcloud.io/project/issues?id=UiPath_uipath-langchain-python&issues=AZ8YKimMMkERh_zDA-zf&open=AZ8YKimMMkERh_zDA-zf&pullRequest=911
if FeatureFlags.is_flag_enabled(DATAFABRIC_ONTOLOGY_FF, default=False)
else []
)
return create_datafabric_query_tool(
resource,
llm,
tool_name=tool_name,
agent_config={BASE_SYSTEM_PROMPT: _extract_system_prompt(agent)},
ontologies=ontologies,
)

assert resource.settings is not None
Expand Down
4 changes: 4 additions & 0 deletions src/uipath_langchain/agent/tools/datafabric_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Data Fabric tool module for entity-based SQL queries."""

from .datafabric_tool import (
DATAFABRIC_ONTOLOGY_FF,
create_datafabric_query_tool,
resolve_context_ontologies,
)

__all__ = [
"DATAFABRIC_ONTOLOGY_FF",
"create_datafabric_query_tool",
"resolve_context_ontologies",
]
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,16 @@ def build_sql_context(
)


def format_sql_context(ctx: SQLContext) -> str:
"""Format a SQLContext as text for system prompt injection."""
def format_sql_context(ctx: SQLContext, ontology_text: str = "") -> str:
"""Format a SQLContext as text for system prompt injection.

Args:
ctx: The built SQL context (entities, prompts, constraints).
ontology_text: The fetched ontology OWL content. When non-empty, an
"Available Ontology" section embeds it as the authoritative schema
the LLM should ground its SQL on — mirroring how the entity set is
surfaced below.
"""
lines: list[str] = []

if ctx.base_system_prompt:
Expand All @@ -143,6 +151,19 @@ def format_sql_context(ctx: SQLContext) -> str:
lines.append(ctx.base_system_prompt)
lines.append("")

if ontology_text:
lines.append(
"## Available Ontology (authoritative semantic schema)\n\n"
"The ontology below is the authoritative source for the exact column "
"names, value formats (date formats, codes, zero-padding), allowed "
"values, and the relationships between entities — richer and more "
"reliable than the field list further down, which omits value formats "
"and semantics. Base your column names, filter values, and joins on "
"it; when it and the entity tables disagree, the ontology wins.\n\n"
f"{ontology_text}"
)
lines.append("")

if ctx.sql_expert_system_prompt:
lines.append("## SQL Query Generation Guidelines")
lines.append("")
Expand Down Expand Up @@ -196,6 +217,7 @@ def build(
resource_description: str = "",
base_system_prompt: str = "",
prompt_version: str | None = None,
ontology_text: str = "",
) -> str:
"""Build the full SQL prompt text for the inner sub-graph LLM.

Expand All @@ -209,6 +231,9 @@ def build(
base_system_prompt: Optional system prompt from the outer agent.
prompt_version: Optional version key (e.g. ``"v0"``, ``"v1"``).
Defaults to the registry's default.
ontology_text: The fetched ontology OWL content. When non-empty, an
"Available Ontology" section embeds it so the LLM grounds its SQL on
the ontology. Empty string → no ontology section.

Returns:
Formatted prompt string for the inner LLM system message.
Expand All @@ -222,4 +247,4 @@ def build(
base_system_prompt,
prompt_version=prompt_version,
)
return format_sql_context(ctx)
return format_sql_context(ctx, ontology_text=ontology_text)
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,21 @@ def __init__(
max_iterations: int = 25,
resource_description: str = "",
base_system_prompt: str = "",
ontology_text: str = "",
) -> None:
self._max_iterations = max_iterations
self._execute_sql_tool = self._create_execute_sql_tool(
entities_service, entities
)
# The ontology (when configured and enabled) is fetched deterministically
# upstream and embedded directly in the system prompt — the inner agent
# still has a single tool, execute_sql.
self._system_message = SystemMessage(
content=datafabric_prompt_builder.build(
entities, resource_description, base_system_prompt
entities,
resource_description,
base_system_prompt,
ontology_text=ontology_text,
)
)
self._inner_llm = llm.model_copy(update={"disable_streaming": True}).bind_tools(
Expand Down Expand Up @@ -226,6 +233,7 @@ def create(
max_iterations: int = 25,
resource_description: str = "",
base_system_prompt: str = "",
ontology_text: str = "",
) -> CompiledStateGraph[Any]:
"""Create and return a compiled Data Fabric sub-graph."""
graph = DataFabricGraph(
Expand All @@ -235,5 +243,6 @@ def create(
max_iterations,
resource_description,
base_system_prompt,
ontology_text,
)
return graph.compiled_graph
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,34 @@

BASE_SYSTEM_PROMPT = "base_system_prompt"

# Feature flag gating the Data Fabric ontology grounding feature. Defaults off.
# Checked at every entry into the feature: ontology resolution (context_tool)
# and the ontology fetch in DataFabricTextQueryHandler._ensure_datafabric_graph.
# Single source of truth so the flag name can never drift between call sites.
DATAFABRIC_ONTOLOGY_FF = "DataFabricOntologyEnabled"


def resolve_context_ontologies(
resources: list[Any],
) -> list[tuple[str, str | None]]:
"""Gather ontologies from the agent's ontology context(s).

An ontology is configured in a dedicated ontology context (``contextType``
``datafabricontology``) whose ``ontologySet`` mirrors the entity context's
``entitySet`` — by convention at most one such context per agent. Its
ontologies ground the Data Fabric query tool; each carries its own
``folderId``, so it is fetched from its own folder.
"""
ontologies: list[tuple[str, str | None]] = []
for resource in resources:
if (
isinstance(resource, AgentContextResourceConfig)
and resource.is_datafabric_ontology
):
for item in resource.ontology_set or []:
ontologies.append((item.name, item.folder_key))
return ontologies
Comment on lines +50 to +58

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, we implicitly assume all ontologies will apply to this Data Service entity context. Shouldn't the link be defined more explicitly? I.e., either:
a) when defining a Data Service Context resource you can also specify one or more ontologies
b) when defining the Ontology Context resource you specify the list of entities it describes

@sankalp-uipath sankalp-uipath Jul 1, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am currently working on adding the R2RML mapping, which will let the LLM node implicitly resolve the entities from the ontologies at agent runtime. (That work is in progress in a separate PR.)



class DataFabricTextQueryHandler:
"""Manages lazy initialization and invocation of the Data Fabric sub-graph.
Expand All @@ -44,11 +72,13 @@ def __init__(
llm: BaseChatModel,
resource_description: str = "",
base_system_prompt: str = "",
ontologies: list[tuple[str, str | None]] | None = None,
) -> None:
self._entity_set = entity_set
self._llm = llm
self._resource_description = resource_description
self._base_system_prompt = base_system_prompt
self._ontologies = ontologies or []
self._compiled: CompiledStateGraph[Any] | None = None
self._init_lock = asyncio.Lock()

Expand All @@ -65,9 +95,11 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]:
if self._compiled is not None:
return self._compiled

from uipath.core.feature_flags import FeatureFlags
from uipath.platform import UiPath

from .datafabric_subgraph import DataFabricGraph
from .ontology_fetcher import fetch_ontology_text

sdk = UiPath()
resolution = await sdk.entities.resolve_entity_set_async(self._entity_set)
Expand All @@ -76,12 +108,23 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]:
"No Data Fabric entity schemas could be fetched. "
"Check entity identifiers and permissions."
)
# Deterministically fetch the ontology (when configured AND the flag
# is on) and embed it in the inner system prompt — the LLM never has
# to decide to fetch it.
ontology_text = ""
if self._ontologies and FeatureFlags.is_flag_enabled(
DATAFABRIC_ONTOLOGY_FF, default=False
):
ontology_text = await fetch_ontology_text(
resolution.entities_service, self._ontologies
)
self._compiled = DataFabricGraph.create(
llm=self._llm,
entities=resolution.entities,
entities_service=resolution.entities_service,
resource_description=self._resource_description,
base_system_prompt=self._base_system_prompt,
ontology_text=ontology_text,
)
return self._compiled

Expand Down Expand Up @@ -144,6 +187,7 @@ def create_datafabric_query_tool(
llm: BaseChatModel,
tool_name: str = "query_datafabric",
agent_config: dict[str, str] | None = None,
ontologies: list[tuple[str, str | None]] | None = None,
) -> BaseTool:
"""Create the ``query_datafabric`` agentic tool.

Expand All @@ -153,17 +197,23 @@ def create_datafabric_query_tool(
tool_name: Sanitized tool name from the resource.
agent_config: Optional dict with agent-level config.
Key ``base_system_prompt`` carries the outer agent's system prompt.
ontologies: ``(name, folder_key)`` pairs resolved from the context's
nested ``ontology_set`` (see ``resolve_context_ontologies``).
Empty/None → no ontology is fetched or injected. Resolution comes only from the
agent definition (the binding), never from process env.
"""
config = agent_config or {}
entity_set = [
DataFabricEntityItem.model_validate(item.model_dump(by_alias=True))
for item in (resource.entity_set or [])
]
ontologies = ontologies or []
handler = DataFabricTextQueryHandler(
entity_set=entity_set,
llm=llm,
resource_description=resource.description or "",
base_system_prompt=config.get(BASE_SYSTEM_PROMPT, ""),
ontologies=ontologies,
)
entity_lines = []
for e in entity_set:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Fetches ontology OWL schemas from Data Fabric for prompt injection.

A Data Fabric context may attach one or more ontologies (mirroring the entity
set). This module fetches each configured ontology's OWL via the SDK
(``EntitiesService.get_ontology_file_async``) and returns them concatenated,
ready to embed in the inner SQL agent's system prompt.

Fetching is deterministic — done once when the sub-graph is built — rather than
an LLM-decided tool call, so the model always has the ontology in context.
Ontology names/folders are pinned from configuration, never supplied by the LLM.
"""

import asyncio
import logging

from uipath.platform.entities import EntitiesService

logger = logging.getLogger(__name__)

# Defensive cap per ontology so a malformed/oversized OWL can't blow up the
# prompt/token budget.
_MAX_OWL_BYTES = 1_000_000


def _notation_label(media_type: str) -> str:
"""Best-effort label for the OWL serialization (Turtle or OFN)."""
mt = (media_type or "").lower()
if "turtle" in mt or mt.endswith("ttl"):
return "Turtle"
if "functional" in mt or "ofn" in mt:
return "OWL Functional Notation"
return "Turtle or OWL Functional Notation"


async def _fetch_one(
    entities_service: EntitiesService, name: str, folder_key: str | None
) -> str:
    """Fetch one ontology's OWL and wrap it in a delimited prompt block.

    Args:
        entities_service: Service whose ``get_ontology_file_async`` is awaited
            with ``(name, "owl", folder_key)``.
        name: Ontology name; also used in the block delimiters and messages.
        folder_key: Folder passed through to the service call; may be ``None``.

    Returns:
        On success, the OWL content framed by ``--- ONTOLOGY`` /
        ``--- END ONTOLOGY`` marker lines, with the notation label derived
        from the response's ``mediaType``. On any failure — including content
        larger than ``_MAX_OWL_BYTES`` — a short "unavailable" note instead.
        Exceptions derived from ``Exception`` are never propagated from the
        fetch itself; they are logged at warning level.
    """
    try:
        payload = await entities_service.get_ontology_file_async(
            name, "owl", folder_key
        )
        owl_text = payload.get("content") or ""
        media_type = payload.get("mediaType") or ""
        # The cap is measured in encoded bytes, not characters.
        encoded_size = len(owl_text.encode("utf-8"))
        if encoded_size > _MAX_OWL_BYTES:
            # Raised inside the try on purpose so an oversized ontology takes
            # the same degrade-to-note path as a failed fetch.
            raise ValueError(f"Ontology '{name}' OWL exceeds the size limit.")
    except Exception as exc:
        logger.warning("Ontology fetch failed for %r: %s", name, exc)
        return (
            f"Ontology '{name}' is unavailable ({type(exc).__name__}). "
            "Proceed using the entity schemas in the system prompt."
        )
    else:
        notation = _notation_label(media_type)
        return f"--- ONTOLOGY: {name} ({notation}) ---\n{owl_text}\n--- END ONTOLOGY: {name} ---"


async def fetch_ontology_text(
    entities_service: EntitiesService,
    ontologies: list[tuple[str, str | None]],
) -> str:
    """Fetch every configured ontology and join the results into one string.

    Args:
        entities_service: SDK service handed to each individual fetch.
        ontologies: ``(name, folder_key)`` pairs identifying what to fetch.

    Returns:
        ``""`` when ``ontologies`` is empty. Otherwise the per-ontology blocks
        separated by a blank line, in the same order as ``ontologies``. A
        failed fetch contributes its "unavailable" note in place of a block,
        as produced by ``_fetch_one``, rather than raising.
    """
    if not ontologies:
        return ""

    # Build all coroutines up front; the fetches are independent, so they are
    # awaited together. ``gather`` returns results in argument order, keeping
    # the joined text deterministic regardless of completion order.
    pending = [
        _fetch_one(entities_service, ontology_name, ontology_folder)
        for ontology_name, ontology_folder in ontologies
    ]
    sections = await asyncio.gather(*pending)
    return "\n\n".join(sections)
Loading
Loading