diff --git a/src/uipath_langchain/agent/tools/context_tool.py b/src/uipath_langchain/agent/tools/context_tool.py index c22906835..968829cc9 100644 --- a/src/uipath_langchain/agent/tools/context_tool.py +++ b/src/uipath_langchain/agent/tools/context_tool.py @@ -158,17 +158,39 @@ def create_context_tool( ) -> StructuredTool | BaseTool | None: tool_name = sanitize_tool_name(resource.name) + # An ontology context is not a standalone tool — it only grounds the Data + # Fabric entity tool, which gathers it via resolve_context_ontologies. + if resource.context_type == AgentContextType.DATA_FABRIC_ONTOLOGY: + return None + if resource.context_type == AgentContextType.DATA_FABRIC_ENTITY_SET: if llm is None: raise ValueError("Data Fabric entity set tools require an LLM instance") - from .datafabric_tool import create_datafabric_query_tool - from .datafabric_tool.datafabric_tool import BASE_SYSTEM_PROMPT + from uipath.core.feature_flags import FeatureFlags + + from .datafabric_tool import ( + create_datafabric_query_tool, + resolve_context_ontologies, + ) + from .datafabric_tool.datafabric_tool import ( + BASE_SYSTEM_PROMPT, + DATAFABRIC_ONTOLOGY_FF, + ) + # Feature-gated at the entry: only gather ontologies when the flag is on, + # so with it off the feature is fully inert (no resolution, no prompt + # change) and the agent runs the original entities-only path. + ontologies = ( + resolve_context_ontologies(agent.resources if agent else []) + if FeatureFlags.is_flag_enabled(DATAFABRIC_ONTOLOGY_FF, default=False) + else [] + ) return create_datafabric_query_tool( resource, llm, tool_name=tool_name, agent_config={BASE_SYSTEM_PROMPT: _extract_system_prompt(agent)}, + ontologies=ontologies, ) assert resource.settings is not None diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py b/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py index fccbda389..402d33be3 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py @@ -1,9 +1,13 @@ """Data Fabric tool module for entity-based SQL queries.""" from .datafabric_tool import ( + DATAFABRIC_ONTOLOGY_FF, create_datafabric_query_tool, + resolve_context_ontologies, ) __all__ = [ + "DATAFABRIC_ONTOLOGY_FF", "create_datafabric_query_tool", + "resolve_context_ontologies", ] diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 8154caf5e..3d1ab5c39 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -133,8 +133,16 @@ def build_sql_context( ) -def format_sql_context(ctx: SQLContext) -> str: - """Format a SQLContext as text for system prompt injection.""" +def format_sql_context(ctx: SQLContext, ontology_text: str = "") -> str: + """Format a SQLContext as text for system prompt injection. + + Args: + ctx: The built SQL context (entities, prompts, constraints). + ontology_text: The fetched ontology OWL content. When non-empty, an + "Available Ontology" section embeds it as the authoritative schema + the LLM should ground its SQL on — mirroring how the entity set is + surfaced below. + """ lines: list[str] = [] if ctx.base_system_prompt: @@ -143,6 +151,19 @@ def format_sql_context(ctx: SQLContext) -> str: lines.append(ctx.base_system_prompt) lines.append("") + if ontology_text: + lines.append( + "## Available Ontology (authoritative semantic schema)\n\n" + "The ontology below is the authoritative source for the exact column " + "names, value formats (date formats, codes, zero-padding), allowed " + "values, and the relationships between entities — richer and more " + "reliable than the field list further down, which omits value formats " + "and semantics. Base your column names, filter values, and joins on " + "it; when it and the entity tables disagree, the ontology wins.\n\n" + f"{ontology_text}" + ) + lines.append("") + if ctx.sql_expert_system_prompt: lines.append("## SQL Query Generation Guidelines") lines.append("") @@ -196,6 +217,7 @@ def build( resource_description: str = "", base_system_prompt: str = "", prompt_version: str | None = None, + ontology_text: str = "", ) -> str: """Build the full SQL prompt text for the inner sub-graph LLM. @@ -209,6 +231,9 @@ def build( base_system_prompt: Optional system prompt from the outer agent. prompt_version: Optional version key (e.g. ``"v0"``, ``"v1"``). Defaults to the registry's default. + ontology_text: The fetched ontology OWL content. When non-empty, an + "Available Ontology" section embeds it so the LLM grounds its SQL on + the ontology. Empty string → no ontology section. Returns: Formatted prompt string for the inner LLM system message. @@ -222,4 +247,4 @@ def build( base_system_prompt, prompt_version=prompt_version, ) - return format_sql_context(ctx) + return format_sql_context(ctx, ontology_text=ontology_text) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py index 591227962..724fbcfa0 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py @@ -88,14 +88,21 @@ def __init__( max_iterations: int = 25, resource_description: str = "", base_system_prompt: str = "", + ontology_text: str = "", ) -> None: self._max_iterations = max_iterations self._execute_sql_tool = self._create_execute_sql_tool( entities_service, entities ) + # The ontology (when configured and enabled) is fetched deterministically + # upstream and embedded directly in the system prompt — the inner agent + # still has a single tool, execute_sql. self._system_message = SystemMessage( content=datafabric_prompt_builder.build( - entities, resource_description, base_system_prompt + entities, + resource_description, + base_system_prompt, + ontology_text=ontology_text, ) ) self._inner_llm = llm.model_copy(update={"disable_streaming": True}).bind_tools( @@ -226,6 +233,7 @@ def create( max_iterations: int = 25, resource_description: str = "", base_system_prompt: str = "", + ontology_text: str = "", ) -> CompiledStateGraph[Any]: """Create and return a compiled Data Fabric sub-graph.""" graph = DataFabricGraph( @@ -235,5 +243,6 @@ def create( max_iterations, resource_description, base_system_prompt, + ontology_text, ) return graph.compiled_graph diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py index aab4e4cfc..200336ada 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py @@ -29,6 +29,34 @@ BASE_SYSTEM_PROMPT = "base_system_prompt" +# Feature flag gating the Data Fabric ontology grounding feature. Defaults off. +# Checked at every entry into the feature: ontology resolution (context_tool) +# and inner-tool binding (datafabric_subgraph). Single source of truth so the +# flag name can never drift between call sites. +DATAFABRIC_ONTOLOGY_FF = "DataFabricOntologyEnabled" + + +def resolve_context_ontologies( + resources: list[Any], +) -> list[tuple[str, str | None]]: + """Gather ontologies from the agent's ontology context(s). + + An ontology is configured in a dedicated ontology context (``contextType`` + ``datafabricontology``) whose ``ontologySet`` mirrors the entity context's + ``entitySet`` — by convention at most one such context per agent. Its + ontologies ground the Data Fabric query tool; each carries its own + ``folderId``, so it is fetched from its own folder. + """ + ontologies: list[tuple[str, str | None]] = [] + for resource in resources: + if ( + isinstance(resource, AgentContextResourceConfig) + and resource.is_datafabric_ontology + ): + for item in resource.ontology_set or []: + ontologies.append((item.name, item.folder_key)) + return ontologies + class DataFabricTextQueryHandler: """Manages lazy initialization and invocation of the Data Fabric sub-graph. @@ -44,11 +72,13 @@ def __init__( llm: BaseChatModel, resource_description: str = "", base_system_prompt: str = "", + ontologies: list[tuple[str, str | None]] | None = None, ) -> None: self._entity_set = entity_set self._llm = llm self._resource_description = resource_description self._base_system_prompt = base_system_prompt + self._ontologies = ontologies or [] self._compiled: CompiledStateGraph[Any] | None = None self._init_lock = asyncio.Lock() @@ -65,9 +95,11 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]: if self._compiled is not None: return self._compiled + from uipath.core.feature_flags import FeatureFlags from uipath.platform import UiPath from .datafabric_subgraph import DataFabricGraph + from .ontology_fetcher import fetch_ontology_text sdk = UiPath() resolution = await sdk.entities.resolve_entity_set_async(self._entity_set) @@ -76,12 +108,23 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]: "No Data Fabric entity schemas could be fetched. " "Check entity identifiers and permissions." ) + # Deterministically fetch the ontology (when configured AND the flag + # is on) and embed it in the inner system prompt — the LLM never has + # to decide to fetch it. + ontology_text = "" + if self._ontologies and FeatureFlags.is_flag_enabled( + DATAFABRIC_ONTOLOGY_FF, default=False + ): + ontology_text = await fetch_ontology_text( + resolution.entities_service, self._ontologies + ) self._compiled = DataFabricGraph.create( llm=self._llm, entities=resolution.entities, entities_service=resolution.entities_service, resource_description=self._resource_description, base_system_prompt=self._base_system_prompt, + ontology_text=ontology_text, ) return self._compiled @@ -144,6 +187,7 @@ def create_datafabric_query_tool( llm: BaseChatModel, tool_name: str = "query_datafabric", agent_config: dict[str, str] | None = None, + ontologies: list[tuple[str, str | None]] | None = None, ) -> BaseTool: """Create the ``query_datafabric`` agentic tool. @@ -153,17 +197,23 @@ def create_datafabric_query_tool( tool_name: Sanitized tool name from the resource. agent_config: Optional dict with agent-level config. Key ``base_system_prompt`` carries the outer agent's system prompt. + ontologies: ``(name, folder_key)`` pairs resolved from the context's + nested ``ontology_set`` (see ``resolve_context_ontologies``). + Empty/None → no fetch tool is added. Resolution comes only from the + agent definition (the binding), never from process env. """ config = agent_config or {} entity_set = [ DataFabricEntityItem.model_validate(item.model_dump(by_alias=True)) for item in (resource.entity_set or []) ] + ontologies = ontologies or [] handler = DataFabricTextQueryHandler( entity_set=entity_set, llm=llm, resource_description=resource.description or "", base_system_prompt=config.get(BASE_SYSTEM_PROMPT, ""), + ontologies=ontologies, ) entity_lines = [] for e in entity_set: diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/ontology_fetcher.py b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_fetcher.py new file mode 100644 index 000000000..258cc37af --- /dev/null +++ b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_fetcher.py @@ -0,0 +1,77 @@ +"""Fetches ontology OWL schemas from Data Fabric for prompt injection. + +A Data Fabric context may attach one or more ontologies (mirroring the entity +set). This module fetches each configured ontology's OWL via the SDK +(``EntitiesService.get_ontology_file_async``) and returns them concatenated, +ready to embed in the inner SQL agent's system prompt. + +Fetching is deterministic — done once when the sub-graph is built — rather than +an LLM-decided tool call, so the model always has the ontology in context. +Ontology names/folders are pinned from configuration, never supplied by the LLM. +""" + +import asyncio +import logging + +from uipath.platform.entities import EntitiesService + +logger = logging.getLogger(__name__) + +# Defensive cap per ontology so a malformed/oversized OWL can't blow up the +# prompt/token budget. +_MAX_OWL_BYTES = 1_000_000 + + +def _notation_label(media_type: str) -> str: + """Best-effort label for the OWL serialization (Turtle or OFN).""" + mt = (media_type or "").lower() + if "turtle" in mt or mt.endswith("ttl"): + return "Turtle" + if "functional" in mt or "ofn" in mt: + return "OWL Functional Notation" + return "Turtle or OWL Functional Notation" + + +async def _fetch_one( + entities_service: EntitiesService, name: str, folder_key: str | None +) -> str: + try: + data = await entities_service.get_ontology_file_async(name, "owl", folder_key) + owl = data.get("content") or "" + media_type = data.get("mediaType") or "" + if len(owl.encode("utf-8")) > _MAX_OWL_BYTES: + raise ValueError(f"Ontology '{name}' OWL exceeds the size limit.") + except Exception as e: + logger.warning("Ontology fetch failed for %r: %s", name, e) + return ( + f"Ontology '{name}' is unavailable ({type(e).__name__}). " + "Proceed using the entity schemas in the system prompt." + ) + notation = _notation_label(media_type) + return f"--- ONTOLOGY: {name} ({notation}) ---\n{owl}\n--- END ONTOLOGY: {name} ---" + + +async def fetch_ontology_text( + entities_service: EntitiesService, + ontologies: list[tuple[str, str | None]], +) -> str: + """Fetch and concatenate the OWL of every configured ontology. + + Args: + entities_service: Authenticated SDK service used for the REST call. + ontologies: ``(name, folder_key)`` pairs to fetch (pinned from config). + + Returns: + The concatenated ontology text ready for prompt injection, or ``""`` when + no ontologies are configured. Individual fetch failures degrade to a + short "unavailable, use entity schemas" note rather than raising, so a + missing ontology never fails the run. + """ + if not ontologies: + return "" + # Fetch concurrently — each fetch is independent; gather preserves order so + # the concatenation is deterministic. + blocks = await asyncio.gather( + *(_fetch_one(entities_service, name, folder) for name, folder in ontologies) + ) + return "\n\n".join(blocks) diff --git a/tests/agent/tools/test_datafabric_ontology_subgraph.py b/tests/agent/tools/test_datafabric_ontology_subgraph.py new file mode 100644 index 000000000..8d8385c0a --- /dev/null +++ b/tests/agent/tools/test_datafabric_ontology_subgraph.py @@ -0,0 +1,111 @@ +"""Tests for the ontology handling in the Data Fabric inner sub-graph. + +The ontology is fetched deterministically upstream and embedded in the inner +system prompt; the sub-graph itself only ever binds ``execute_sql``. Covers: +only execute_sql is bound, the ontology text is threaded into the prompt, and +dispatch/terminal logic in the tool node. +""" + +from unittest.mock import AsyncMock, MagicMock + +import pytest +from langchain_core.messages import AIMessage + +from uipath_langchain.agent.tools.datafabric_tool import datafabric_prompt_builder +from uipath_langchain.agent.tools.datafabric_tool.datafabric_subgraph import ( + DataFabricGraph, + DataFabricSubgraphState, +) + + +@pytest.fixture +def entities_service(): + es = MagicMock() + es.query_entity_records_async = AsyncMock(return_value=[{"x": 1}]) + return es + + +@pytest.fixture +def make_graph(monkeypatch, entities_service): + # Isolate from the prompt builder; we only exercise tools/routing here. + monkeypatch.setattr(datafabric_prompt_builder, "build", lambda *a, **k: "SYS") + + def _make(ontology_text=""): + return DataFabricGraph( + llm=MagicMock(), + entities=[], + entities_service=entities_service, + ontology_text=ontology_text, + ) + + return _make + + +def _tc(name, args=None, cid="c1"): + return {"name": name, "args": args or {}, "id": cid, "type": "tool_call"} + + +def test_ontology_text_threaded_into_prompt(monkeypatch, entities_service): + captured: dict = {} + monkeypatch.setattr( + datafabric_prompt_builder, + "build", + lambda *a, **k: captured.update(k) or "SYS", + ) + DataFabricGraph( + llm=MagicMock(), + entities=[], + entities_service=entities_service, + ontology_text="ONTOLOGY_XYZ", + ) + assert captured.get("ontology_text") == "ONTOLOGY_XYZ" + + +async def test_execute_tool_call_sql_with_rows_is_terminal(make_graph): + graph = make_graph() + msg, ok = await graph._execute_tool_call( + _tc("execute_sql", {"sql_query": "SELECT 1"}) + ) + assert ok is True + + +async def test_execute_tool_call_sql_no_rows_not_terminal(make_graph, entities_service): + entities_service.query_entity_records_async = AsyncMock(return_value=[]) + graph = make_graph() + msg, ok = await graph._execute_tool_call( + _tc("execute_sql", {"sql_query": "SELECT 1"}) + ) + assert ok is False + + +async def test_execute_tool_call_sql_value_error_becomes_error_dict(make_graph): + # execute_sql raises ValueError on multiple statements; it must be caught and + # turned into an error result (non-terminal), not propagated. + graph = make_graph() + msg, ok = await graph._execute_tool_call( + _tc("execute_sql", {"sql_query": "SELECT 1; SELECT 2"}) + ) + assert ok is False + assert "error" in str(msg.content) + + +async def test_tool_node_terminal_on_sql_rows(make_graph): + graph = make_graph() + ai = AIMessage( + content="", tool_calls=[_tc("execute_sql", {"sql_query": "SELECT 1"}, "a")] + ) + out = await graph.tool_node(DataFabricSubgraphState(messages=[ai])) + assert out["last_tool_success"] is True + assert len(out["messages"]) == 1 + assert out["messages"][0].name == "execute_sql" + + +def test_create_returns_compiled_graph(monkeypatch, entities_service): + monkeypatch.setattr(datafabric_prompt_builder, "build", lambda *a, **k: "SYS") + compiled = DataFabricGraph.create( + llm=MagicMock(), + entities=[], + entities_service=entities_service, + ontology_text="onto", + ) + assert hasattr(compiled, "ainvoke") diff --git a/tests/agent/tools/test_datafabric_tool_ontology_factory.py b/tests/agent/tools/test_datafabric_tool_ontology_factory.py new file mode 100644 index 000000000..208709a88 --- /dev/null +++ b/tests/agent/tools/test_datafabric_tool_ontology_factory.py @@ -0,0 +1,91 @@ +"""Tests for ontology resolution + (name, folder) mapping in the DF tool factory. + +Ontologies are configured inline on the Data Fabric context as a nested +``ontologySet`` (alongside the entity set). The caller resolves those items to +``(name, folder_key)`` pairs and passes them to the factory. +""" + +from types import SimpleNamespace +from unittest.mock import MagicMock + +from uipath.agent.models.agent import AgentContextResourceConfig +from uipath.platform.entities import DataFabricEntityItem + +from uipath_langchain.agent.tools.datafabric_tool.datafabric_tool import ( + create_datafabric_query_tool, + resolve_context_ontologies, +) + + +def _entity_resource(): + entity = DataFabricEntityItem.model_validate( + {"id": "e1", "referenceKey": "e1", "name": "LibraryLoan", "folderId": "f1"} + ) + return SimpleNamespace(entity_set=[entity], description="ctx") + + +# --- factory: passes resolved ontologies straight through to the handler --- + + +def test_factory_passes_ontologies_through(): + tool = create_datafabric_query_tool( + _entity_resource(), + MagicMock(), + ontologies=[("library", "f1")], + ) + assert tool.coroutine._ontologies == [("library", "f1")] # type: ignore[attr-defined] + + +def test_factory_no_ontologies_is_empty(): + tool = create_datafabric_query_tool(_entity_resource(), MagicMock()) + assert tool.coroutine._ontologies == [] # type: ignore[attr-defined] + + +# --- resolver: nested ontologySet → (name, folder) pairs --- + + +def _entity_ctx(): + return AgentContextResourceConfig.model_validate( + { + "$resourceType": "context", + "name": "Entities", + "description": "", + "contextType": "datafabricentityset", + "entitySet": [{"id": "e1", "name": "LibraryLoan", "folderId": "f1"}], + } + ) + + +def _ontology_ctx(ontology_set): + return AgentContextResourceConfig.model_validate( + { + "$resourceType": "context", + "name": "Ontologies", + "description": "", + "contextType": "datafabricontology", + "ontologySet": ontology_set, + } + ) + + +def test_resolve_gathers_ontology_context_items(): + # The agent has an entity context + a dedicated ontology context; only the + # ontology context's items are gathered, each as (name, folder_key). + resources = [ + _entity_ctx(), + _ontology_ctx( + [ + {"name": "library", "folderId": "f1"}, + {"name": "finance", "folderId": "f2"}, + ] + ), + ] + assert resolve_context_ontologies(resources) == [ + ("library", "f1"), + ("finance", "f2"), + ] + + +def test_resolve_no_ontology_context_is_empty(): + # Only an entity context, no ontology context → nothing to ground with. + assert resolve_context_ontologies([_entity_ctx()]) == [] diff --git a/tests/agent/tools/test_ontology_fetcher.py b/tests/agent/tools/test_ontology_fetcher.py new file mode 100644 index 000000000..d6dd6b0c3 --- /dev/null +++ b/tests/agent/tools/test_ontology_fetcher.py @@ -0,0 +1,82 @@ +"""Tests for ontology fetching (datafabric_tool/ontology_fetcher.py).""" + +from unittest.mock import AsyncMock, MagicMock + +from uipath_langchain.agent.tools.datafabric_tool import ontology_fetcher +from uipath_langchain.agent.tools.datafabric_tool.ontology_fetcher import ( + _notation_label, + fetch_ontology_text, +) + + +def _entities_service(content: str = "OWLDATA", media_type: str = "text/turtle"): + es = MagicMock() + es.get_ontology_file_async = AsyncMock( + return_value={"content": content, "mediaType": media_type} + ) + return es + + +# --- _notation_label ------------------------------------------------------- + + +def test_notation_label_turtle(): + assert _notation_label("text/turtle") == "Turtle" + assert _notation_label("application/ttl") == "Turtle" + + +def test_notation_label_functional(): + assert _notation_label("application/owl-functional") == "OWL Functional Notation" + assert _notation_label("text/ofn") == "OWL Functional Notation" + + +def test_notation_label_unknown_defaults(): + assert _notation_label("") == "Turtle or OWL Functional Notation" + assert _notation_label("application/json") == "Turtle or OWL Functional Notation" + + +# --- fetch_ontology_text --------------------------------------------------- + + +async def test_no_ontologies_returns_empty(): + assert await fetch_ontology_text(_entities_service(), []) == "" + + +async def test_single_ontology_returns_fenced_block(): + es = _entities_service(content="OWLBODY", media_type="text/turtle") + + result = await fetch_ontology_text(es, [("library", "folder-1")]) + + assert "ONTOLOGY: library" in result + assert "OWLBODY" in result + assert "Turtle" in result + es.get_ontology_file_async.assert_awaited_once_with("library", "owl", "folder-1") + + +async def test_multiple_ontologies_concatenated(): + es = _entities_service() + + result = await fetch_ontology_text(es, [("library", None), ("finance", "f2")]) + + assert "ONTOLOGY: library" in result + assert "ONTOLOGY: finance" in result + assert es.get_ontology_file_async.await_count == 2 + + +async def test_graceful_degrade_on_error(): + es = MagicMock() + es.get_ontology_file_async = AsyncMock(side_effect=RuntimeError("boom")) + + result = await fetch_ontology_text(es, [("library", None)]) + + assert "unavailable" in result + assert "RuntimeError" in result # the exception type is surfaced, not raised + + +async def test_oversized_owl_is_degraded(monkeypatch): + monkeypatch.setattr(ontology_fetcher, "_MAX_OWL_BYTES", 5) + es = _entities_service(content="0123456789") # 10 bytes > cap + + result = await fetch_ontology_text(es, [("library", None)]) + + assert "unavailable" in result