From 80886b27f69cb0b28f0738c320a136ad05662cc6 Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Sat, 27 Jun 2026 11:33:27 +0530 Subject: [PATCH 1/6] feat: add Data Fabric native entity write tool (P1 LDO writes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a write_datafabric tool alongside the existing query_datafabric read tool for structured CRUD (insert/update/delete) against native Data Fabric entities. Writes use structured mutation intent delegated to EntitiesService native CRUD — no LLM-generated DML. Key components: - DataFabricWriteInput / WriteResult / EntityWriteSchema models - is_entity_writable: native-only (excludes federated, ChoiceSet, system) - derive_writable_fields: filters system/hidden/PK/attachment fields, surfaces ChoiceSet bindings - validate_mutation_intent: entity allowlist, required-field and field-allowlist checks, record_id requirements per operation - WriteExecutor: insert/update/delete via EntitiesService - build_write_tool_description: NL intermediate representation for the tool description (replaces raw OWL injection per write RFC v2) - DataFabricWriteHandler: lazy entity resolution; writability enforced after async resolution since entity_type/external_fields are only on resolved Entity objects - create_datafabric_tools: returns [read_tool, write_tool] - HITL: require_conversational_confirmation propagated for conversational agents 87 tests including the contact-center refund hero case (read 4 entities, decide, write RefundRequest + update Order/CustomerRisk/Contact). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../agent/tools/context_tool.py | 6 +- .../agent/tools/datafabric_tool/__init__.py | 4 +- .../datafabric_prompt_builder.py | 41 + .../tools/datafabric_tool/datafabric_tool.py | 212 ++++- .../agent/tools/datafabric_tool/models.py | 75 ++ .../tools/datafabric_tool/write_executor.py | 101 +++ .../datafabric_tool/write_schema_builder.py | 84 ++ .../tools/datafabric_tool/write_validation.py | 152 ++++ .../agent/tools/tool_factory.py | 11 +- tests/agent/tools/datafabric_tool/__init__.py | 0 .../test_refund_agent_integ.py | 778 ++++++++++++++++++ .../datafabric_tool/test_write_executor.py | 149 ++++ .../datafabric_tool/test_write_integration.py | 598 ++++++++++++++ .../test_write_schema_builder.py | 278 +++++++ .../datafabric_tool/test_write_validation.py | 433 ++++++++++ 15 files changed, 2914 insertions(+), 8 deletions(-) create mode 100644 src/uipath_langchain/agent/tools/datafabric_tool/write_executor.py create mode 100644 src/uipath_langchain/agent/tools/datafabric_tool/write_schema_builder.py create mode 100644 src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py create mode 100644 tests/agent/tools/datafabric_tool/__init__.py create mode 100644 tests/agent/tools/datafabric_tool/test_refund_agent_integ.py create mode 100644 tests/agent/tools/datafabric_tool/test_write_executor.py create mode 100644 tests/agent/tools/datafabric_tool/test_write_integration.py create mode 100644 tests/agent/tools/datafabric_tool/test_write_schema_builder.py create mode 100644 tests/agent/tools/datafabric_tool/test_write_validation.py diff --git a/src/uipath_langchain/agent/tools/context_tool.py b/src/uipath_langchain/agent/tools/context_tool.py index c22906835..c9a8446e1 100644 --- a/src/uipath_langchain/agent/tools/context_tool.py +++ b/src/uipath_langchain/agent/tools/context_tool.py @@ -155,16 +155,16 @@ def create_context_tool( resource: AgentContextResourceConfig, llm: BaseChatModel | None = None, agent: 
LowCodeAgentDefinition | None = None, -) -> StructuredTool | BaseTool | None: +) -> StructuredTool | BaseTool | list[BaseTool] | None: tool_name = sanitize_tool_name(resource.name) if resource.context_type == AgentContextType.DATA_FABRIC_ENTITY_SET: if llm is None: raise ValueError("Data Fabric entity set tools require an LLM instance") - from .datafabric_tool import create_datafabric_query_tool + from .datafabric_tool import create_datafabric_tools from .datafabric_tool.datafabric_tool import BASE_SYSTEM_PROMPT - return create_datafabric_query_tool( + return create_datafabric_tools( resource, llm, tool_name=tool_name, diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py b/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py index fccbda389..04f16c120 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/__init__.py @@ -1,9 +1,11 @@ -"""Data Fabric tool module for entity-based SQL queries.""" +"""Data Fabric tool module for entity-based SQL queries and writes.""" from .datafabric_tool import ( create_datafabric_query_tool, + create_datafabric_tools, ) __all__ = [ "create_datafabric_query_tool", + "create_datafabric_tools", ] diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 8154caf5e..a497e0f3f 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -17,11 +17,14 @@ from .models import ( EntitySchema, EntitySQLContext, + EntityWriteSchema, FieldSchema, QueryPattern, SQLContext, ) from .prompts import build_prompt_context, get_prompt_version +from .write_schema_builder import build_write_tool_description +from .write_validation import derive_writable_fields, is_entity_writable logger = logging.getLogger(__name__) @@ 
def build_write_context(entities: list[Entity]) -> str:
    """Render the writable-entity section for the system prompt.

    Every entity that passes ``is_entity_writable`` and for which
    ``derive_writable_fields`` returns at least one field is wrapped in an
    ``EntityWriteSchema`` keyed by the entity name. The collected schemas
    are rendered by ``build_write_tool_description`` and placed under a
    markdown heading.

    Args:
        entities: Resolved Entity objects from the platform.

    Returns:
        The heading, a blank line and the rendered description, or an
        empty string when no entity contributes a writable field.
    """
    schemas_by_name: dict[str, EntityWriteSchema] = {}
    for candidate in entities:
        # Non-writable entities never reach field derivation.
        fields = derive_writable_fields(candidate) if is_entity_writable(candidate) else []
        if not fields:
            continue
        schemas_by_name[candidate.name] = EntityWriteSchema(
            entity_key=candidate.name,
            display_name=candidate.display_name or candidate.name,
            writable_fields=fields,
        )

    if not schemas_by_name:
        return ""

    body = build_write_tool_description(schemas_by_name)
    return f"## Writable Data Fabric Entities\n\n{body}"
class DataFabricWriteHandler:
    """Callable that backs the Data Fabric write tool.

    Nothing is resolved at construction time. The first invocation
    resolves the configured entity set through the platform SDK, builds
    one ``EntityWriteSchema`` per writable entity and creates the
    ``WriteExecutor``. Later invocations reuse those cached objects.
    """

    def __init__(
        self,
        entity_set: list[DataFabricEntityItem],
    ) -> None:
        self._entity_set = entity_set
        # Populated by _ensure_initialized(); None until the first call.
        self._write_schemas: dict[str, EntityWriteSchema] | None = None
        self._write_tool_description: str | None = None
        # Doubles as the "initialised" flag: set last, after the schemas.
        self._executor: Any | None = None
        self._init_lock = asyncio.Lock()

    async def _ensure_initialized(self) -> None:
        """Resolve entities and build write schemas on the first call.

        The ``_executor`` flag is checked before and again after acquiring
        ``_init_lock`` so that concurrent first calls resolve only once.

        Raises:
            ValueError: If the resolution returns no entities.
        """
        if self._executor is not None:
            return

        async with self._init_lock:
            if self._executor is not None:
                return

            from uipath.platform import UiPath

            from .write_executor import WriteExecutor

            sdk = UiPath()
            resolution = await sdk.entities.resolve_entity_set_async(self._entity_set)
            if not resolution.entities:
                raise ValueError(
                    "No Data Fabric entity schemas could be fetched. "
                    "Check entity identifiers and permissions."
                )

            # Build locally and publish once complete, so a failure while
            # deriving fields never leaves a half-filled dict on the instance.
            write_schemas: dict[str, EntityWriteSchema] = {}
            for entity in resolution.entities:
                if not is_entity_writable(entity):
                    continue
                write_schemas[entity.name] = EntityWriteSchema(
                    entity_key=entity.name,
                    display_name=entity.display_name or entity.name,
                    writable_fields=derive_writable_fields(entity),
                )

            self._write_schemas = write_schemas
            # Stored on the handler only; the description of an already
            # created tool object is not modified here.
            self._write_tool_description = build_write_tool_description(
                write_schemas
            )
            self._executor = WriteExecutor(resolution.entities_service)

    async def __call__(
        self,
        entity_key: str,
        operation: str,
        record_id: str | None = None,
        fields: dict[str, Any] | None = None,
    ) -> str:
        """Validate and execute a write operation against an entity.

        Args:
            entity_key: The entity name to write to.
            operation: One of 'insert', 'update', 'delete'.
            record_id: Record ID (required for update/delete).
            fields: Field name-value pairs (required for insert/update).

        Returns:
            A JSON string. When validation fails it is an object with
            ``success`` (false), ``operation``, ``entity_key`` and an
            ``errors`` list; otherwise it is the serialized ``WriteResult``
            returned by the executor.

        Raises:
            ValueError: If no entity schemas could be resolved.
            RuntimeError: If the executor is missing after initialization.
        """
        logger.debug(
            "write_datafabric called: entity=%s op=%s record_id=%s",
            entity_key,
            operation,
            record_id,
        )

        await self._ensure_initialized()

        intent = DataFabricWriteInput(
            entity_key=entity_key,
            operation=operation,
            record_id=record_id,
            fields=fields,
        )

        errors = validate_mutation_intent(intent, self._write_schemas)
        if errors:
            return json.dumps(
                {
                    "success": False,
                    "operation": operation,
                    "entity_key": entity_key,
                    "errors": errors,
                }
            )

        # Explicit guard instead of ``assert``: asserts are removed when
        # Python runs with -O, so they must not carry runtime invariants.
        executor = self._executor
        if executor is None:
            raise RuntimeError("Data Fabric write executor is not initialized.")
        result = await executor.execute(intent)
        return result.model_dump_json()


def _build_initial_write_tool_description(
    entity_set: list[DataFabricEntityItem],
) -> str:
    """Build the write tool description used at tool-creation time.

    Entity schemas are resolved lazily, so only the name and optional
    description of each configured ``DataFabricEntityItem`` are listed.
    The field-level description built by ``DataFabricWriteHandler`` on its
    first call is kept on the handler; it does not replace this text.

    Args:
        entity_set: Entity items taken from the agent configuration.

    Returns:
        The description text: one bullet per entity followed by the
        operation guidance.
    """
    entity_summary = "\n".join(
        f"- {item.name}: {item.description}" if item.description else f"- {item.name}"
        for item in entity_set
    )

    return (
        "Modify Data Fabric entities using structured operations "
        "(insert, update, delete).\n\n"
        "Available entities:\n"
        f"{entity_summary}\n\n"
        "Operations:\n"
        "- insert: provide entity_key and fields. "
        "All required fields must be included.\n"
        "- update: provide entity_key, record_id (from a prior read), "
        "and fields to change.\n"
        "- delete: provide entity_key and record_id. Requires confirmation.\n\n"
        "Query the entity first (using the read tool) to discover record IDs "
        "and current field values before updating or deleting."
    )


def create_datafabric_tools(
    resource: AgentContextResourceConfig,
    llm: BaseChatModel,
    tool_name: str = "query_datafabric",
    agent_config: dict[str, str] | None = None,
) -> list[BaseTool]:
    """Create both the read and the write Data Fabric tools.

    Returns a list containing, in order:
        1. The read tool, named ``tool_name`` (NL-to-SQL subgraph).
        2. The write tool, named ``f"{tool_name}_write"`` (structured
           CRUD), e.g. ``query_datafabric_write`` for the default name.

    Args:
        resource: The Data Fabric context resource configuration.
        llm: The language model for the inner SQL generation loop.
        tool_name: Sanitized tool name for the read tool; the write tool
            name is derived from it.
        agent_config: Optional dict with agent-level config, forwarded to
            the read tool.
    """
    read_tool = create_datafabric_query_tool(
        resource, llm, tool_name=tool_name, agent_config=agent_config
    )

    # The write tool is always created. Writability is enforced inside the
    # handler after async entity resolution, because entity_type and
    # external_fields exist only on resolved Entity objects and not on
    # DataFabricEntityItem.
    entity_set = [
        DataFabricEntityItem.model_validate(item.model_dump(by_alias=True))
        for item in (resource.entity_set or [])
    ]
    write_handler = DataFabricWriteHandler(entity_set=entity_set)

    write_tool = BaseUiPathStructuredTool(
        name=f"{tool_name}_write",
        description=_build_initial_write_tool_description(entity_set),
        args_schema=DataFabricWriteInput,
        coroutine=write_handler,
        metadata={
            "tool_type": "datafabric_write",
            # Set unconditionally for the write tool.
            "require_conversational_confirmation": True,
        },
    )

    return [read_tool, write_tool]
+ """ + + entity_key: str = Field( + ..., + description="The entity name (table name) to write to.", + ) + operation: EntityWriteOperation = Field( + ..., + description="The write operation: 'insert', 'update', or 'delete'.", + ) + record_id: Optional[str] = Field( + default=None, + description="The record ID. Required for update and delete operations.", + ) + fields: Optional[dict[str, Any]] = Field( + default=None, + description=( + "Field name-value pairs for the record. " + "Required for insert and update operations." + ), + ) + + +class WriteResult(BaseModel): + """Result of a write operation against a Data Fabric entity.""" + + success: bool + operation: str + entity_key: str + record_id: Optional[str] = None + record: Optional[dict[str, Any]] = None + error: Optional[str] = None + + +class WritableFieldInfo(BaseModel): + """Schema information for a writable field on an entity.""" + + name: str + display_name: str + type_name: str + is_required: bool + description: Optional[str] = None + choiceset_id: Optional[str] = None + allowed_values: Optional[list[str]] = None + is_choiceset: bool = False + + +class EntityWriteSchema(BaseModel): + """Pre-resolved write schema for a context-derived entity.""" + + entity_key: str + display_name: str + writable_fields: list[WritableFieldInfo] diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/write_executor.py b/src/uipath_langchain/agent/tools/datafabric_tool/write_executor.py new file mode 100644 index 000000000..d64f9c380 --- /dev/null +++ b/src/uipath_langchain/agent/tools/datafabric_tool/write_executor.py @@ -0,0 +1,101 @@ +"""Write executor for Data Fabric entity CRUD operations. + +Wraps EntitiesService single-record methods (insert, update, delete) +and translates results/exceptions into WriteResult objects. 
class WriteExecutor:
    """Executes validated write intents through an ``EntitiesService``.

    Only the single-record service methods are used; the original author
    notes that these fire Data Fabric triggers and that batch methods are
    reserved for a future phase.

    Args:
        entities_service: The resolved EntitiesService instance.
    """

    def __init__(self, entities_service: EntitiesService) -> None:
        self._entities_service = entities_service

    async def execute(self, intent: DataFabricWriteInput) -> WriteResult:
        """Execute a write operation and translate the outcome.

        Service exceptions are not propagated: they are logged and turned
        into a failed ``WriteResult`` so the caller always receives a
        structured answer.

        Args:
            intent: A validated DataFabricWriteInput.

        Returns:
            WriteResult describing success or failure. Insert and update
            results carry the record returned by the service; delete
            results carry only the record id.
        """
        op = intent.operation
        # One consistent operation label for every result and log line.
        op_name = getattr(op, "value", str(op))

        # Checked explicitly instead of with ``assert``: an assertion here
        # yields an empty error message when caught and disappears under -O.
        requires_record_id = op in (
            EntityWriteOperation.update,
            EntityWriteOperation.delete,
        )
        if requires_record_id and not intent.record_id:
            return WriteResult(
                success=False,
                operation=op_name,
                entity_key=intent.entity_key,
                error=f"'{op_name}' operation requires 'record_id'.",
            )

        try:
            if op == EntityWriteOperation.insert:
                record = await self._entities_service.insert_record_async(
                    intent.entity_key, intent.fields
                )
            elif op == EntityWriteOperation.update:
                record = await self._entities_service.update_record_async(
                    intent.entity_key, intent.record_id, intent.fields
                )
            elif op == EntityWriteOperation.delete:
                await self._entities_service.delete_record_async(
                    intent.entity_key, intent.record_id
                )
                return WriteResult(
                    success=True,
                    operation=op_name,
                    entity_key=intent.entity_key,
                    record_id=intent.record_id,
                )
            else:
                return WriteResult(
                    success=False,
                    operation=op_name,
                    entity_key=intent.entity_key,
                    error=f"Unsupported operation: {op}",
                )

            # Shared success path for insert and update.
            return WriteResult(
                success=True,
                operation=op_name,
                entity_key=intent.entity_key,
                record_id=record.id,
                record=record.model_dump(by_alias=True),
            )

        except Exception as exc:
            # Boundary handler: every service failure becomes a WriteResult.
            logger.warning(
                "Data Fabric write failed: entity=%s op=%s error=%s",
                intent.entity_key,
                op_name,
                exc,
                exc_info=True,
            )
            return WriteResult(
                success=False,
                operation=op_name,
                entity_key=intent.entity_key,
                record_id=intent.record_id,
                error=str(exc),
            )
When provided the + description lists allowed ops per entity; otherwise all three + operations are assumed available. + + Returns: + A multi-line markdown-ish description string suitable for use as + a LangChain tool ``description``. + """ + if not write_schemas: + return ( + "Modify Data Fabric entities using structured operations " + "(insert, update, delete).\n\n" + "No writable entities are currently configured." + ) + + lines: list[str] = [ + "Modify Data Fabric entities using structured operations " + "(insert, update, delete).", + ] + + all_ops = {"insert", "update", "delete"} + + for entity_key, schema in sorted(write_schemas.items()): + ops = entity_access.get(entity_key, all_ops) if entity_access else all_ops + ops_str = ", ".join(sorted(ops)) + lines.append(f"\n### {schema.display_name} ({ops_str})") + + for field in schema.writable_fields: + parts: list[str] = [field.type_name.upper()] + if field.is_required: + parts.append("required") + if field.is_choiceset: + parts.append("CHOICE_SET") + field_desc = f" - {field.name} ({', '.join(parts)})" + lines.append(field_desc) + + lines.append("") + lines.append("Operations:") + lines.append( + "- insert: provide entity_key and fields. All required fields must be included." + ) + lines.append( + "- update: provide entity_key, record_id (from a prior read), " + "and fields to change." + ) + lines.append("- delete: provide entity_key and record_id. Requires confirmation.") + lines.append("") + lines.append( + "Query the entity first (using the read tool) to discover record IDs " + "and current field values before updating or deleting." 
def is_entity_writable(entity: Entity) -> bool:
    """Return whether an entity may be written via EntitiesService CRUD.

    An entity is writable only when its ``entity_type`` is ``"Entity"``
    and its ``external_fields`` is empty or missing. Other entity types
    (the original notes list ChoiceSet, SystemEntity and InternalEntity)
    and federated entities, which carry ``external_fields``, are rejected.
    """
    return entity.entity_type == "Entity" and not entity.external_fields


def derive_writable_fields(entity: Entity) -> list[WritableFieldInfo]:
    """Extract the writable fields from an entity's metadata.

    System fields, hidden fields, primary keys and attachment fields are
    skipped. A field with a truthy ``choiceset_id`` attribute is flagged
    as a ChoiceSet field.

    Args:
        entity: A resolved Entity object with field metadata.

    Returns:
        One WritableFieldInfo per remaining field, or an empty list when
        the entity itself is not writable.
    """
    if not is_entity_writable(entity):
        return []

    writable: list[WritableFieldInfo] = []
    for field in entity.fields or []:
        if (
            field.is_system_field
            or field.is_hidden_field
            or field.is_primary_key
            or field.is_attachment
        ):
            continue
        # Normalise a missing or empty ChoiceSet id to None once.
        choiceset_id = getattr(field, "choiceset_id", None) or None
        writable.append(
            WritableFieldInfo(
                name=field.name,
                display_name=field.display_name,
                type_name=field.sql_type.name if field.sql_type else "unknown",
                is_required=field.is_required,
                description=field.description,
                choiceset_id=choiceset_id,
                is_choiceset=choiceset_id is not None,
            )
        )
    return writable


def validate_mutation_intent(
    intent: DataFabricWriteInput,
    write_schemas: dict[str, EntityWriteSchema] | None = None,
) -> list[str]:
    """Validate a write intent before it is executed.

    Checks run in order and stop at the first failing stage:
        1. The target entity must be present in *write_schemas*.
        2. Update/delete need ``record_id``; insert/update need ``fields``.
        3. Every supplied field must be a writable field of the entity,
           and an insert must give every required field a non-None value.

    Args:
        intent: The write operation intent to validate.
        write_schemas: Mapping of entity_key -> EntityWriteSchema for the
            writable entities; ``None`` is treated as empty.

    Returns:
        Empty list if valid; list of human-readable error strings otherwise.
    """
    op = intent.operation
    schemas = write_schemas or {}

    schema = schemas.get(intent.entity_key)
    if schema is None:
        return [
            f"Entity '{intent.entity_key}' is not configured for writes. "
            f"Writable entities: {sorted(schemas)}"
        ]

    errors: list[str] = []

    if op in (EntityWriteOperation.delete, EntityWriteOperation.update):
        if not intent.record_id:
            errors.append(
                f"'{op.value}' operation requires 'record_id'. "
                "Query the entity first to obtain the record ID."
            )

    if op in (EntityWriteOperation.insert, EntityWriteOperation.update):
        if not intent.fields:
            errors.append(
                f"'{op.value}' operation requires 'fields' with at least one "
                "field name-value pair."
            )

    # Field-level checks are meaningless while the intent is malformed.
    if errors:
        return errors

    if intent.fields:
        writable_names = {f.name for f in schema.writable_fields}
        unknown = set(intent.fields) - writable_names
        if unknown:
            available = ", ".join(sorted(writable_names))
            errors.append(
                f"Unknown field(s) for entity '{intent.entity_key}': "
                f"{', '.join(sorted(unknown))}. "
                f"Available writable fields: {available}"
            )

    if op == EntityWriteOperation.insert and intent.fields:
        # A required field sent as an explicit None counts as missing; a
        # key-presence test alone would let it through to the service.
        missing = {
            f.name
            for f in schema.writable_fields
            if f.is_required and intent.fields.get(f.name) is None
        }
        if missing:
            errors.append(
                f"INSERT on '{intent.entity_key}' requires field(s): "
                f"{', '.join(sorted(missing))}"
            )

    return errors
def _make_field(
    name: str,
    display_name: str | None = None,
    is_system_field: bool = False,
    is_hidden_field: bool = False,
    is_primary_key: bool = False,
    is_attachment: bool = False,
    is_required: bool = False,
    sql_type_name: str = "varchar",
    description: str | None = None,
    choiceset_id: str | None = None,
) -> MagicMock:
    """Build a mock field-metadata object exposing the given attributes."""
    sql_type = MagicMock()
    sql_type.name = sql_type_name

    attributes = {
        "name": name,
        "display_name": display_name or name,
        "is_system_field": is_system_field,
        "is_hidden_field": is_hidden_field,
        "is_primary_key": is_primary_key,
        "is_attachment": is_attachment,
        "is_required": is_required,
        "description": description,
        "choiceset_id": choiceset_id,
        "sql_type": sql_type,
    }
    field = MagicMock()
    # Assigned after construction: ``name`` passed to the MagicMock
    # constructor names the mock instead of setting the attribute.
    for attr, value in attributes.items():
        setattr(field, attr, value)
    return field


def _make_entity(
    name: str,
    fields: list[MagicMock],
    entity_type: str = "Entity",
    external_fields: list | None = None,
    display_name: str | None = None,
) -> MagicMock:
    """Build a mock entity with the given fields and type information."""
    entity = MagicMock()
    attributes = {
        "name": name,
        "display_name": display_name or name,
        "fields": fields,
        "entity_type": entity_type,
        "external_fields": external_fields,
    }
    for attr, value in attributes.items():
        setattr(entity, attr, value)
    return entity


def _make_resolution(entities: list[MagicMock]) -> MagicMock:
    """Build a mock result for ``resolve_entity_set_async``."""
    return MagicMock(entities=entities, entities_service=MagicMock())


def _make_record(record_id: str = "rec-new", data: dict | None = None) -> MagicMock:
    """Build a mock record as returned by insert/update."""
    dumped = {"Id": record_id}
    dumped.update(data or {})

    record = MagicMock()
    record.id = record_id
    record.model_dump.return_value = dumped
    return record


def _mock_llm() -> MagicMock:
    """Return a placeholder LLM object."""
    return MagicMock()
def _make_refund_entities() -> list[MagicMock]:
    """Return the five mock entities used by the refund hero case.

    In order:

    - Customer: ``external_fields`` is non-empty (federated, read-only)
    - Contact: ContactReason, RequestedRefundAmount, OrderId,
      ResolutionStatus
    - Order: OrderNumber (required), TotalAmount, Status
    - CustomerRisk: RiskScore, LifetimeValue, FraudFlag
    - RefundRequest: ApprovedAmount (required), Reason (required), OrderId,
      CustomerId, Status

    Every entity also carries a primary-key ``Id`` and a system ``CreatedOn``.
    """

    def base_fields() -> list[MagicMock]:
        # Fresh mocks per entity so no field object is shared between entities.
        return [
            _make_field("Id", is_primary_key=True),
            _make_field("CreatedOn", is_system_field=True),
        ]

    return [
        _make_entity(
            "Customer",
            [
                *base_fields(),
                _make_field("Name", description="Customer full name"),
                _make_field("AccountTier", description="Gold/Silver/Bronze"),
            ],
            entity_type="Entity",
            external_fields=[{"source": "salesforce"}],  # federated
            display_name="Customer",
        ),
        _make_entity(
            "Contact",
            [
                *base_fields(),
                _make_field(
                    "ContactReason",
                    choiceset_id="ContactReasonCS",
                    description="Reason for contact",
                ),
                _make_field(
                    "RequestedRefundAmount",
                    sql_type_name="decimal",
                    description="Requested refund amount",
                ),
                _make_field("OrderId", description="References Order"),
                _make_field(
                    "ResolutionStatus",
                    choiceset_id="ResolutionStatusCS",
                    description="Open, Approved, Denied, Escalated",
                ),
            ],
            display_name="Contact",
        ),
        _make_entity(
            "Order",
            [
                *base_fields(),
                _make_field("OrderNumber", is_required=True),
                _make_field("TotalAmount", sql_type_name="decimal"),
                _make_field(
                    "Status",
                    choiceset_id="OrderStatusCS",
                    description="Placed, Shipped, Delivered, Returned, Cancelled",
                ),
            ],
            display_name="Order",
        ),
        _make_entity(
            "CustomerRisk",
            [
                *base_fields(),
                _make_field(
                    "RiskScore",
                    sql_type_name="int",
                    description="Numeric risk score, additive",
                ),
                _make_field(
                    "LifetimeValue",
                    sql_type_name="decimal",
                    description="Customer lifetime value",
                ),
                _make_field(
                    "FraudFlag", sql_type_name="bit", description="Fraud flag"
                ),
            ],
            display_name="Customer Risk",
        ),
        _make_entity(
            "RefundRequest",
            [
                *base_fields(),
                _make_field(
                    "ApprovedAmount",
                    sql_type_name="decimal",
                    is_required=True,
                    description="Approved refund amount",
                ),
                _make_field(
                    "Reason",
                    is_required=True,
                    description="Reason for refund",
                ),
                _make_field("OrderId", description="References Order"),
                _make_field("CustomerId", description="References Customer"),
                _make_field(
                    "Status",
                    choiceset_id="RefundStatusCS",
                    description="Pending, Processed, Failed",
                ),
            ],
            display_name="Refund Request",
        ),
    ]
def _make_refund_context_resource():
    """Build the AgentContextResourceConfig for the refund hero case.

    The entity set lists Customer, Contact, Order, CustomerRisk and
    RefundRequest, all in folder ``f1``.
    """
    from uipath.agent.models.agent import (
        AgentContextResourceConfig,
        AgentContextType,
    )
    from uipath.platform.entities import DataFabricEntityItem

    # (id, name, description) per entity; the folder is the same for all.
    rows = (
        ("e-cust", "Customer", "Customer from CRM"),
        ("e-contact", "Contact", "Inbound contact"),
        ("e-order", "Order", "Order records"),
        ("e-risk", "CustomerRisk", "Risk scoring"),
        ("e-refund", "RefundRequest", "Refund records"),
    )
    entity_set = [
        DataFabricEntityItem.model_validate(
            {
                "id": entity_id,
                "name": entity_name,
                "folderId": "f1",
                "description": blurb,
            }
        )
        for entity_id, entity_name, blurb in rows
    ]
    return AgentContextResourceConfig(
        name="refund_data",
        description="Refund agent data fabric",
        resource_type="context",
        context_type=AgentContextType.DATA_FABRIC_ENTITY_SET,
        entity_set=entity_set,
        is_enabled=True,
    )


# ---------------------------------------------------------------------------
# Shared fixture: patched SDK returning refund entities
# ---------------------------------------------------------------------------


@pytest.fixture
def refund_resolution():
    """Mock resolution holding all five refund hero-case entities."""
    return _make_resolution(_make_refund_entities())


@pytest.fixture
def refund_tools(refund_resolution):
    """Yield ``(tools, resolution)`` built while ``uipath.platform.UiPath`` is patched.

    The patched SDK's ``entities.resolve_entity_set_async`` returns
    ``refund_resolution``. The resolution is yielded alongside the tools so
    tests can attach EntitiesService mocks and verify calls; the patch stays
    active for the duration of the test.
    """
    sdk = MagicMock()
    sdk.entities.resolve_entity_set_async = AsyncMock(return_value=refund_resolution)
    with patch("uipath.platform.UiPath", return_value=sdk):
        tools = create_datafabric_tools(_make_refund_context_resource(), _mock_llm())
        yield tools, refund_resolution
class TestRefundAgentFlow:
    """Integration test: simulates the refund agent's tool-calling pattern.

    Every test drives the write tool (``tools[1]`` from the ``refund_tools``
    fixture) with structured arguments, then inspects the decoded JSON result
    and the calls recorded on the mocked ``entities_service``.
    """

    # RefundRequest insert payload shared by the insert test and the full
    # flow. Tests hand a copy to the tool so the expected value cannot be
    # altered by the code under test.
    REFUND_FIELDS = {
        "ApprovedAmount": 200.00,
        "Reason": "Auto-approved: risk score below threshold",
        "OrderId": "order-uuid",
        "CustomerId": "customer-uuid",
        "Status": "Pending",
    }

    # Entities the tests expect to be accepted for writes.
    WRITABLE_ENTITIES = ("Contact", "Order", "CustomerRisk", "RefundRequest")

    @staticmethod
    async def _invoke(write_tool, payload: dict) -> dict:
        """Call the write tool with *payload* and decode its JSON result."""
        return json.loads(await write_tool.ainvoke(payload))

    # -- Tool creation --

    @pytest.mark.asyncio
    async def test_write_tool_created_for_writable_entities(self, refund_tools) -> None:
        """create_datafabric_tools returns read + write tools.

        Writing to the federated Customer entity is rejected and never
        reaches the API.
        """
        tools, resolution = refund_tools
        assert len(tools) == 2

        write_tool = tools[1]
        assert write_tool.metadata is not None
        assert write_tool.metadata["tool_type"] == "datafabric_write"

        # Invoke once to trigger lazy init (with a simple validation-error call)
        resolution.entities_service.insert_record_async = AsyncMock()
        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Customer",
                "operation": "insert",
                "fields": {"Name": "Test"},
            },
        )

        # Customer is federated -> not configured for writes
        assert result["success"] is False
        assert any("not configured for writes" in e for e in result["errors"])
        resolution.entities_service.insert_record_async.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_write_tool_description_contains_writable_entities(
        self, refund_tools
    ) -> None:
        """After lazy init, the handler has write schemas for Contact, Order,
        CustomerRisk and RefundRequest but NOT Customer.
        """
        tools, resolution = refund_tools
        write_tool = tools[1]

        # Trigger lazy init with an entity key that cannot match anything
        resolution.entities_service.insert_record_async = AsyncMock()
        await write_tool.ainvoke(
            {
                "entity_key": "__trigger_init__",
                "operation": "insert",
                "fields": {"x": 1},
            }
        )

        # The handler is the tool's coroutine; read its resolved schemas
        handler = write_tool.coroutine
        assert handler._write_schemas is not None
        writable_keys = set(handler._write_schemas.keys())
        for entity_key in self.WRITABLE_ENTITIES:
            assert entity_key in writable_keys
        assert "Customer" not in writable_keys

    # -- Individual write operations --

    @pytest.mark.asyncio
    async def test_insert_refund_request(self, refund_tools) -> None:
        """Agent creates a RefundRequest record with correct fields."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        service = resolution.entities_service
        service.insert_record_async = AsyncMock(
            return_value=_make_record("refund-001", dict(self.REFUND_FIELDS))
        )

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "RefundRequest",
                "operation": "insert",
                "fields": dict(self.REFUND_FIELDS),
            },
        )

        assert result["success"] is True
        assert result["operation"] == "insert"
        assert result["entity_key"] == "RefundRequest"
        assert result["record_id"] == "refund-001"
        service.insert_record_async.assert_awaited_once_with(
            "RefundRequest", self.REFUND_FIELDS
        )

    @pytest.mark.asyncio
    async def test_update_order_status(self, refund_tools) -> None:
        """Agent updates Order status to Returned."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        service = resolution.entities_service
        service.update_record_async = AsyncMock(
            return_value=_make_record("order-uuid", {"Status": "Returned"})
        )

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Order",
                "operation": "update",
                "record_id": "order-uuid",
                "fields": {"Status": 3},  # ChoiceSet NumberId for "Returned"
            },
        )

        assert result["success"] is True
        assert result["operation"] == "update"
        assert result["entity_key"] == "Order"
        service.update_record_async.assert_awaited_once_with(
            "Order", "order-uuid", {"Status": 3}
        )

    @pytest.mark.asyncio
    async def test_update_customer_risk(self, refund_tools) -> None:
        """Agent updates CustomerRisk: increment score, decrement LTV."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        # Agent read current values (RiskScore=2, LTV=5000) and computed new ones
        new_values = {"RiskScore": 3, "LifetimeValue": 4800.00}
        service = resolution.entities_service
        service.update_record_async = AsyncMock(
            return_value=_make_record("risk-uuid", dict(new_values))
        )

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "CustomerRisk",
                "operation": "update",
                "record_id": "risk-uuid",
                "fields": dict(new_values),
            },
        )

        assert result["success"] is True
        assert result["operation"] == "update"
        service.update_record_async.assert_awaited_once_with(
            "CustomerRisk", "risk-uuid", new_values
        )

    @pytest.mark.asyncio
    async def test_update_contact_resolution(self, refund_tools) -> None:
        """Agent updates Contact resolution to Approved."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        new_values = {"ResolutionStatus": "Approved"}
        service = resolution.entities_service
        service.update_record_async = AsyncMock(
            return_value=_make_record("contact-uuid", dict(new_values))
        )

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Contact",
                "operation": "update",
                "record_id": "contact-uuid",
                "fields": dict(new_values),
            },
        )

        assert result["success"] is True
        assert result["operation"] == "update"
        service.update_record_async.assert_awaited_once_with(
            "Contact", "contact-uuid", new_values
        )

    # -- Federated entity rejection --

    @pytest.mark.asyncio
    async def test_write_to_federated_entity_rejected(self, refund_tools) -> None:
        """Writing to Customer (federated) returns validation error."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        resolution.entities_service.insert_record_async = AsyncMock()

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Customer",
                "operation": "insert",
                "fields": {"Name": "New Customer", "AccountTier": "Gold"},
            },
        )
        errors = result["errors"]

        assert result["success"] is False
        assert any("not configured for writes" in e for e in errors)
        # Writable entities should be listed in the error
        for entity_key in self.WRITABLE_ENTITIES:
            assert any(entity_key in e for e in errors)
        # API must NOT be called
        resolution.entities_service.insert_record_async.assert_not_awaited()

    # -- Full refund flow --

    @pytest.mark.asyncio
    async def test_full_refund_flow(self, refund_tools) -> None:
        """End-to-end: all 4 writes in sequence, all succeed, all verified.

        Simulates the complete agent SOP:
        1. Insert RefundRequest
        2. Update Order status to Returned
        3. Update CustomerRisk (score + LTV)
        4. Update Contact resolution to Approved
        """
        tools, resolution = refund_tools
        write_tool = tools[1]

        # (entity_key, record_id, fields) for steps 2-4, in call order
        updates = [
            ("Order", "order-uuid", {"Status": 3}),
            (
                "CustomerRisk",
                "risk-uuid",
                {"RiskScore": 3, "LifetimeValue": 4800.00},
            ),
            ("Contact", "contact-uuid", {"ResolutionStatus": "Approved"}),
        ]

        service = resolution.entities_service
        service.insert_record_async = AsyncMock(
            return_value=_make_record("refund-001", dict(self.REFUND_FIELDS))
        )
        service.update_record_async = AsyncMock(
            side_effect=[
                _make_record(record_id, dict(fields))
                for _, record_id, fields in updates
            ]
        )

        # Step 1: Insert RefundRequest
        inserted = await self._invoke(
            write_tool,
            {
                "entity_key": "RefundRequest",
                "operation": "insert",
                "fields": dict(self.REFUND_FIELDS),
            },
        )
        assert inserted["success"] is True
        assert inserted["operation"] == "insert"
        assert inserted["entity_key"] == "RefundRequest"

        # Steps 2-4: the three updates
        for entity_key, record_id, fields in updates:
            updated = await self._invoke(
                write_tool,
                {
                    "entity_key": entity_key,
                    "operation": "update",
                    "record_id": record_id,
                    "fields": dict(fields),
                },
            )
            assert updated["success"] is True
            assert updated["operation"] == "update"
            assert updated["entity_key"] == entity_key

        # Verify all 4 calls were made with correct args, in order
        service.insert_record_async.assert_awaited_once_with(
            "RefundRequest", self.REFUND_FIELDS
        )
        assert service.update_record_async.await_count == len(updates)
        recorded = [call.args for call in service.update_record_async.await_args_list]
        assert recorded == updates

    # -- Validation edge cases --

    @pytest.mark.asyncio
    async def test_insert_missing_required_field_rejected(self, refund_tools) -> None:
        """Insert RefundRequest without ApprovedAmount returns validation error."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        resolution.entities_service.insert_record_async = AsyncMock()

        # Missing ApprovedAmount (required) and Reason (required)
        result = await self._invoke(
            write_tool,
            {
                "entity_key": "RefundRequest",
                "operation": "insert",
                "fields": {
                    "OrderId": "order-uuid",
                    "CustomerId": "customer-uuid",
                    "Status": "Pending",
                },
            },
        )

        assert result["success"] is False
        assert any("ApprovedAmount" in e for e in result["errors"])
        assert any("Reason" in e for e in result["errors"])
        resolution.entities_service.insert_record_async.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_delete_requires_record_id(self, refund_tools) -> None:
        """Delete without record_id returns validation error."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        resolution.entities_service.delete_record_async = AsyncMock()

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Order",
                "operation": "delete",
            },
        )

        assert result["success"] is False
        assert any("record_id" in e for e in result["errors"])
        resolution.entities_service.delete_record_async.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_update_unknown_field_rejected(self, refund_tools) -> None:
        """Update with a field not in the entity schema returns validation error."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        resolution.entities_service.update_record_async = AsyncMock()

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "Order",
                "operation": "update",
                "record_id": "order-uuid",
                "fields": {"BogusField": "value"},
            },
        )

        assert result["success"] is False
        assert any("BogusField" in e for e in result["errors"])
        resolution.entities_service.update_record_async.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_insert_with_system_field_rejected(self, refund_tools) -> None:
        """Insert with system field (CreatedOn) returns validation error."""
        tools, resolution = refund_tools
        write_tool = tools[1]

        resolution.entities_service.insert_record_async = AsyncMock()

        result = await self._invoke(
            write_tool,
            {
                "entity_key": "RefundRequest",
                "operation": "insert",
                "fields": {
                    "ApprovedAmount": 100.00,
                    "Reason": "Test",
                    "CreatedOn": "2026-01-01",
                },
            },
        )

        assert result["success"] is False
        assert any("CreatedOn" in e for e in result["errors"])
        resolution.entities_service.insert_record_async.assert_not_awaited()
def _mock_entities_service() -> MagicMock:
    """Create a mock EntitiesService with async CRUD methods."""
    svc = MagicMock()
    svc.insert_record_async = AsyncMock()
    svc.update_record_async = AsyncMock()
    svc.delete_record_async = AsyncMock()
    return svc


def _mock_entity_record(record_id: str = "rec-123") -> MagicMock:
    """Create a mock EntityRecord whose ``model_dump()`` returns Id and Name."""
    record = MagicMock()
    record.id = record_id
    record.model_dump.return_value = {"Id": record_id, "Name": "Test"}
    return record


class TestWriteExecutor:
    """Tests for WriteExecutor.execute.

    Service calls are checked with the ``assert_awaited_*`` family rather than
    ``assert_called_*``: on an AsyncMock a call is recorded as soon as the
    coroutine object is created, so only the awaited variants prove the
    coroutine was actually run.
    """

    @pytest.mark.asyncio
    async def test_insert_calls_insert_record_async(self) -> None:
        svc = _mock_entities_service()
        record = _mock_entity_record("rec-new")
        svc.insert_record_async.return_value = record

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.insert,
            fields={"Name": "New Order"},
        )
        result = await executor.execute(intent)

        svc.insert_record_async.assert_awaited_once_with(
            "Orders", {"Name": "New Order"}
        )
        assert result.success is True
        assert result.operation == "insert"
        assert result.entity_key == "Orders"
        assert result.record_id == "rec-new"
        assert result.record is not None
        assert result.error is None

    @pytest.mark.asyncio
    async def test_update_calls_update_record_async(self) -> None:
        svc = _mock_entities_service()
        record = _mock_entity_record("rec-1")
        svc.update_record_async.return_value = record

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.update,
            record_id="rec-1",
            fields={"Amount": 200},
        )
        result = await executor.execute(intent)

        svc.update_record_async.assert_awaited_once_with(
            "Orders", "rec-1", {"Amount": 200}
        )
        assert result.success is True
        assert result.operation == "update"
        assert result.record_id == "rec-1"

    @pytest.mark.asyncio
    async def test_delete_calls_delete_record_async(self) -> None:
        svc = _mock_entities_service()
        svc.delete_record_async.return_value = None

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.delete,
            record_id="rec-1",
        )
        result = await executor.execute(intent)

        # Delete returns nothing, so the await check is the only evidence
        # that the service call really ran.
        svc.delete_record_async.assert_awaited_once_with("Orders", "rec-1")
        assert result.success is True
        assert result.operation == "delete"
        assert result.record_id == "rec-1"
        assert result.record is None

    @pytest.mark.asyncio
    async def test_insert_error_returns_failure(self) -> None:
        svc = _mock_entities_service()
        svc.insert_record_async.side_effect = RuntimeError("API error: 403 Forbidden")

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.insert,
            fields={"Name": "Test"},
        )
        result = await executor.execute(intent)

        # The failure must come from the service call, not from an earlier step
        svc.insert_record_async.assert_awaited_once()
        assert result.success is False
        assert result.operation == "insert"
        assert result.entity_key == "Orders"
        assert "403 Forbidden" in (result.error or "")

    @pytest.mark.asyncio
    async def test_update_error_returns_failure(self) -> None:
        svc = _mock_entities_service()
        svc.update_record_async.side_effect = RuntimeError("Not found")

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.update,
            record_id="rec-1",
            fields={"Amount": 100},
        )
        result = await executor.execute(intent)

        svc.update_record_async.assert_awaited_once()
        assert result.success is False
        assert "Not found" in (result.error or "")

    @pytest.mark.asyncio
    async def test_delete_error_returns_failure(self) -> None:
        svc = _mock_entities_service()
        svc.delete_record_async.side_effect = RuntimeError("Not found")

        executor = WriteExecutor(svc)
        intent = DataFabricWriteInput(
            entity_key="Orders",
            operation=EntityWriteOperation.delete,
            record_id="rec-1",
        )
        result = await executor.execute(intent)

        svc.delete_record_async.assert_awaited_once()
        assert result.success is False
        assert result.record_id == "rec-1"
        assert "Not found" in (result.error or "")
def _make_field(
    name: str,
    display_name: str | None = None,
    is_system_field: bool = False,
    is_hidden_field: bool = False,
    is_primary_key: bool = False,
    is_attachment: bool = False,
    is_required: bool = False,
    sql_type_name: str = "varchar",
    description: str | None = None,
    choiceset_id: str | None = None,
) -> MagicMock:
    """Build a mock FieldMetadata object.

    ``display_name`` defaults to ``name``; ``sql_type`` is a nested mock whose
    ``name`` attribute is ``sql_type_name``.
    """
    sql_type = MagicMock()
    sql_type.name = sql_type_name

    attributes = {
        "name": name,
        "display_name": display_name or name,
        "is_system_field": is_system_field,
        "is_hidden_field": is_hidden_field,
        "is_primary_key": is_primary_key,
        "is_attachment": is_attachment,
        "is_required": is_required,
        "description": description,
        "choiceset_id": choiceset_id,
        "sql_type": sql_type,
    }
    field = MagicMock()
    for attribute, value in attributes.items():
        setattr(field, attribute, value)
    return field


def _make_entity(
    name: str,
    fields: list[MagicMock],
    entity_type: str = "Entity",
    external_fields: list | None = None,
) -> MagicMock:
    """Build a mock Entity; ``display_name`` is always the same as ``name``."""
    attributes = {
        "name": name,
        "display_name": name,
        "fields": fields,
        "entity_type": entity_type,
        "external_fields": external_fields,
    }
    entity = MagicMock()
    for attribute, value in attributes.items():
        setattr(entity, attribute, value)
    return entity


def _make_resolution(entities: list[MagicMock]) -> MagicMock:
    """Build a mock of the result of ``resolve_entity_set_async``.

    It exposes ``entities`` and a fresh MagicMock as ``entities_service``.
    """
    resolution = MagicMock()
    setattr(resolution, "entities", entities)
    setattr(resolution, "entities_service", MagicMock())
    return resolution


def _make_context_resource(
    name: str = "my_data_fabric",
    description: str = "Test Data Fabric tool",
    entity_items: list[dict] | None = None,
) -> AgentContextResourceConfig:
    """Build an AgentContextResourceConfig of type DATA_FABRIC_ENTITY_SET.

    When ``entity_items`` is None, two default entities (Orders and Products,
    both in folder ``f1``) are used; an explicit empty list is kept as-is.
    """
    default_items = [
        {
            "id": "e1",
            "name": "Orders",
            "folderId": "f1",
            "description": "Customer orders",
        },
        {
            "id": "e2",
            "name": "Products",
            "folderId": "f1",
            "description": "Product catalog",
        },
    ]
    chosen_items = default_items if entity_items is None else entity_items
    return AgentContextResourceConfig(
        name=name,
        description=description,
        resource_type="context",
        context_type=AgentContextType.DATA_FABRIC_ENTITY_SET,
        entity_set=list(map(DataFabricEntityItem.model_validate, chosen_items)),
        is_enabled=True,
    )


def _mock_llm() -> MagicMock:
    """Create a mock LLM for the read tool's subgraph."""
    llm = MagicMock()
    return llm
class TestCreateDatafabricToolsReturnsToolPair:
    """create_datafabric_tools hands back the read tool followed by the write tool."""

    def test_returns_list_of_two_tools(self) -> None:
        tools = create_datafabric_tools(_make_context_resource(), _mock_llm())
        assert isinstance(tools, list)
        assert len(tools) == 2
        for tool in tools:
            assert isinstance(tool, BaseTool)

    def test_read_tool_is_first(self) -> None:
        tools = create_datafabric_tools(_make_context_resource(), _mock_llm())
        metadata = tools[0].metadata
        assert metadata is not None
        assert metadata.get("tool_type") == "datafabric_sql"

    def test_write_tool_is_second(self) -> None:
        tools = create_datafabric_tools(_make_context_resource(), _mock_llm())
        metadata = tools[1].metadata
        assert metadata is not None
        assert metadata.get("tool_type") == "datafabric_write"

    def test_tool_names_match_convention(self) -> None:
        # The write tool's name is the read tool's name plus "_write".
        resource = _make_context_resource(name="customer_data")
        tools = create_datafabric_tools(
            resource, _mock_llm(), tool_name="customer_data"
        )
        read_name, write_name = tools[0].name, tools[1].name
        assert read_name == "customer_data"
        assert write_name == "customer_data_write"
class TestWriteToolMetadata:
    """Checks the metadata flags carried by the write tool (``tools[1]``)."""

    def test_tool_type_is_datafabric_write(self) -> None:
        metadata = create_datafabric_tools(_make_context_resource(), _mock_llm())[
            1
        ].metadata
        assert metadata is not None
        assert metadata["tool_type"] == "datafabric_write"

    def test_require_conversational_confirmation_is_true(self) -> None:
        metadata = create_datafabric_tools(_make_context_resource(), _mock_llm())[
            1
        ].metadata
        assert metadata is not None
        assert metadata["require_conversational_confirmation"] is True


# ---------------------------------------------------------------------------
# 3. Write tool args_schema is DataFabricWriteInput
# ---------------------------------------------------------------------------


class TestWriteToolArgsSchema:
    """Checks that the write tool's args_schema is DataFabricWriteInput."""

    def test_args_schema_is_datafabric_write_input(self) -> None:
        _, write_tool = create_datafabric_tools(
            _make_context_resource(), _mock_llm()
        )[:2]
        assert write_tool.args_schema is DataFabricWriteInput

    def test_schema_has_expected_fields(self) -> None:
        tools = create_datafabric_tools(_make_context_resource(), _mock_llm())
        props = tools[1].args_schema.model_json_schema()["properties"]
        for expected in ("entity_key", "operation", "record_id", "fields"):
            assert expected in props
+ """ + + @pytest.fixture + def mock_entities(self) -> list[MagicMock]: + """Two mock entities: Orders (with required OrderName) and Products.""" + orders = _make_entity( + "Orders", + [ + _make_field("Id", is_primary_key=True), + _make_field("CreatedOn", is_system_field=True), + _make_field("OrderName", is_required=True), + _make_field("Amount", sql_type_name="decimal"), + _make_field("Notes"), + ], + ) + products = _make_entity( + "Products", + [ + _make_field("Id", is_primary_key=True), + _make_field("ProductName", is_required=True), + _make_field("Price", sql_type_name="decimal"), + ], + ) + return [orders, products] + + @pytest.fixture + def mock_record(self) -> MagicMock: + """A mock record returned by insert/update.""" + record = MagicMock() + record.id = "rec-123" + record.model_dump.return_value = {"Id": "rec-123", "OrderName": "Test"} + return record + + @pytest.mark.asyncio + async def test_insert_with_valid_fields_calls_api( + self, mock_entities: list[MagicMock], mock_record: MagicMock + ) -> None: + """INSERT with valid fields on a context-derived entity calls insert_record_async.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.insert_record_async = AsyncMock( + return_value=mock_record + ) + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + resource = _make_context_resource() + tools = create_datafabric_tools(resource, _mock_llm()) + write_tool = tools[1] + + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "insert", + "fields": {"OrderName": "New Order", "Amount": 99.99}, + } + ) + result = json.loads(result_str) + + assert result["success"] is True + assert result["operation"] == "insert" + resolution.entities_service.insert_record_async.assert_awaited_once_with( + "Orders", {"OrderName": "New Order", "Amount": 99.99} 
+ ) + + @pytest.mark.asyncio + async def test_update_with_record_id_and_fields_calls_api( + self, mock_entities: list[MagicMock], mock_record: MagicMock + ) -> None: + """UPDATE with record_id + fields calls update_record_async.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.update_record_async = AsyncMock( + return_value=mock_record + ) + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "update", + "record_id": "rec-1", + "fields": {"Amount": 150}, + } + ) + result = json.loads(result_str) + + assert result["success"] is True + assert result["operation"] == "update" + resolution.entities_service.update_record_async.assert_awaited_once_with( + "Orders", "rec-1", {"Amount": 150} + ) + + @pytest.mark.asyncio + async def test_delete_with_record_id_calls_api( + self, mock_entities: list[MagicMock] + ) -> None: + """DELETE with record_id calls delete_record_async.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.delete_record_async = AsyncMock(return_value=None) + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "delete", + "record_id": "rec-42", + } + ) + result = json.loads(result_str) + + assert result["success"] is True + assert result["operation"] == "delete" + assert result["record_id"] == "rec-42" + 
resolution.entities_service.delete_record_async.assert_awaited_once_with( + "Orders", "rec-42" + ) + + @pytest.mark.asyncio + async def test_insert_missing_required_field_returns_validation_error( + self, mock_entities: list[MagicMock] + ) -> None: + """INSERT missing a required field returns a validation error, does NOT call API.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.insert_record_async = AsyncMock() + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + # OrderName is required but not provided + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "insert", + "fields": {"Amount": 50}, + } + ) + result = json.loads(result_str) + + assert result["success"] is False + assert len(result["errors"]) >= 1 + assert "OrderName" in result["errors"][0] + # API should NOT have been called + resolution.entities_service.insert_record_async.assert_not_awaited() + + @pytest.mark.asyncio + async def test_insert_with_system_field_name_returns_validation_error( + self, mock_entities: list[MagicMock] + ) -> None: + """INSERT with a system field name returns validation error.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.insert_record_async = AsyncMock() + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + # CreatedOn is a system field and not writable + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "insert", + "fields": 
{"OrderName": "Test", "CreatedOn": "2026-01-01"}, + } + ) + result = json.loads(result_str) + + assert result["success"] is False + assert any("CreatedOn" in e for e in result["errors"]) + resolution.entities_service.insert_record_async.assert_not_awaited() + + @pytest.mark.asyncio + async def test_write_to_unknown_entity_returns_validation_error( + self, mock_entities: list[MagicMock] + ) -> None: + """Writing to an entity not in write_schemas returns a validation error.""" + resolution = _make_resolution(mock_entities) + resolution.entities_service.insert_record_async = AsyncMock() + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + # "UnknownEntity" is not in the resolved schemas + result_str = await write_tool.ainvoke( + { + "entity_key": "UnknownEntity", + "operation": "insert", + "fields": {"AnyField": "value"}, + } + ) + result = json.loads(result_str) + + assert result["success"] is False + assert any("not configured for writes" in e for e in result["errors"]) + resolution.entities_service.insert_record_async.assert_not_awaited() + + +# --------------------------------------------------------------------------- +# 5. 
Federated / non-writable entities +# --------------------------------------------------------------------------- + + +class TestFederatedEntitiesNotWritable: + """Verify that federated and non-native entities are excluded from writes.""" + + @pytest.mark.asyncio + async def test_all_federated_entities_reject_writes(self) -> None: + """When all resolved entities are federated, writes are rejected.""" + federated_orders = _make_entity( + "Orders", + [ + _make_field("Id", is_primary_key=True), + _make_field("OrderName", is_required=True), + ], + entity_type="Entity", + external_fields=[{"source": "salesforce"}], + ) + federated_products = _make_entity( + "Products", + [ + _make_field("Id", is_primary_key=True), + _make_field("ProductName", is_required=True), + ], + entity_type="Entity", + external_fields=[{"source": "sap"}], + ) + resolution = _make_resolution([federated_orders, federated_products]) + resolution.entities_service.insert_record_async = AsyncMock() + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + result_str = await write_tool.ainvoke( + { + "entity_key": "Orders", + "operation": "insert", + "fields": {"OrderName": "Test"}, + } + ) + result = json.loads(result_str) + + assert result["success"] is False + assert any("not configured for writes" in e for e in result["errors"]) + resolution.entities_service.insert_record_async.assert_not_awaited() + + @pytest.mark.asyncio + async def test_choiceset_entity_not_writable(self) -> None: + """ChoiceSet entities are excluded from writes.""" + choiceset = _make_entity( + "StatusOptions", + [_make_field("Label")], + entity_type="ChoiceSet", + ) + native = _make_entity( + "Orders", + [ + _make_field("Id", is_primary_key=True), + _make_field("OrderName", 
is_required=True), + ], + ) + resolution = _make_resolution([choiceset, native]) + resolution.entities_service.insert_record_async = AsyncMock() + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + write_tool = tools[1] + + # ChoiceSet should be rejected + result_str = await write_tool.ainvoke( + { + "entity_key": "StatusOptions", + "operation": "insert", + "fields": {"Label": "Active"}, + } + ) + result = json.loads(result_str) + assert result["success"] is False + assert any("not configured for writes" in e for e in result["errors"]) + + +# --------------------------------------------------------------------------- +# 6. Read tool is unchanged (renumbered from 5) +# --------------------------------------------------------------------------- + + +class TestReadToolUnchanged: + """Verify the read tool retains its original properties.""" + + def test_read_tool_type_is_datafabric_sql(self) -> None: + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + read_tool = tools[0] + assert read_tool.metadata is not None + assert read_tool.metadata["tool_type"] == "datafabric_sql" + + def test_read_tool_args_schema_is_datafabric_query_input(self) -> None: + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + read_tool = tools[0] + assert read_tool.args_schema is DataFabricQueryInput + + def test_read_tool_description_mentions_query(self) -> None: + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + read_tool = tools[0] + assert "Query" in read_tool.description or "query" in read_tool.description + + +# --------------------------------------------------------------------------- +# 7. 
context_tool.py integration +# --------------------------------------------------------------------------- + + +class TestContextToolIntegration: + """Verify create_context_tool returns a list for DATA_FABRIC_ENTITY_SET.""" + + def test_create_context_tool_returns_list_for_entity_set(self) -> None: + from uipath_langchain.agent.tools.context_tool import create_context_tool + + resource = _make_context_resource() + mock_llm = _mock_llm() + + with patch( + "uipath_langchain.agent.tools.datafabric_tool.create_datafabric_tools" + ) as mock_create: + mock_create.return_value = [MagicMock(), MagicMock()] + result = create_context_tool(resource, llm=mock_llm) + + assert isinstance(result, list) + assert len(result) == 2 + mock_create.assert_called_once() + + def test_create_context_tool_raises_without_llm(self) -> None: + from uipath_langchain.agent.tools.context_tool import create_context_tool + + resource = _make_context_resource() + + with pytest.raises(ValueError, match="LLM"): + create_context_tool(resource, llm=None) diff --git a/tests/agent/tools/datafabric_tool/test_write_schema_builder.py b/tests/agent/tools/datafabric_tool/test_write_schema_builder.py new file mode 100644 index 000000000..cadb81612 --- /dev/null +++ b/tests/agent/tools/datafabric_tool/test_write_schema_builder.py @@ -0,0 +1,278 @@ +"""Tests for write_schema_builder — NL schema generation for write tool descriptions.""" + +from __future__ import annotations + +from uipath_langchain.agent.tools.datafabric_tool.models import ( + EntityWriteSchema, + WritableFieldInfo, +) +from uipath_langchain.agent.tools.datafabric_tool.write_schema_builder import ( + build_write_tool_description, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _field( + name: str, + type_name: str = "varchar", + is_required: bool = False, + is_choiceset: bool = False, + choiceset_id: str | None = 
None, + description: str | None = None, +) -> WritableFieldInfo: + return WritableFieldInfo( + name=name, + display_name=name, + type_name=type_name, + is_required=is_required, + is_choiceset=is_choiceset, + choiceset_id=choiceset_id, + description=description, + ) + + +def _schema( + entity_key: str, + display_name: str, + fields: list[WritableFieldInfo], +) -> EntityWriteSchema: + return EntityWriteSchema( + entity_key=entity_key, + display_name=display_name, + writable_fields=fields, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestBuildWriteToolDescription: + """Tests for build_write_tool_description.""" + + def test_empty_schemas_returns_no_entities_message(self) -> None: + result = build_write_tool_description({}) + assert "No writable entities" in result + + def test_single_entity_with_fields(self) -> None: + schemas = { + "Orders": _schema( + "Orders", + "Orders", + [ + _field("OrderName", is_required=True), + _field("Amount", type_name="decimal"), + ], + ) + } + result = build_write_tool_description(schemas) + assert "### Orders" in result + assert "OrderName" in result + assert "required" in result + assert "DECIMAL" in result + + def test_multiple_entities_sorted_by_key(self) -> None: + schemas = { + "Zebra": _schema("Zebra", "Zebra", [_field("Name")]), + "Alpha": _schema("Alpha", "Alpha", [_field("Value")]), + } + result = build_write_tool_description(schemas) + alpha_pos = result.index("### Alpha") + zebra_pos = result.index("### Zebra") + assert alpha_pos < zebra_pos + + def test_choiceset_field_shows_choice_set_indicator(self) -> None: + schemas = { + "Orders": _schema( + "Orders", + "Orders", + [ + _field( + "Status", + type_name="varchar", + is_choiceset=True, + choiceset_id="OrderStatusCS", + ), + ], + ) + } + result = build_write_tool_description(schemas) + assert "CHOICE_SET" in result + + def 
test_entity_access_restricts_operations(self) -> None: + schemas = { + "RefundRequest": _schema( + "RefundRequest", + "Refund Request", + [_field("Amount", type_name="decimal", is_required=True)], + ) + } + access = {"RefundRequest": {"insert"}} + result = build_write_tool_description(schemas, entity_access=access) + assert "### Refund Request (insert)" in result + + def test_entity_access_multiple_ops(self) -> None: + schemas = { + "Orders": _schema( + "Orders", + "Orders", + [_field("Status")], + ) + } + access = {"Orders": {"update", "delete"}} + result = build_write_tool_description(schemas, entity_access=access) + assert "(delete, update)" in result + + def test_no_entity_access_shows_all_ops(self) -> None: + schemas = { + "Orders": _schema( + "Orders", + "Orders", + [_field("Name")], + ) + } + result = build_write_tool_description(schemas, entity_access=None) + assert "(delete, insert, update)" in result + + def test_operations_section_present(self) -> None: + schemas = { + "Orders": _schema("Orders", "Orders", [_field("Name")]), + } + result = build_write_tool_description(schemas) + assert "Operations:" in result + assert "- insert:" in result + assert "- update:" in result + assert "- delete:" in result + + def test_query_first_advice_present(self) -> None: + schemas = { + "Orders": _schema("Orders", "Orders", [_field("Name")]), + } + result = build_write_tool_description(schemas) + assert "Query the entity first" in result + + def test_required_field_formatting(self) -> None: + schemas = { + "Items": _schema( + "Items", + "Items", + [ + _field("ItemName", is_required=True), + _field("Notes", is_required=False), + ], + ) + } + result = build_write_tool_description(schemas) + # Required field should have "required" in its line + lines = result.split("\n") + item_name_line = [ln for ln in lines if "ItemName" in ln][0] + notes_line = [ln for ln in lines if "Notes" in ln][0] + assert "required" in item_name_line + assert "required" not in notes_line + + def 
test_hero_case_refund_schema(self) -> None: + """Full hero case: multiple entities with mixed ops, choicesets, required fields.""" + schemas = { + "PurchaseOrder": _schema( + "PurchaseOrder", + "PurchaseOrder", + [ + _field("OrderNumber", is_required=True), + _field("TotalAmount", type_name="decimal"), + _field( + "OrderStatus", + is_choiceset=True, + choiceset_id="OrderStatusCS", + ), + ], + ), + "RefundRequest": _schema( + "RefundRequest", + "RefundRequest", + [ + _field("ApprovedAmount", type_name="decimal", is_required=True), + _field("Reason", is_required=True), + _field("OrderRef"), + _field("CustomerRef"), + _field("RefundStatus", is_choiceset=True, choiceset_id="RefundCS"), + ], + ), + "CustomerRisk": _schema( + "CustomerRisk", + "CustomerRisk", + [ + _field("RiskScore", type_name="int"), + _field("LifetimeValue", type_name="decimal"), + _field("FraudFlag", type_name="bit"), + ], + ), + } + access = { + "PurchaseOrder": {"update"}, + "RefundRequest": {"insert"}, + "CustomerRisk": {"update"}, + } + result = build_write_tool_description(schemas, entity_access=access) + + # All entities present + assert "PurchaseOrder" in result + assert "RefundRequest" in result + assert "CustomerRisk" in result + + # Ops are correct + assert "(update)" in result # PurchaseOrder and CustomerRisk + assert "(insert)" in result # RefundRequest + + # ChoiceSet indicators + assert "CHOICE_SET" in result + + # Required fields + assert "required" in result + + +# --------------------------------------------------------------------------- +# WritableFieldInfo choiceset extension +# --------------------------------------------------------------------------- + + +class TestWritableFieldInfoChoiceset: + """Tests for the choiceset extension on WritableFieldInfo.""" + + def test_choiceset_fields_default_to_false(self) -> None: + field = WritableFieldInfo( + name="Status", + display_name="Status", + type_name="varchar", + is_required=False, + ) + assert field.is_choiceset is False + assert 
field.choiceset_id is None + assert field.allowed_values is None + + def test_choiceset_field_with_id(self) -> None: + field = WritableFieldInfo( + name="Status", + display_name="Status", + type_name="varchar", + is_required=False, + choiceset_id="OrderStatusCS", + is_choiceset=True, + ) + assert field.is_choiceset is True + assert field.choiceset_id == "OrderStatusCS" + + def test_allowed_values_can_be_set(self) -> None: + field = WritableFieldInfo( + name="Status", + display_name="Status", + type_name="varchar", + is_required=False, + choiceset_id="OrderStatusCS", + is_choiceset=True, + allowed_values=["Pending", "Approved", "Denied"], + ) + assert field.allowed_values == ["Pending", "Approved", "Denied"] diff --git a/tests/agent/tools/datafabric_tool/test_write_validation.py b/tests/agent/tools/datafabric_tool/test_write_validation.py new file mode 100644 index 000000000..01260ac48 --- /dev/null +++ b/tests/agent/tools/datafabric_tool/test_write_validation.py @@ -0,0 +1,433 @@ +"""Tests for Data Fabric write validation logic.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from uipath_langchain.agent.tools.datafabric_tool.models import ( + DataFabricWriteInput, + EntityWriteOperation, + EntityWriteSchema, + WritableFieldInfo, +) +from uipath_langchain.agent.tools.datafabric_tool.write_validation import ( + derive_writable_fields, + is_entity_writable, + validate_mutation_intent, +) + + +def _make_field( + name: str, + display_name: str | None = None, + is_system_field: bool = False, + is_hidden_field: bool = False, + is_primary_key: bool = False, + is_attachment: bool = False, + is_required: bool = False, + sql_type_name: str = "varchar", + description: str | None = None, + choiceset_id: str | None = None, +) -> MagicMock: + """Create a mock FieldMetadata object.""" + field = MagicMock() + field.name = name + field.display_name = display_name or name + field.is_system_field = is_system_field + field.is_hidden_field = 
is_hidden_field + field.is_primary_key = is_primary_key + field.is_attachment = is_attachment + field.is_required = is_required + field.description = description + field.choiceset_id = choiceset_id + sql_type = MagicMock() + sql_type.name = sql_type_name + field.sql_type = sql_type + return field + + +def _make_entity( + name: str, + fields: list[MagicMock], + entity_type: str = "Entity", + external_fields: list | None = None, +) -> MagicMock: + """Create a mock Entity object.""" + entity = MagicMock() + entity.name = name + entity.display_name = name + entity.fields = fields + entity.entity_type = entity_type + entity.external_fields = external_fields + return entity + + +class TestIsEntityWritable: + """Tests for is_entity_writable.""" + + def test_native_entity_is_writable(self) -> None: + entity = _make_entity("Orders", [], entity_type="Entity", external_fields=None) + assert is_entity_writable(entity) is True + + def test_native_entity_empty_external_fields_is_writable(self) -> None: + entity = _make_entity("Orders", [], entity_type="Entity", external_fields=[]) + assert is_entity_writable(entity) is True + + def test_federated_entity_is_not_writable(self) -> None: + entity = _make_entity( + "SalesForceAccounts", + [], + entity_type="Entity", + external_fields=[{"source": "salesforce"}], + ) + assert is_entity_writable(entity) is False + + def test_choiceset_is_not_writable(self) -> None: + entity = _make_entity("StatusOptions", [], entity_type="ChoiceSet") + assert is_entity_writable(entity) is False + + def test_system_entity_is_not_writable(self) -> None: + entity = _make_entity("AuditLog", [], entity_type="SystemEntity") + assert is_entity_writable(entity) is False + + def test_internal_entity_is_not_writable(self) -> None: + entity = _make_entity("Internal", [], entity_type="InternalEntity") + assert is_entity_writable(entity) is False + + +class TestDeriveWritableFields: + """Tests for derive_writable_fields.""" + + def test_filters_system_fields(self) 
-> None: + entity = _make_entity( + "Orders", + [ + _make_field("Id", is_primary_key=True), + _make_field("CreatedOn", is_system_field=True), + _make_field("ModifiedOn", is_system_field=True), + _make_field("OrderName"), + _make_field("Amount", sql_type_name="decimal"), + ], + ) + result = derive_writable_fields(entity) + names = [f.name for f in result] + assert "OrderName" in names + assert "Amount" in names + assert "Id" not in names + assert "CreatedOn" not in names + assert "ModifiedOn" not in names + + def test_filters_hidden_fields(self) -> None: + entity = _make_entity( + "Orders", + [ + _make_field("InternalRef", is_hidden_field=True), + _make_field("OrderName"), + ], + ) + result = derive_writable_fields(entity) + names = [f.name for f in result] + assert "OrderName" in names + assert "InternalRef" not in names + + def test_filters_attachment_fields(self) -> None: + entity = _make_entity( + "Orders", + [ + _make_field("Document", is_attachment=True), + _make_field("OrderName"), + ], + ) + result = derive_writable_fields(entity) + names = [f.name for f in result] + assert "OrderName" in names + assert "Document" not in names + + def test_preserves_required_flag(self) -> None: + entity = _make_entity( + "Orders", + [ + _make_field("OrderName", is_required=True), + _make_field("Notes"), + ], + ) + result = derive_writable_fields(entity) + by_name = {f.name: f for f in result} + assert by_name["OrderName"].is_required is True + assert by_name["Notes"].is_required is False + + def test_preserves_type_and_description(self) -> None: + entity = _make_entity( + "Orders", + [ + _make_field( + "Amount", + sql_type_name="decimal", + description="Order total", + ), + ], + ) + result = derive_writable_fields(entity) + assert len(result) == 1 + assert result[0].type_name == "decimal" + assert result[0].description == "Order total" + + def test_empty_fields(self) -> None: + entity = _make_entity("Empty", []) + assert derive_writable_fields(entity) == [] + + def 
test_none_fields(self) -> None: + entity = MagicMock() + entity.fields = None + assert derive_writable_fields(entity) == [] + + def test_all_fields_filtered_out(self) -> None: + entity = _make_entity( + "SystemOnly", + [ + _make_field("Id", is_primary_key=True), + _make_field("CreatedOn", is_system_field=True), + ], + ) + assert derive_writable_fields(entity) == [] + + def test_returns_empty_for_non_writable_entity(self) -> None: + """Federated entities return no writable fields even if they have user fields.""" + entity = _make_entity( + "Federated", + [_make_field("Name"), _make_field("Value")], + entity_type="Entity", + external_fields=[{"source": "ext"}], + ) + assert derive_writable_fields(entity) == [] + + def test_returns_empty_for_choiceset(self) -> None: + entity = _make_entity( + "StatusOptions", + [_make_field("Label")], + entity_type="ChoiceSet", + ) + assert derive_writable_fields(entity) == [] + + def test_choiceset_field_sets_is_choiceset(self) -> None: + """Field with choiceset_id gets is_choiceset=True and stores the id.""" + entity = _make_entity( + "Orders", + [ + _make_field( + "Status", + choiceset_id="OrderStatusCS", + ), + ], + ) + result = derive_writable_fields(entity) + assert len(result) == 1 + assert result[0].is_choiceset is True + assert result[0].choiceset_id == "OrderStatusCS" + + def test_non_choiceset_field_has_is_choiceset_false(self) -> None: + """Field without choiceset_id gets is_choiceset=False.""" + entity = _make_entity( + "Orders", + [_make_field("OrderName")], + ) + result = derive_writable_fields(entity) + assert len(result) == 1 + assert result[0].is_choiceset is False + assert result[0].choiceset_id is None + + def test_mixed_choiceset_and_regular_fields(self) -> None: + """Mix of choiceset and regular fields are handled correctly.""" + entity = _make_entity( + "Orders", + [ + _make_field("OrderName"), + _make_field("Status", choiceset_id="OrderStatusCS"), + _make_field("Amount", sql_type_name="decimal"), + ], + ) + 
result = derive_writable_fields(entity) + by_name = {f.name: f for f in result} + assert by_name["OrderName"].is_choiceset is False + assert by_name["Status"].is_choiceset is True + assert by_name["Status"].choiceset_id == "OrderStatusCS" + assert by_name["Amount"].is_choiceset is False + + +class TestValidateMutationIntent: + """Tests for validate_mutation_intent.""" + + def _schema(self) -> dict[str, EntityWriteSchema]: + """Build a sample write_schemas dict.""" + return { + "Orders": EntityWriteSchema( + entity_key="Orders", + display_name="Orders", + writable_fields=[ + WritableFieldInfo( + name="OrderName", + display_name="Order Name", + type_name="varchar", + is_required=True, + ), + WritableFieldInfo( + name="Amount", + display_name="Amount", + type_name="decimal", + is_required=False, + ), + WritableFieldInfo( + name="Notes", + display_name="Notes", + type_name="varchar", + is_required=False, + ), + ], + ) + } + + # -- Structural validation -- + + def test_delete_requires_record_id(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.delete, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert any("record_id" in e for e in errors) + + def test_update_requires_record_id(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + fields={"Amount": 100}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert len(errors) == 1 + assert "record_id" in errors[0] + + def test_insert_requires_fields(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert any("fields" in e for e in errors) + + def test_update_requires_fields(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + record_id="rec-1", + ) + errors = 
validate_mutation_intent(intent, self._schema()) + assert len(errors) == 1 + assert "fields" in errors[0] + + # -- Entity not configured for writes -- + + def test_entity_not_in_schemas_returns_error(self) -> None: + """Entity not in write_schemas returns a validation error.""" + intent = DataFabricWriteInput( + entity_key="Unknown", + operation=EntityWriteOperation.insert, + fields={"Anything": "goes"}, + ) + errors = validate_mutation_intent(intent, write_schemas=self._schema()) + assert len(errors) == 1 + assert "not configured for writes" in errors[0] + assert "Orders" in errors[0] # listed as writable + + def test_no_write_schemas_returns_error(self) -> None: + """No write_schemas at all means nothing is writable.""" + intent = DataFabricWriteInput( + entity_key="SomeEntity", + operation=EntityWriteOperation.insert, + fields={"AnyField": "value"}, + ) + errors = validate_mutation_intent(intent, write_schemas=None) + assert len(errors) == 1 + assert "not configured for writes" in errors[0] + + def test_empty_write_schemas_returns_error(self) -> None: + """Empty write_schemas means nothing is writable.""" + intent = DataFabricWriteInput( + entity_key="SomeEntity", + operation=EntityWriteOperation.delete, + record_id="rec-99", + ) + errors = validate_mutation_intent(intent, write_schemas={}) + assert len(errors) == 1 + assert "not configured for writes" in errors[0] + + # -- Strict mode (context-derived) -- + + def test_strict_insert_valid(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test", "Amount": 50}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert errors == [] + + def test_strict_insert_missing_required(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"Amount": 50}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert len(errors) == 1 + assert 
"OrderName" in errors[0] + + def test_strict_insert_unknown_field(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test", "Bogus": "value"}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert len(errors) == 1 + assert "Bogus" in errors[0] + + def test_strict_update_no_required_enforcement(self) -> None: + """UPDATE does not enforce required fields - agent decides what to change.""" + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + record_id="rec-1", + fields={"Notes": "Updated notes"}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert errors == [] + + def test_strict_update_unknown_field(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + record_id="rec-1", + fields={"Bogus": "value"}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert len(errors) == 1 + assert "Bogus" in errors[0] + + def test_strict_delete_valid(self) -> None: + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.delete, + record_id="rec-1", + ) + errors = validate_mutation_intent(intent, self._schema()) + assert errors == [] + + def test_multiple_errors(self) -> None: + """INSERT with unknown fields AND missing required fields.""" + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"Bogus": "value"}, + ) + errors = validate_mutation_intent(intent, self._schema()) + assert len(errors) == 2 # unknown field + missing required From 96912a610eecfe6738274637bcdd18d45a7908f3 Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Sat, 27 Jun 2026 11:46:50 +0530 Subject: [PATCH 2/6] feat: add optional OWL ontology compiler for Data Fabric writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds the ontology 
layer from write RFC v2. The OWL ontology is the authoring/storage format; it is compiled into a CompiledOntology intermediate representation (NOT injected as raw OWL — research shows LLMs reason poorly over raw OWL Turtle). - compiled_ontology.py: CompiledOntology model (entity_access, measure_fields, state_fields, reference_fields, hitl_operations, entity_relationships) per RFC §5.2 - ontology_compiler.py: compile_ontology(owl_turtle) via rdflib. Supports both ontology dialects — the .ttl dialect (rdfs:subClassOf df:WritableEntity + action-derived ops + df:hasField) and the RFC dialect (a df:WritableEntity + df:allowsOperation). Resilient to partial annotations; raises OntologyCompileError only on malformed Turtle. - write_validation.py: validate_mutation_intent gains optional compiled_ontology — rejects operations not in entity_access. State transition validation deferred to v3 (documented TODO). - datafabric_tool.py: DataFabricWriteHandler best-effort fetches + compiles the ontology via get_ontology_file_async. The method is absent from the current platform package, so this degrades gracefully to the metadata-only path (compiled_ontology stays None) — the build does not break. - rdflib>=7.0.0 added to dependencies. 23 new tests (refund + order-management dialects, RFC dialect, graceful paths). 109 datafabric_tool tests pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 1 + .../datafabric_tool/compiled_ontology.py | 62 ++++ .../tools/datafabric_tool/datafabric_tool.py | 58 +++- .../datafabric_tool/ontology_compiler.py | 317 ++++++++++++++++++ .../tools/datafabric_tool/write_validation.py | 28 ++ .../datafabric_tool/test_ontology_compiler.py | 295 ++++++++++++++++ .../datafabric_tool/test_write_validation.py | 86 +++++ uv.lock | 23 ++ 8 files changed, 867 insertions(+), 3 deletions(-) create mode 100644 src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py create mode 100644 src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py create mode 100644 tests/agent/tools/datafabric_tool/test_ontology_compiler.py diff --git a/pyproject.toml b/pyproject.toml index 54f92dd67..bfe72d016 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "pillow>=12.1.1", "a2a-sdk>=0.2.0,<1.0.0", "uipath-langchain-client[openai]>=1.14.0,<1.15.0", + "rdflib>=7.0.0,<8.0.0", ] classifiers = [ diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py b/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py new file mode 100644 index 000000000..0cf0b596f --- /dev/null +++ b/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py @@ -0,0 +1,62 @@ +"""Compiled OWL ontology model for Data Fabric writes. + +A ``CompiledOntology`` is the intermediate representation produced by +``ontology_compiler.compile_ontology`` from an OWL 2 QL Turtle source +(the ``df:`` write-extension vocabulary, see ``p1-owl-write-extension.ttl``). + +The ontology is OPTIONAL. 
When present it enriches and constrains writes +with semantics that entity metadata alone cannot express: + + - which entities are writable and which operations they allow + - field semantics (state / measure / reference) + - HITL markers on destructive operations + - entity-to-entity relationships + +When the ontology is absent the metadata-only write path still works +(graceful fallback). See RFC ``p1-write-rfc-v2-ontology.md`` §5.2. +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + + +class CompiledOntology(BaseModel): + """Result of compiling an OWL write-extension ontology. + + All members are keyed by the ``df:entityKey`` / ``df:fieldKey`` strings, + which are the exact values the LLM uses in ``DataFabricWriteInput``. + Field-level members are keyed as ``"entity_key.field_key"``. + + Every member defaults to empty, so a partial or empty ontology yields a + valid (if sparse) ``CompiledOntology`` rather than raising. + """ + + entity_access: dict[str, set[str]] = Field(default_factory=dict) + """entity_key -> set of allowed operations, e.g. 
``{"insert", "update"}``.""" + + measure_fields: dict[str, str] = Field(default_factory=dict) + """``"entity_key.field_key"`` -> ``"additive"`` | ``"replacement"``.""" + + state_fields: dict[str, str] = Field(default_factory=dict) + """``"entity_key.field_key"`` -> choiceset / state-machine key.""" + + reference_fields: dict[str, str] = Field(default_factory=dict) + """``"entity_key.field_key"`` -> referenced entity_key (FK target).""" + + hitl_operations: dict[str, set[str]] = Field(default_factory=dict) + """entity_key -> set of operations that require human-in-the-loop.""" + + entity_relationships: dict[str, list[str]] = Field(default_factory=dict) + """entity_key -> list of referenced entity_keys (semantic relationships).""" + + def is_empty(self) -> bool: + """True if no ontology facts were extracted (graceful-fallback signal).""" + return not ( + self.entity_access + or self.measure_fields + or self.state_fields + or self.reference_fields + or self.hitl_operations + or self.entity_relationships + ) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py index 08198ffde..324e861e1 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py @@ -27,6 +27,7 @@ from uipath.platform.entities import DataFabricEntityItem from ..base_uipath_structured_tool import BaseUiPathStructuredTool +from .compiled_ontology import CompiledOntology from .models import ( DataFabricQueryInput, DataFabricWriteInput, @@ -168,6 +169,7 @@ def __init__( self._entity_set = entity_set self._write_schemas: dict[str, EntityWriteSchema] | None = None self._write_tool_description: str | None = None + self._compiled_ontology: CompiledOntology | None = None self._executor: Any | None = None self._init_lock = asyncio.Lock() @@ -203,12 +205,60 @@ async def _ensure_initialized(self) -> None: 
writable_fields=writable, ) + # Optional ontology layer: fetch + compile the OWL ontology if the + # platform exposes get_ontology_file_async. This method may only + # exist on a feature branch — if it is absent we degrade gracefully + # to the metadata-only write path (compiled_ontology stays None). + self._compiled_ontology = await self._maybe_compile_ontology( + resolution.entities_service + ) + + entity_access = ( + self._compiled_ontology.entity_access + if self._compiled_ontology + else None + ) self._write_tool_description = build_write_tool_description( - self._write_schemas + self._write_schemas, + entity_access=entity_access, ) self._executor = WriteExecutor(resolution.entities_service) + async def _maybe_compile_ontology( + self, entities_service: Any + ) -> CompiledOntology | None: + """Best-effort fetch + compile of the optional OWL ontology. + + Returns the compiled ontology, or ``None`` when no ontology is + available or the platform package does not expose the fetch method. + Never raises — any failure degrades to the metadata-only path. + """ + get_ontology = getattr(entities_service, "get_ontology_file_async", None) + if not callable(get_ontology): + logger.debug( + "EntitiesService has no get_ontology_file_async; " + "skipping ontology compilation (metadata-only writes)." 
+ ) + return None + + from .ontology_compiler import compile_ontology + + try: + owl_turtle = await get_ontology("owl") + if not owl_turtle: + logger.debug("No OWL ontology returned; metadata-only writes.") + return None + compiled = compile_ontology(owl_turtle) + logger.debug( + "Compiled ontology with %d writable entities.", + len(compiled.entity_access), + ) + return compiled + except Exception as exc: # graceful no-op on any fetch/parse failure + logger.debug("Ontology fetch/compile skipped: %s", exc) + return None + async def __call__( self, entity_key: str, @@ -243,8 +293,10 @@ async def __call__( fields=fields, ) - # Validate - errors = validate_mutation_intent(intent, self._write_schemas) + # Validate (ontology, when present, constrains allowed operations) + errors = validate_mutation_intent( + intent, self._write_schemas, self._compiled_ontology + ) if errors: return json.dumps( { diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py new file mode 100644 index 000000000..27ebbd2ec --- /dev/null +++ b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py @@ -0,0 +1,317 @@ +"""Compile an OWL 2 QL Turtle ontology into a ``CompiledOntology``. + +The OWL source is the *authoring and storage* format (see +``p1-owl-write-extension.ttl``). It is NOT the prompt format — it is +compiled here into a structured ``CompiledOntology`` that the write +validator and tool-description builder consume. + +Two ontology dialects are supported, because the RFC prose +(``p1-write-rfc-v2-ontology.md`` §4.1) and the shipped ``df:`` vocabulary +(``p1-owl-write-extension.ttl``) differ slightly: + + Entity access modes + - .ttl dialect: ``ex:Order rdfs:subClassOf df:WritableEntity`` and the + allowed operations come from the entity's actions + (``df:hasAction`` -> action ``df:writeOperation``). 
+ - RFC dialect: ``ex:Order a df:WritableEntity ; df:allowsOperation "update"``. + Both are extracted. + + Field semantics + - Fields are individuals typed ``df:StateField`` / ``df:MeasureField`` / + ``df:ReferenceField`` / ``df:ForeignKeyField`` with a ``df:fieldKey``. + - Fields are bound to their owning entity via ``ex:Entity df:hasField + ex:field_...``. When a field is not bound via ``df:hasField`` (RFC + dialect omits it) we fall back to matching the field individual's + local name (``field_<Entity>_<...>``) against entity local names. + + HITL + - ``action df:requiresHITL true`` with ``df:targetEntity`` + + ``df:writeOperation`` -> ``hitl_operations[entity] = {op}``. + + Relationships + - ``ex:A df:relatedEntity ex:B`` (.ttl) or ``ex:A df:referencesEntity + ex:B`` between two entities (RFC) -> ``entity_relationships``. + +The compiler is resilient to missing or partial annotations: it extracts +only what is present and never raises on a well-formed-but-sparse ontology. +A parse error on malformed Turtle is surfaced as ``OntologyCompileError``. 
+""" + +from __future__ import annotations + +import logging + +from rdflib import RDF, RDFS, Graph, URIRef +from rdflib.term import Literal, Node + +from .compiled_ontology import CompiledOntology + +logger = logging.getLogger(__name__) + +DF = "https://ontology.uipath.com/datafabric#" + + +class OntologyCompileError(ValueError): + """Raised when the OWL Turtle source cannot be parsed.""" + + +# df: vocabulary terms ------------------------------------------------------ + +_WRITABLE_ENTITY = URIRef(DF + "WritableEntity") +_STATE_FIELD = URIRef(DF + "StateField") +_MEASURE_FIELD = URIRef(DF + "MeasureField") +_REFERENCE_FIELD = URIRef(DF + "ReferenceField") +_FOREIGN_KEY_FIELD = URIRef(DF + "ForeignKeyField") + +_ENTITY_KEY = URIRef(DF + "entityKey") +_FIELD_KEY = URIRef(DF + "fieldKey") +_HAS_FIELD = URIRef(DF + "hasField") +_HAS_ACTION = URIRef(DF + "hasAction") +_ALLOWS_OPERATION = URIRef(DF + "allowsOperation") +_WRITE_OPERATION = URIRef(DF + "writeOperation") +_TARGET_ENTITY = URIRef(DF + "targetEntity") +_REQUIRES_HITL = URIRef(DF + "requiresHITL") +_MEASURE_SEMANTICS = URIRef(DF + "measureSemantics") +_CHOICESET_KEY = URIRef(DF + "choiceSetKey") +_GOVERNED_BY = URIRef(DF + "governedBy") +_REFERENCES_ENTITY = URIRef(DF + "referencesEntity") +_RELATED_ENTITY = URIRef(DF + "relatedEntity") + +_VALID_OPS = {"insert", "update", "delete"} + + +def _local_name(node: Node) -> str: + """Return the fragment / last path segment of a URI (e.g. ``ex:Order`` -> ``Order``).""" + text = str(node) + if "#" in text: + return text.rsplit("#", 1)[1] + return text.rsplit("/", 1)[-1] + + +def compile_ontology(owl_turtle: str) -> CompiledOntology: + """Parse an OWL 2 QL Turtle ontology into a ``CompiledOntology``. + + Extracts entity access modes, field semantics (measure / state / + reference), HITL markers, and entity relationships from the ``df:`` + vocabulary. Resilient to missing or partial annotations — only + extracts what is present. 
+ + Args: + owl_turtle: The OWL ontology serialised as Turtle. + + Returns: + A ``CompiledOntology``. Empty/whitespace input yields an empty + ``CompiledOntology`` (``is_empty()`` is True). + + Raises: + OntologyCompileError: if the Turtle source is malformed. + """ + if not owl_turtle or not owl_turtle.strip(): + return CompiledOntology() + + graph = Graph() + try: + graph.parse(data=owl_turtle, format="turtle") + except Exception as exc: # rdflib raises a variety of parser exceptions + raise OntologyCompileError(f"Failed to parse OWL Turtle: {exc}") from exc + + # 1. Map each entity *individual* (URIRef) -> its df:entityKey string. + # Entities are anything carrying df:entityKey (typed/subclassed as an + # entity by the vocabulary). We key the compiled output by entityKey. + entity_key_by_uri: dict[URIRef, str] = {} + for subj, _pred, obj in graph.triples((None, _ENTITY_KEY, None)): + if isinstance(subj, URIRef) and isinstance(obj, Literal): + entity_key_by_uri[subj] = str(obj) + + # 2. Determine writable entities and their allowed operations. + entity_access: dict[str, set[str]] = {} + + def _writable_uris() -> set[URIRef]: + uris: set[URIRef] = set() + # rdf:type df:WritableEntity (RFC dialect) + for subj in graph.subjects(RDF.type, _WRITABLE_ENTITY): + if isinstance(subj, URIRef): + uris.add(subj) + # rdfs:subClassOf df:WritableEntity (.ttl dialect) + for subj in graph.subjects(RDFS.subClassOf, _WRITABLE_ENTITY): + if isinstance(subj, URIRef): + uris.add(subj) + return uris + + for entity_uri in _writable_uris(): + key = entity_key_by_uri.get(entity_uri) + if key is None: + continue + entity_access.setdefault(key, set()) + + # 2a. Direct df:allowsOperation (RFC dialect). + for subj, _pred, obj in graph.triples((None, _ALLOWS_OPERATION, None)): + key = entity_key_by_uri.get(subj) if isinstance(subj, URIRef) else None + if key is None: + continue + op = str(obj).strip().lower() + if op in _VALID_OPS: + entity_access.setdefault(key, set()).add(op) + + # 2b. 
Action-derived operations (.ttl dialect): an action declares a + # df:writeOperation and df:targetEntity; the entity may also bind the + # action via df:hasAction. Build action -> (op, target entity key). + action_op: dict[URIRef, str] = {} + action_target_key: dict[URIRef, str] = {} + for subj, _pred, obj in graph.triples((None, _WRITE_OPERATION, None)): + if isinstance(subj, URIRef): + op = str(obj).strip().lower() + if op in _VALID_OPS: + action_op[subj] = op + for subj, _pred, obj in graph.triples((None, _TARGET_ENTITY, None)): + if isinstance(subj, URIRef) and isinstance(obj, URIRef): + target_key = entity_key_by_uri.get(obj) + if target_key is not None: + action_target_key[subj] = target_key + # Also honour df:hasAction (entity -> action) as a target source. + for entity_uri, _pred, action_uri in graph.triples((None, _HAS_ACTION, None)): + if not (isinstance(entity_uri, URIRef) and isinstance(action_uri, URIRef)): + continue + key = entity_key_by_uri.get(entity_uri) + if key is not None: + action_target_key.setdefault(action_uri, key) + + for action_uri, op in action_op.items(): + target_key = action_target_key.get(action_uri) + if target_key is not None: + entity_access.setdefault(target_key, set()).add(op) + + # 3. HITL operations: action df:requiresHITL true -> hitl_operations. + hitl_operations: dict[str, set[str]] = {} + for subj, _pred, obj in graph.triples((None, _REQUIRES_HITL, None)): + if not isinstance(subj, URIRef): + continue + if not (isinstance(obj, Literal) and bool(obj.toPython())): + continue + target_key = action_target_key.get(subj) + op = action_op.get(subj) + if target_key is not None and op is not None: + hitl_operations.setdefault(target_key, set()).add(op) + + # 4. Field -> owning entity binding. + # Primary: ex:Entity df:hasField ex:field_... (the .ttl dialect). 
+ field_entity_key: dict[URIRef, str] = {} + for entity_uri, _pred, field_uri in graph.triples((None, _HAS_FIELD, None)): + if not (isinstance(entity_uri, URIRef) and isinstance(field_uri, URIRef)): + continue + key = entity_key_by_uri.get(entity_uri) + if key is not None: + field_entity_key[field_uri] = key + + # Fallback: infer owning entity from the field's local name + # field_<Entity>_<...> matched against entity local names. + entity_localname_to_key = { + _local_name(uri): key for uri, key in entity_key_by_uri.items() + } + + def _owning_entity_key(field_uri: URIRef) -> str | None: + if field_uri in field_entity_key: + return field_entity_key[field_uri] + local = _local_name(field_uri) + if local.startswith("field_"): + remainder = local[len("field_") :] + # Greedily match the longest entity local-name prefix. + best: str | None = None + for ent_local in entity_localname_to_key: + if remainder.startswith(ent_local + "_") or remainder == ent_local: + if best is None or len(ent_local) > len(best): + best = ent_local + if best is not None: + return entity_localname_to_key[best] + return None + + def _field_key(field_uri: URIRef) -> str | None: + val = graph.value(field_uri, _FIELD_KEY) + return str(val) if val is not None else None + + def _compound_key(field_uri: URIRef) -> str | None: + entity_key = _owning_entity_key(field_uri) + field_key = _field_key(field_uri) + if entity_key is None or field_key is None: + return None + return f"{entity_key}.{field_key}" + + # 5. Field semantics. + measure_fields: dict[str, str] = {} + state_fields: dict[str, str] = {} + reference_fields: dict[str, str] = {} + + # Measure fields. 
+ for field_uri in graph.subjects(RDF.type, _MEASURE_FIELD): + if not isinstance(field_uri, URIRef): + continue + compound = _compound_key(field_uri) + if compound is None: + continue + semantics = graph.value(field_uri, _MEASURE_SEMANTICS) + measure_fields[compound] = ( + str(semantics).strip().lower() if semantics is not None else "replacement" + ) + + # State fields -> choiceset / state-machine key. + for field_uri in graph.subjects(RDF.type, _STATE_FIELD): + if not isinstance(field_uri, URIRef): + continue + compound = _compound_key(field_uri) + if compound is None: + continue + # Prefer df:choiceSetKey (RFC); fall back to df:governedBy (.ttl). + cs = graph.value(field_uri, _CHOICESET_KEY) + if cs is not None: + state_fields[compound] = str(cs) + else: + gov = graph.value(field_uri, _GOVERNED_BY) + state_fields[compound] = _local_name(gov) if isinstance(gov, URIRef) else "" + + # Reference / foreign-key fields -> referenced entity key. + for ref_type in (_REFERENCE_FIELD, _FOREIGN_KEY_FIELD): + for field_uri in graph.subjects(RDF.type, ref_type): + if not isinstance(field_uri, URIRef): + continue + compound = _compound_key(field_uri) + if compound is None: + continue + target = graph.value(field_uri, _REFERENCES_ENTITY) + if isinstance(target, URIRef): + target_key = entity_key_by_uri.get(target) or _local_name(target) + reference_fields[compound] = target_key + + # 6. Entity-to-entity relationships (df:relatedEntity or df:referencesEntity + # where BOTH subject and object are entities). + entity_relationships: dict[str, list[str]] = {} + for pred in (_RELATED_ENTITY, _REFERENCES_ENTITY): + for subj, _pred, obj in graph.triples((None, pred, None)): + if not (isinstance(subj, URIRef) and isinstance(obj, URIRef)): + continue + subj_key = entity_key_by_uri.get(subj) + obj_key = entity_key_by_uri.get(obj) + # Only entity<->entity relationships (skip field->entity FK triples). 
+ if subj_key is None or obj_key is None: + continue + targets = entity_relationships.setdefault(subj_key, []) + if obj_key not in targets: + targets.append(obj_key) + + compiled = CompiledOntology( + entity_access=entity_access, + measure_fields=measure_fields, + state_fields=state_fields, + reference_fields=reference_fields, + hitl_operations=hitl_operations, + entity_relationships=entity_relationships, + ) + logger.debug( + "Compiled ontology: %d writable entities, %d measure, %d state, " + "%d reference fields, %d HITL entities", + len(compiled.entity_access), + len(compiled.measure_fields), + len(compiled.state_fields), + len(compiled.reference_fields), + len(compiled.hitl_operations), + ) + return compiled diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py index ce3d6ef60..67a2d64c2 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py @@ -10,6 +10,7 @@ from uipath.platform.entities import Entity +from .compiled_ontology import CompiledOntology from .models import ( DataFabricWriteInput, EntityWriteOperation, @@ -79,16 +80,25 @@ def derive_writable_fields(entity: Entity) -> list[WritableFieldInfo]: def validate_mutation_intent( intent: DataFabricWriteInput, write_schemas: dict[str, EntityWriteSchema] | None = None, + compiled_ontology: CompiledOntology | None = None, ) -> list[str]: """Validate a write intent before executing. v1 only writes to context-derived writable entities. If the target entity is not present in *write_schemas* the request is rejected. + When a *compiled_ontology* is supplied (the optional OWL ontology layer, + see ``ontology_compiler``), the operation is additionally checked against + the ontology's per-entity access modes: an operation not listed in + ``entity_access[entity_key]`` is rejected. 
When the ontology is ``None`` + the metadata-only behaviour is unchanged. + Args: intent: The write operation intent to validate. write_schemas: Mapping of entity_key -> EntityWriteSchema for writable context-derived entities. + compiled_ontology: Optional compiled OWL ontology. When present its + ``entity_access`` constrains the allowed operations per entity. Returns: Empty list if valid; list of human-readable error strings otherwise. @@ -107,6 +117,24 @@ def validate_mutation_intent( ) return errors + # Ontology-derived: operation must be allowed for this entity. + # Only enforced when the ontology actually carries an access entry for + # this entity (graceful fallback when the ontology is partial/absent). + if compiled_ontology is not None: + allowed_ops = compiled_ontology.entity_access.get(intent.entity_key) + if allowed_ops is not None and op.value not in allowed_ops: + errors.append( + f"Operation '{op.value}' is not allowed on '{intent.entity_key}' " + f"by the ontology. Allowed operation(s): {sorted(allowed_ops)}" + ) + return errors + + # TODO(state-machine): when compiled_ontology.state_fields covers a field + # being written, validate that the new value is a legal transition from + # the current state. Requires reading the current record + the state + # machine's transition edges (df:fromState/df:toState). Deferred to v3 + # per RFC §9. For now state fields are validated only structurally. 
+ # Structural: DELETE and UPDATE require record_id if op in (EntityWriteOperation.delete, EntityWriteOperation.update): if not intent.record_id: diff --git a/tests/agent/tools/datafabric_tool/test_ontology_compiler.py b/tests/agent/tools/datafabric_tool/test_ontology_compiler.py new file mode 100644 index 000000000..37dedaa28 --- /dev/null +++ b/tests/agent/tools/datafabric_tool/test_ontology_compiler.py @@ -0,0 +1,295 @@ +"""Tests for the OWL ontology compiler (refund hero case).""" + +from __future__ import annotations + +import pytest + +from uipath_langchain.agent.tools.datafabric_tool.compiled_ontology import ( + CompiledOntology, +) +from uipath_langchain.agent.tools.datafabric_tool.ontology_compiler import ( + OntologyCompileError, + compile_ontology, +) + +# A small refund-domain ontology in the .ttl dialect (subClassOf + actions + +# df:hasField + df:requiresHITL), mirroring p1-owl-write-extension.ttl. +REFUND_OWL = """ +@prefix df: . +@prefix ex: . +@prefix owl: . +@prefix rdfs: . + +# ---- Entities ---- + +# Federated / read-only: NOT a df:WritableEntity +ex:Customer a owl:Class ; + rdfs:subClassOf df:ReadableEntity ; + df:entityKey "Customer" . + +# Writable entities +ex:RefundRequest a owl:Class ; + rdfs:subClassOf df:WritableEntity ; + df:entityKey "RefundRequest" . + +ex:Order a owl:Class ; + rdfs:subClassOf df:WritableEntity ; + df:entityKey "Order" . + +ex:CustomerRisk a owl:Class ; + rdfs:subClassOf df:WritableEntity ; + df:entityKey "CustomerRisk" . + +# ---- Fields ---- + +ex:field_RefundRequest_Amount a df:MeasureField ; + df:fieldKey "ApprovedAmount" ; + df:measureSemantics "additive" . + +ex:field_Order_Status a df:StateField ; + df:fieldKey "Status" ; + df:choiceSetKey "OrderStatusChoiceSet" . + +ex:field_RefundRequest_OrderId a df:ReferenceField ; + df:fieldKey "OrderId" ; + df:referencesEntity ex:Order . + +ex:field_CustomerRisk_Score a df:MeasureField ; + df:fieldKey "RiskScore" ; + df:measureSemantics "additive" . 
+ +# Field -> entity binding +ex:RefundRequest df:hasField ex:field_RefundRequest_Amount , + ex:field_RefundRequest_OrderId . +ex:Order df:hasField ex:field_Order_Status . +ex:CustomerRisk df:hasField ex:field_CustomerRisk_Score . + +# ---- Actions ---- + +ex:CreateRefund a df:InsertAction ; + df:writeOperation "insert" ; + df:targetEntity ex:RefundRequest ; + df:requiresHITL false . + +ex:UpdateOrder a df:UpdateAction ; + df:writeOperation "update" ; + df:targetEntity ex:Order ; + df:requiresHITL false . + +ex:DeleteRefund a df:DeleteAction ; + df:writeOperation "delete" ; + df:targetEntity ex:RefundRequest ; + df:requiresHITL true . + +ex:RefundRequest df:hasAction ex:CreateRefund , ex:DeleteRefund . +ex:Order df:hasAction ex:UpdateOrder . + +# ---- Relationships ---- +ex:RefundRequest df:relatedEntity ex:Order . +ex:RefundRequest df:relatedEntity ex:Customer . +ex:CustomerRisk df:relatedEntity ex:Customer . +""" + + +class TestCompileRefundOntology: + """End-to-end extraction over the refund hero case.""" + + @pytest.fixture + def compiled(self) -> CompiledOntology: + return compile_ontology(REFUND_OWL) + + def test_returns_compiled_ontology(self, compiled: CompiledOntology) -> None: + assert isinstance(compiled, CompiledOntology) + assert not compiled.is_empty() + + def test_writable_entities_extracted(self, compiled: CompiledOntology) -> None: + assert set(compiled.entity_access.keys()) == { + "RefundRequest", + "Order", + "CustomerRisk", + } + + def test_read_only_entity_not_writable(self, compiled: CompiledOntology) -> None: + # Customer is df:ReadableEntity only -> never in entity_access. + assert "Customer" not in compiled.entity_access + + def test_allowed_operations_from_actions(self, compiled: CompiledOntology) -> None: + # RefundRequest has an insert action and a delete action. 
+ assert compiled.entity_access["RefundRequest"] == {"insert", "delete"} + assert compiled.entity_access["Order"] == {"update"} + # CustomerRisk is writable but has no action -> empty op set. + assert compiled.entity_access["CustomerRisk"] == set() + + def test_measure_field_additive(self, compiled: CompiledOntology) -> None: + assert compiled.measure_fields["RefundRequest.ApprovedAmount"] == "additive" + assert compiled.measure_fields["CustomerRisk.RiskScore"] == "additive" + + def test_state_field_choiceset(self, compiled: CompiledOntology) -> None: + assert compiled.state_fields["Order.Status"] == "OrderStatusChoiceSet" + + def test_reference_field_target(self, compiled: CompiledOntology) -> None: + assert compiled.reference_fields["RefundRequest.OrderId"] == "Order" + + def test_hitl_on_destructive_op(self, compiled: CompiledOntology) -> None: + # DeleteRefund requires HITL -> delete op flagged on RefundRequest. + assert compiled.hitl_operations.get("RefundRequest") == {"delete"} + # Non-destructive ops are not flagged. + assert "Order" not in compiled.hitl_operations + + def test_entity_relationships(self, compiled: CompiledOntology) -> None: + rels = compiled.entity_relationships["RefundRequest"] + assert set(rels) == {"Order", "Customer"} + assert compiled.entity_relationships["CustomerRisk"] == ["Customer"] + + +class TestRfcDialect: + """The RFC §4.1 dialect: rdf:type df:WritableEntity + df:allowsOperation.""" + + def test_allows_operation_dialect(self) -> None: + owl = """ + @prefix df: . + @prefix ex: . + @prefix owl: . + + ex:Contact a owl:Class, df:WritableEntity ; + df:entityKey "Contact" ; + df:allowsOperation "update" . + + ex:RefundRequest a owl:Class, df:WritableEntity ; + df:entityKey "RefundRequest" ; + df:allowsOperation "insert" . 
+ """ + compiled = compile_ontology(owl) + assert compiled.entity_access["Contact"] == {"update"} + assert compiled.entity_access["RefundRequest"] == {"insert"} + + def test_field_binding_via_local_name_fallback(self) -> None: + # No df:hasField -> compiler infers owner from field__ name. + owl = """ + @prefix df: . + @prefix ex: . + @prefix owl: . + + ex:CustomerRisk a owl:Class, df:WritableEntity ; + df:entityKey "CustomerRisk" ; + df:allowsOperation "update" . + + ex:field_CustomerRisk_Score a df:MeasureField ; + df:fieldKey "RiskScore" ; + df:measureSemantics "additive" . + """ + compiled = compile_ontology(owl) + assert compiled.measure_fields["CustomerRisk.RiskScore"] == "additive" + + +class TestGracefulPaths: + """Empty / partial / malformed inputs.""" + + def test_empty_string_returns_empty_ontology(self) -> None: + compiled = compile_ontology("") + assert isinstance(compiled, CompiledOntology) + assert compiled.is_empty() + + def test_whitespace_returns_empty_ontology(self) -> None: + assert compile_ontology(" \n ").is_empty() + + def test_prefixes_only_returns_empty(self) -> None: + owl = "@prefix df: .\n" + assert compile_ontology(owl).is_empty() + + def test_partial_ontology_extracts_what_is_present(self) -> None: + # Only one writable entity, no fields/actions/relationships. + owl = """ + @prefix df: . + @prefix ex: . + @prefix owl: . + + ex:Order a owl:Class, df:WritableEntity ; + df:entityKey "Order" . + """ + compiled = compile_ontology(owl) + assert compiled.entity_access == {"Order": set()} + assert compiled.measure_fields == {} + assert compiled.entity_relationships == {} + + def test_malformed_turtle_raises_compile_error(self) -> None: + owl = "this is not valid turtle <<< @@@ ;;;" + with pytest.raises(OntologyCompileError): + compile_ontology(owl) + + def test_measure_field_without_semantics_defaults_replacement(self) -> None: + owl = """ + @prefix df: . + @prefix ex: . + @prefix owl: . 
+ + ex:Acct a owl:Class, df:WritableEntity ; df:entityKey "Acct" . + ex:field_Acct_Bal a df:MeasureField ; df:fieldKey "Balance" . + ex:Acct df:hasField ex:field_Acct_Bal . + """ + compiled = compile_ontology(owl) + assert compiled.measure_fields["Acct.Balance"] == "replacement" + + +# The order-management domain, mirroring p1-owl-write-extension.ttl exactly +# (subClassOf dialect, action-derived ops, df:hasField, df:relatedEntity). +# Self-contained so the test does not depend on the sibling df-agent-os repo. +ORDER_OWL = """ +@prefix df: . +@prefix ex: . +@prefix owl: . +@prefix rdfs: . + +ex:Customer a owl:Class ; rdfs:subClassOf df:ReadableEntity ; df:entityKey "Customer" . +ex:Product a owl:Class ; rdfs:subClassOf df:ReadableEntity ; df:entityKey "Product" . +ex:Order a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "Order" . +ex:OrderItem a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "OrderItem" . + +ex:field_Order_Status a df:StateField ; + df:fieldKey "Status" ; df:governedBy ex:OrderStatusMachine . +ex:field_Order_CustomerId a df:ReferenceField ; + df:fieldKey "CustomerId" ; df:referencesEntity ex:Customer . +ex:field_OrderItem_OrderId a df:ReferenceField ; + df:fieldKey "OrderId" ; df:referencesEntity ex:Order . +ex:field_OrderItem_ProductId a df:ReferenceField ; + df:fieldKey "ProductId" ; df:referencesEntity ex:Product . + +ex:Order df:hasField ex:field_Order_Status , ex:field_Order_CustomerId . +ex:OrderItem df:hasField ex:field_OrderItem_OrderId , ex:field_OrderItem_ProductId . + +ex:CreateOrder a df:InsertAction ; df:writeOperation "insert" ; df:targetEntity ex:Order . +ex:UpdateOrderStatus a df:UpdateAction ; df:writeOperation "update" ; + df:targetEntity ex:Order ; df:requiresHITL true . +ex:AddOrderItem a df:InsertAction ; df:writeOperation "insert" ; df:targetEntity ex:OrderItem . +ex:DeleteOrderItem a df:DeleteAction ; df:writeOperation "delete" ; + df:targetEntity ex:OrderItem ; df:requiresHITL true . 
+ +ex:Order df:hasAction ex:CreateOrder , ex:UpdateOrderStatus . +ex:OrderItem df:hasAction ex:AddOrderItem , ex:DeleteOrderItem . + +ex:Order df:relatedEntity ex:Customer , ex:OrderItem . +ex:OrderItem df:relatedEntity ex:Order , ex:Product . +""" + + +def test_compiles_order_management_dialect(): + """The order-management example (mirrors p1-owl-write-extension.ttl) compiles. + + Guards the .ttl dialect: rdfs:subClassOf df:WritableEntity, action-derived + operations, df:hasField binding, df:governedBy state machines, and + df:relatedEntity relationships. Read-only entities (Customer, Product) + must be excluded from the writable set. + """ + compiled = compile_ontology(ORDER_OWL) + + assert not compiled.is_empty() + assert compiled.entity_access["Order"] == {"insert", "update"} + assert compiled.entity_access["OrderItem"] == {"insert", "delete"} + assert "Customer" not in compiled.entity_access + assert "Product" not in compiled.entity_access + assert compiled.hitl_operations["Order"] == {"update"} + assert compiled.hitl_operations["OrderItem"] == {"delete"} + assert compiled.state_fields["Order.Status"] == "OrderStatusMachine" + assert compiled.reference_fields["Order.CustomerId"] == "Customer" + assert compiled.reference_fields["OrderItem.OrderId"] == "Order" + assert compiled.reference_fields["OrderItem.ProductId"] == "Product" diff --git a/tests/agent/tools/datafabric_tool/test_write_validation.py b/tests/agent/tools/datafabric_tool/test_write_validation.py index 01260ac48..43c0770eb 100644 --- a/tests/agent/tools/datafabric_tool/test_write_validation.py +++ b/tests/agent/tools/datafabric_tool/test_write_validation.py @@ -4,6 +4,9 @@ from unittest.mock import MagicMock +from uipath_langchain.agent.tools.datafabric_tool.compiled_ontology import ( + CompiledOntology, +) from uipath_langchain.agent.tools.datafabric_tool.models import ( DataFabricWriteInput, EntityWriteOperation, @@ -431,3 +434,86 @@ def test_multiple_errors(self) -> None: ) errors = 
validate_mutation_intent(intent, self._schema()) assert len(errors) == 2 # unknown field + missing required + + +class TestValidateMutationIntentWithOntology: + """Ontology-constrained operation validation (optional layer).""" + + def _schema(self) -> dict[str, EntityWriteSchema]: + return { + "Orders": EntityWriteSchema( + entity_key="Orders", + display_name="Orders", + writable_fields=[ + WritableFieldInfo( + name="OrderName", + display_name="Order Name", + type_name="varchar", + is_required=True, + ), + WritableFieldInfo( + name="Amount", + display_name="Amount", + type_name="decimal", + is_required=False, + ), + ], + ) + } + + def test_ontology_disallows_operation_returns_error(self) -> None: + """Ontology allows only update on Orders -> insert is rejected.""" + ontology = CompiledOntology(entity_access={"Orders": {"update"}}) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test"}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + assert len(errors) == 1 + assert "not allowed" in errors[0] + assert "insert" in errors[0] + assert "update" in errors[0] # lists allowed ops + + def test_ontology_allows_operation_passes(self) -> None: + """Ontology allows insert -> a valid insert passes.""" + ontology = CompiledOntology(entity_access={"Orders": {"insert", "update"}}) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test", "Amount": 50}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + assert errors == [] + + def test_ontology_allows_operation_update_passes(self) -> None: + ontology = CompiledOntology(entity_access={"Orders": {"update"}}) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + record_id="rec-1", + fields={"Amount": 99}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + assert errors == [] 
+ + def test_ontology_none_preserves_existing_behavior(self) -> None: + """ontology=None -> metadata-only validation, insert still allowed.""" + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test", "Amount": 50}, + ) + errors = validate_mutation_intent(intent, self._schema(), None) + assert errors == [] + + def test_ontology_without_entry_for_entity_does_not_constrain(self) -> None: + """Entity absent from ontology.entity_access -> no op constraint applied.""" + ontology = CompiledOntology(entity_access={"OtherEntity": {"update"}}) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test"}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + assert errors == [] diff --git a/uv.lock b/uv.lock index 51ceb50ae..fc9becb00 100644 --- a/uv.lock +++ b/uv.lock @@ -3412,6 +3412,15 @@ crypto = [ { name = "cryptography" }, ] +[[package]] +name = "pyparsing" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, +] + [[package]] name = "pysignalr" version = "1.3.0" @@ -3699,6 +3708,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "rdflib" +version = "7.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/f5/18bb77b7af9526add0c727a3b2048959847dc5fb030913e2918bf384fec3/rdflib-7.6.0.tar.gz", hash = "sha256:6c831288d5e4a5a7ece85d0ccde9877d512a3d0f02d7c06455d00d6d0ea379df", size = 4943826, upload-time = "2026-02-13T07:15:55.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c2/6604a71269e0c1bd75656d5a001432d16f2cc5b8c057140ec797155c295e/rdflib-7.6.0-py3-none-any.whl", hash = "sha256:30c0a3ebf4c0e09215f066be7246794b6492e054e782d7ac2a34c9f70a15e0dd", size = 615416, upload-time = "2026-02-13T07:15:46.487Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -4405,6 +4426,7 @@ dependencies = [ { name = "pillow" }, { name = "pydantic-settings" }, { name = "python-dotenv" }, + { name = "rdflib" }, { name = "uipath" }, { name = "uipath-core" }, { name = "uipath-langchain-client", extra = ["openai"] }, @@ -4463,6 +4485,7 @@ requires-dist = [ { name = "pillow", specifier = ">=12.1.1" }, { name = "pydantic-settings", specifier = ">=2.6.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "rdflib", specifier = ">=7.0.0,<8.0.0" }, { name = "uipath", specifier = ">=2.10.79,<2.12.0" }, { name = "uipath-core", specifier = ">=0.5.20,<0.6.0" }, { name = "uipath-langchain-client", extras = ["all"], marker = "extra == 'all'", specifier = ">=1.14.0,<1.15.0" }, From 4ea506293f3be7e69629d61e581f396e4385b311 Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Sat, 27 Jun 2026 12:13:11 +0530 Subject: [PATCH 3/6] feat: CLI helper to run a coded agent with an injected OWL ontology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/run_agent_with_ontology.py 
bridges the gap until the platform ships ontology storage/fetch: it monkeypatches EntitiesService.get_ontology_file_async (currently absent) to return a user-supplied .ttl, which activates the real _maybe_compile_ontology path in DataFabricWriteHandler — the ontology is compiled and used in write validation + tool description exactly as it will be once the platform method lands. Usage: python scripts/run_agent_with_ontology.py \ --ontology PATH.ttl --entity-set PATH.json --prompt "..." \ [--model NAME] [--system-prompt PATH.txt] [--dry-run] --dry-run compiles the ontology and prints the extracted facts (entity_access, hitl_operations, state/reference/measure fields, relationships) WITHOUT network. The live run needs UiPath auth env vars + real tenant entity ids. Companions: - sample_refund_entity_set.json (hero-case entities, placeholder ids) - sample_refund_sop.txt (refund SOP from RFC §4.3) - README_run_agent_with_ontology.md (mechanism + offline/real run steps) Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/README_run_agent_with_ontology.md | 110 ++++++ scripts/run_agent_with_ontology.py | 415 ++++++++++++++++++++++ scripts/sample_refund_entity_set.json | 37 ++ scripts/sample_refund_sop.txt | 39 ++ 4 files changed, 601 insertions(+) create mode 100644 scripts/README_run_agent_with_ontology.md create mode 100644 scripts/run_agent_with_ontology.py create mode 100644 scripts/sample_refund_entity_set.json create mode 100644 scripts/sample_refund_sop.txt diff --git a/scripts/README_run_agent_with_ontology.md b/scripts/README_run_agent_with_ontology.md new file mode 100644 index 000000000..01ca7ca25 --- /dev/null +++ b/scripts/README_run_agent_with_ontology.md @@ -0,0 +1,110 @@ +# run_agent_with_ontology.py + +A standalone CLI helper that loads an OWL ontology, injects it into the Data +Fabric **write** tool's fetch path, and runs a coded ReAct agent against a Data +Fabric entity set. + +This is a developer helper, not part of the shipped package. 
No tests cover it. + +## What it does + +1. **Compiles + prints the ontology up front.** It runs + `compile_ontology()` on your `.ttl` and prints the extracted facts — + `entity_access`, `hitl_operations`, `state_fields`, `reference_fields`, + `measure_fields`, `entity_relationships`. This validates the `.ttl` and + shows exactly what the ontology contributes before any agent work. This step + needs **no network**. +2. **Bridges a not-yet-shipped platform method** (see below). +3. Builds the Data Fabric read + write tools, force-initializes the write + handler, prints the compiled ontology actually in use and the generated + write tool description, then (unless `--dry-run`) runs the agent. + +## The monkeypatch bridge — and why it exists + +The runtime's `DataFabricWriteHandler._maybe_compile_ontology()` fetches the +ontology by calling: + +```python +entities_service.get_ontology_file_async("owl") +``` + +**The platform does not yet expose that method.** Verified: the attribute is +absent on +`uipath.platform.entities._entities_service.EntitiesService`. When it is +missing, the handler degrades to the metadata-only write path +(`_compiled_ontology` stays `None`). + +This CLI closes that gap by monkeypatching the method onto the class **before** +the handler runs: + +``` +uipath.platform.entities._entities_service.EntitiesService.get_ontology_file_async +``` + +The injected async method returns the text of your `--ontology` file for the +`"owl"` file type. The handler's own `_maybe_compile_ontology` then discovers +it via `getattr`, compiles it, and uses it in write validation and the write +tool description — exactly as it will behave once the platform ships the real +method. + +A class-level patch is used (not an instance patch) because the handler +constructs `UiPath()` internally and resolves the `EntitiesService` lazily, so +the instance is never reachable from the CLI. 
If the class patch ever fails, +the script falls back to setting `handler._compiled_ontology` directly and +rebuilding the description. + +After initialization the script prints either: + +- `ontology ACTIVE` — the handler compiled and is using the ontology, or +- `ontology INACTIVE (fell back to metadata-only)` — it did not. + +## Run it offline (dry-run) + +The ontology compilation + fact printing needs no network. Building tools / +resolving entities **does** need UiPath auth + network; in `--dry-run` that +failure is caught and the script still prints the standalone ontology facts and +exits `0`. + +```bash +uv run python scripts/run_agent_with_ontology.py \ + --ontology /path/to/p1-owl-write-extension.ttl \ + --entity-set scripts/sample_refund_entity_set.json \ + --prompt "test" --dry-run +``` + +## Run it for real (against staging) + +1. `uip login` (sets `UIPATH_ACCESS_TOKEN`, `UIPATH_URL`, + `UIPATH_TENANT_ID`, `UIPATH_ORGANIZATION_ID`). +2. Edit `scripts/sample_refund_entity_set.json` and replace the **placeholder + fake UUIDs** (`id`, `folderId`, `referenceKey`) with the real ids for your + tenant's entities. The shipped values are clearly fake and will not resolve. +3. Run without `--dry-run`: + +```bash +uv run python scripts/run_agent_with_ontology.py \ + --ontology /path/to/ontology.ttl \ + --entity-set scripts/sample_refund_entity_set.json \ + --prompt "Process the refund for contact Jane Doe on order PO-1042" \ + --model anthropic.claude-sonnet-4-5-20250929-v1:0 \ + --system-prompt scripts/sample_refund_sop.txt +``` + +## Options + +| Flag | Required | Description | +|------|----------|-------------| +| `--ontology` | yes | Path to the OWL 2 QL Turtle `.ttl` file. | +| `--entity-set` | yes | JSON list of `DataFabricEntityItem` dicts (`id`, `name`, `folderId`, `referenceKey`, `description`). | +| `--prompt` | yes | The user prompt for the agent. | +| `--model` | no | UiPath-gateway model name. 
Default: `anthropic.claude-sonnet-4-5-20250929-v1:0`. | +| `--system-prompt` | no | Path to a system-prompt/SOP `.txt`. Generic default when omitted. | +| `--resource-name` | no | Name for the Data Fabric context resource. Default: `datafabric`. | +| `--dry-run` | no | Do not call the LLM; build tools, inject the ontology, print facts + write tool description, exit. Degrades gracefully offline. | + +## Sample files + +- `sample_refund_entity_set.json` — refund hero-case entities (Customer, + Contact, Order/PurchaseOrder, CustomerRisk, RefundRequest) with **placeholder + fake ids**. Fill in real tenant ids to run against staging. +- `sample_refund_sop.txt` — the refund SOP (RFC §4.3) as a system prompt. diff --git a/scripts/run_agent_with_ontology.py b/scripts/run_agent_with_ontology.py new file mode 100644 index 000000000..088a4feef --- /dev/null +++ b/scripts/run_agent_with_ontology.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python +"""Run a coded Data Fabric agent with an OWL ontology injected into the write path. + +This is a standalone CLI helper, not part of the shipped package. + +What it does +------------ +1. Loads an OWL 2 QL Turtle ontology and compiles it up front with + ``compile_ontology`` so the user can see exactly what the ontology + contributes (entity access, HITL operations, state/reference/measure + fields, relationships) before any agent work happens. This also + validates the .ttl. +2. Bridges a gap in the platform package: the runtime's + ``DataFabricWriteHandler._maybe_compile_ontology`` calls + ``entities_service.get_ontology_file_async("owl")`` to fetch the ontology, + but the platform does NOT yet expose that method (verified: the attribute + is absent on ``uipath.platform.entities._entities_service.EntitiesService``). + This CLI monkeypatches that method onto the class so it returns the + user-supplied .ttl text. 
That activates the real ontology-compilation path + inside the handler exactly as it will behave once the platform ships the + method -- the ontology is compiled and used in write validation and the + write tool's description. +3. Builds the Data Fabric read + write tools, force-initializes the write + handler so the compiled ontology and the generated write tool description + can be printed, then (unless ``--dry-run``) runs the coded ReAct agent. + +The monkeypatch target is:: + + uipath.platform.entities._entities_service.EntitiesService.get_ontology_file_async + +Usage +----- +Offline dry-run (compiles + prints ontology facts without the LLM; degrades +gracefully if entity resolution needs network and fails):: + + uv run python scripts/run_agent_with_ontology.py \ + --ontology /path/to/p1-owl-write-extension.ttl \ + --entity-set scripts/sample_refund_entity_set.json \ + --prompt "test" --dry-run + +Real run against staging (requires ``uip login`` + real entity ids in the +entity-set JSON):: + + uv run python scripts/run_agent_with_ontology.py \ + --ontology /path/to/ontology.ttl \ + --entity-set scripts/sample_refund_entity_set.json \ + --prompt "Process the refund for contact ..." \ + --model anthropic.claude-sonnet-4-5-20250929-v1:0 +""" + +from __future__ import annotations + +import argparse +import asyncio +import sys +from pathlib import Path +from typing import Any + +# The handler resolves entities lazily through ``UiPath().entities`` and the +# EntitiesService instance is not reachable from here, so we patch the method +# at the class level. The handler's own ``_maybe_compile_ontology`` then finds +# it via ``getattr`` and fetches the ontology naturally. +ENTITIES_SERVICE_MODULE = "uipath.platform.entities._entities_service" + +# Default to a UiPath-gateway-routable model. Override with --model. +DEFAULT_MODEL = "anthropic.claude-sonnet-4-5-20250929-v1:0" + +DEFAULT_SYSTEM_PROMPT = ( + "You are a Data Fabric operations agent. 
Use the read tool to discover " + "records and field values before writing. Use the write tool only with " + "valid entity keys, operations, and fields. Respect any human-in-the-loop " + "(HITL) requirements: for operations marked HITL, ask for explicit " + "confirmation before executing. Never invent record IDs -- always look " + "them up first." +) + +# Module-level holder so the monkeypatched method (defined once, bound to the +# class) can read whichever ontology text the current CLI invocation supplied. +_ONTOLOGY_TEXT: str | None = None + + +def _install_ontology_monkeypatch(ttl_text: str) -> str: + """Patch EntitiesService.get_ontology_file_async to return the .ttl text. + + Returns a human-readable description of the exact patch target. Raises on + failure so the caller can fall back to the direct-set strategy. + """ + global _ONTOLOGY_TEXT + _ONTOLOGY_TEXT = ttl_text + + import importlib + + module = importlib.import_module(ENTITIES_SERVICE_MODULE) + service_cls = module.EntitiesService + + async def get_ontology_file_async(self: Any, file_type: str = "owl") -> str | None: + """Injected by run_agent_with_ontology.py (CLI bridge). + + Returns the user-supplied ontology text for the ``owl`` file type. 
+ """ + if file_type and file_type.lower() != "owl": + return None + return _ONTOLOGY_TEXT + + service_cls.get_ontology_file_async = get_ontology_file_async # type: ignore[attr-defined] + return f"{ENTITIES_SERVICE_MODULE}.EntitiesService.get_ontology_file_async" + + +def _print_ontology_facts(compiled: Any, *, header: str) -> None: + """Pretty-print the facts extracted from a CompiledOntology.""" + print(f"\n=== {header} ===") + if compiled is None: + print("(no compiled ontology)") + return + if hasattr(compiled, "is_empty") and compiled.is_empty(): + print("(ontology compiled but EMPTY -- no facts extracted)") + return + + def _fmt_set_map(m: dict[str, Any]) -> str: + if not m: + return " (none)" + lines = [] + for key in sorted(m): + val = m[key] + if isinstance(val, set): + val = sorted(val) + lines.append(f" {key}: {val}") + return "\n".join(lines) + + print(" entity_access (entity -> allowed ops):") + print(_fmt_set_map(compiled.entity_access)) + print(" hitl_operations (entity -> ops requiring HITL):") + print(_fmt_set_map(compiled.hitl_operations)) + print(" state_fields (entity.field -> state machine / choiceset):") + print(_fmt_set_map(compiled.state_fields)) + print(" reference_fields (entity.field -> referenced entity):") + print(_fmt_set_map(compiled.reference_fields)) + print(" measure_fields (entity.field -> semantics):") + print(_fmt_set_map(compiled.measure_fields)) + print(" entity_relationships (entity -> related entities):") + print(_fmt_set_map(compiled.entity_relationships)) + + +def _load_entity_set(path: Path) -> list[Any]: + """Load a JSON list of DataFabricEntityItem dicts into model instances.""" + import json + + from uipath.platform.entities import DataFabricEntityItem + + raw = json.loads(path.read_text()) + if not isinstance(raw, list): + raise ValueError( + f"Entity-set JSON must be a list of objects; got {type(raw).__name__}." 
+ ) + return [DataFabricEntityItem.model_validate(item) for item in raw] + + +def _find_write_handler(tools: list[Any]) -> Any: + """Locate the DataFabricWriteHandler from the created tools.""" + for tool in tools: + meta = getattr(tool, "metadata", None) or {} + if meta.get("tool_type") == "datafabric_write": + # The handler is the tool's coroutine callable. + handler = getattr(tool, "coroutine", None) or getattr(tool, "func", None) + return tool, handler + return None, None + + +class _AuthOrNetworkError(RuntimeError): + """Raised to signal an expected auth/network failure (no traceback wanted).""" + + +async def _async_main(args: argparse.Namespace) -> int: + ontology_path = Path(args.ontology).expanduser() + entity_set_path = Path(args.entity_set).expanduser() + + if not ontology_path.is_file(): + print(f"ERROR: ontology file not found: {ontology_path}", file=sys.stderr) + return 2 + if not entity_set_path.is_file(): + print(f"ERROR: entity-set file not found: {entity_set_path}", file=sys.stderr) + return 2 + + ttl_text = ontology_path.read_text() + + # --- Step 1: compile + print ontology facts up front (no network). ------- + # This validates the .ttl before any agent work and shows the user exactly + # what the ontology contributes. + from uipath_langchain.agent.tools.datafabric_tool.ontology_compiler import ( + OntologyCompileError, + compile_ontology, + ) + + try: + standalone_compiled = compile_ontology(ttl_text) + except OntologyCompileError as exc: + print(f"ERROR: failed to compile ontology: {exc}", file=sys.stderr) + return 2 + + print(f"Loaded ontology: {ontology_path}") + _print_ontology_facts( + standalone_compiled, header="ONTOLOGY FACTS (standalone compile)" + ) + + # --- Step 2: load entity set. 
-------------------------------------------- + try: + entity_items = _load_entity_set(entity_set_path) + except Exception as exc: + print(f"ERROR: failed to load entity-set JSON: {exc}", file=sys.stderr) + return 2 + print(f"\nLoaded {len(entity_items)} entity item(s) from {entity_set_path}") + + # --- Step 3: build the resource config. ---------------------------------- + from uipath.agent.models.agent import ( + AgentContextResourceConfig, + AgentContextType, + ) + + resource = AgentContextResourceConfig( + name=args.resource_name, + description="Data Fabric entity set for the ontology-injected agent run.", + context_type=AgentContextType.DATA_FABRIC_ENTITY_SET, + entity_set=entity_items, + ) + + # --- Step 4: install the monkeypatch BEFORE the handler runs. ------------ + try: + patch_target = _install_ontology_monkeypatch(ttl_text) + print(f"\nInstalled ontology bridge at: {patch_target}") + except Exception as exc: + print( + f"WARNING: class-level monkeypatch failed ({exc}); " + "will fall back to direct-set on the handler.", + file=sys.stderr, + ) + patch_target = None + + # --- Step 5 + 6: build the LLM and the Data Fabric tools. ---------------- + # Everything from here may need network/auth. Wrap so the common auth case + # degrades gracefully (and, in --dry-run, still shows the ontology facts). + try: + from uipath_langchain.agent.tools.datafabric_tool import ( + create_datafabric_tools, + ) + from uipath_langchain.chat import get_chat_model + + try: + llm = get_chat_model(args.model) + except Exception as exc: + raise _AuthOrNetworkError( + f"could not construct chat model {args.model!r}: {exc}" + ) from exc + + tools = create_datafabric_tools(resource, llm) + write_tool, write_handler = _find_write_handler(tools) + if write_handler is None: + print("ERROR: could not locate the write tool/handler.", file=sys.stderr) + return 2 + + # --- Step 7: force-initialize the write handler (needs network). 
----- + try: + await write_handler._ensure_initialized() + except Exception as exc: + raise _AuthOrNetworkError(f"entity resolution failed: {exc}") from exc + + except _AuthOrNetworkError as exc: + print( + "\n--- Tool build / entity resolution did not complete ---\n" + f"What failed: {exc}\n" + "This step needs UiPath auth + network (UiPath(), the LLM gateway, " + "and staging entity resolution). Run `uip login` and ensure the " + "entity-set JSON has real entity ids/folderIds for your tenant.", + file=sys.stderr, + ) + if args.dry_run: + print( + "\n--dry-run: the standalone ontology facts above were compiled " + "WITHOUT network and are valid. Skipping the live write tool " + "description (it requires resolved entities)." + ) + return 0 + return 1 + + # --- Verify the ontology is actually ACTIVE in the handler. -------------- + compiled = getattr(write_handler, "_compiled_ontology", None) + + if compiled is None and patch_target is None: + # Fallback strategy: directly set the compiled ontology and rebuild the + # description. Used only if class-level patching was unavailable. + from uipath_langchain.agent.tools.datafabric_tool.write_schema_builder import ( + build_write_tool_description, + ) + + compiled = standalone_compiled + write_handler._compiled_ontology = compiled + write_handler._write_tool_description = build_write_tool_description( + write_handler._write_schemas, + entity_access=compiled.entity_access, + ) + print("Applied direct-set fallback for the compiled ontology.") + + if compiled is not None and not ( + hasattr(compiled, "is_empty") and compiled.is_empty() + ): + print("\nontology ACTIVE -- handler compiled and is using the ontology.") + _print_ontology_facts(compiled, header="ONTOLOGY FACTS (active in handler)") + else: + print( + "\nontology INACTIVE (fell back to metadata-only). The handler did " + "not pick up a compiled ontology." 
+ ) + + description = getattr(write_handler, "_write_tool_description", None) or getattr( + write_tool, "description", "" + ) + print("\n=== GENERATED WRITE TOOL DESCRIPTION ===") + print(description) + + # --- Step 8: dry-run stops here. ----------------------------------------- + if args.dry_run: + print("\n--dry-run: not invoking the LLM. Exiting.") + return 0 + + # --- Step 9: build messages, the agent, and run it. ---------------------- + from langchain_core.messages import HumanMessage, SystemMessage + + from uipath_langchain.agent.react import create_agent + + if args.system_prompt: + system_text = Path(args.system_prompt).expanduser().read_text() + else: + system_text = DEFAULT_SYSTEM_PROMPT + + messages = [SystemMessage(content=system_text), HumanMessage(content=args.prompt)] + + try: + graph = create_agent(llm, tools, messages).compile() + result = await graph.ainvoke({"messages": [HumanMessage(content=args.prompt)]}) + except Exception as exc: + print( + "\n--- Agent run failed ---\n" + f"What failed: {exc}\n" + "The agent run needs the LLM gateway + staging entities. 
" + "Use --dry-run to validate the ontology compilation offline.", + file=sys.stderr, + ) + return 1 + + print("\n=== AGENT RUN RESULT ===") + result_messages = result.get("messages", []) if isinstance(result, dict) else [] + for msg in result_messages: + tool_calls = getattr(msg, "tool_calls", None) + if tool_calls: + for call in tool_calls: + name = call.get("name") if isinstance(call, dict) else None + cargs = call.get("args") if isinstance(call, dict) else None + print(f"[tool call] {name}({cargs})") + final = result_messages[-1] if result_messages else None + print("\n=== FINAL MESSAGE ===") + print(getattr(final, "content", final) if final is not None else "(no messages)") + return 0 + + +def _build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=( + "Load an OWL ontology, inject it into the Data Fabric write tool's " + "fetch path, and run a coded agent." + ) + ) + parser.add_argument( + "--ontology", required=True, help="Path to the OWL 2 QL Turtle (.ttl) file." + ) + parser.add_argument( + "--entity-set", + required=True, + help="Path to a JSON list of DataFabricEntityItem dicts " + "(id, name, folderId, referenceKey, description).", + ) + parser.add_argument( + "--prompt", required=True, help="The user prompt for the agent." + ) + parser.add_argument( + "--model", + default=DEFAULT_MODEL, + help=f"UiPath-gateway model name (default: {DEFAULT_MODEL}).", + ) + parser.add_argument( + "--system-prompt", + default=None, + help="Optional path to a system-prompt/SOP .txt file. A generic default " + "is used when omitted.", + ) + parser.add_argument( + "--resource-name", + default="datafabric", + help="Name for the Data Fabric context resource (default: datafabric).", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Do NOT call the LLM. Build tools, compile + inject the ontology, " + "print the ontology facts and the generated write tool description, " + "then exit. 
Degrades gracefully offline.", + ) + return parser + + +def main() -> int: + args = _build_arg_parser().parse_args() + return asyncio.run(_async_main(args)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/sample_refund_entity_set.json b/scripts/sample_refund_entity_set.json new file mode 100644 index 000000000..4161ef1e8 --- /dev/null +++ b/scripts/sample_refund_entity_set.json @@ -0,0 +1,37 @@ +[ + { + "id": "00000000-0000-0000-0000-0000000c0001", + "name": "Customer", + "folderId": "00000000-0000-0000-0000-00000000f001", + "referenceKey": "11111111-1111-1111-1111-111111110001", + "description": "A customer account. Read-only in the refund context: used to look up the customer's region, tier, and risk." + }, + { + "id": "00000000-0000-0000-0000-0000000c0002", + "name": "Contact", + "folderId": "00000000-0000-0000-0000-00000000f001", + "referenceKey": "11111111-1111-1111-1111-111111110002", + "description": "A contact person belonging to a customer. Read-only: used to resolve the person who requested the refund." + }, + { + "id": "00000000-0000-0000-0000-0000000c0003", + "name": "Order", + "folderId": "00000000-0000-0000-0000-00000000f001", + "referenceKey": "11111111-1111-1111-1111-111111110003", + "description": "A customer purchase order (PurchaseOrder). Writable: status may be updated as part of refund processing." + }, + { + "id": "00000000-0000-0000-0000-0000000c0004", + "name": "CustomerRisk", + "folderId": "00000000-0000-0000-0000-00000000f001", + "referenceKey": "11111111-1111-1111-1111-111111110004", + "description": "Risk profile for a customer (fraud score, refund history). Read-only: consulted before approving a refund." + }, + { + "id": "00000000-0000-0000-0000-0000000c0005", + "name": "RefundRequest", + "folderId": "00000000-0000-0000-0000-00000000f001", + "referenceKey": "11111111-1111-1111-1111-111111110005", + "description": "A refund request against an order. 
Writable: the agent creates and updates refund requests, and cancellation requires human confirmation." + } +] diff --git a/scripts/sample_refund_sop.txt b/scripts/sample_refund_sop.txt new file mode 100644 index 000000000..5eba61071 --- /dev/null +++ b/scripts/sample_refund_sop.txt @@ -0,0 +1,39 @@ +You are a Data Fabric refund-processing agent operating under the P1 LDO +writes-with-reads SOP (RFC v2, section 4.3). Your job is to process customer +refund requests safely, with reads informing every write and deterministic +guardrails on destructive operations. + +Standard operating procedure: + +1. Identify the requester. Use the read tool to resolve the Contact and the + owning Customer from the details in the user's request. Never assume a + record id -- always look it up. + +2. Locate the order. Read the Order (PurchaseOrder) the refund concerns. + Confirm it exists and capture its current status and amount before any + write. + +3. Check risk. Read the CustomerRisk profile for the customer. If the fraud + score is elevated or refund history is unusual, do not auto-approve -- + surface the risk and ask the human to decide. + +4. Create or update the refund. Use the write tool only with valid entity + keys, operations, and fields discovered from the schema and ontology. + Respect required fields and foreign-key references (e.g. link the + RefundRequest to the correct Order and Customer ids you read in steps 1-2). + +5. Honor HITL. For any operation the ontology marks as human-in-the-loop + (for example cancellation or any destructive/terminal transition), STOP and + ask the user for explicit confirmation before calling the write tool. Do + not proceed on an HITL operation without an explicit "yes". + +6. Respect state transitions. When updating a status field governed by a state + machine, only move to a state reachable from the current one. If the + requested transition is invalid, explain why and ask for clarification + instead of submitting it. + +7. Report. 
Summarize what you read, what you wrote, and any item you escalated + for human confirmation. + +Never invent customer status, order ids, or risk findings. Separate what you +observed (evidence) from what you recommend. From 8a10b1ad8811f97477240a53199667955a1eb600 Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Sat, 27 Jun 2026 13:27:05 +0530 Subject: [PATCH 4/6] fix(datafabric writes) + ontology write POC, validated on staging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two write-path bugs found while validating the ontology POC end-to-end against live staging, both fixed: 1. Read schema stripped the Id primary key (write_validation system-field filter), so the NL-to-SQL model (a) invented a non-existent 'rowid' column for ORDER BY on paginated reads (FQS 400) and (b) never returned Id, leaving the agent no record_id for updates/deletes. Fix: retain the primary key for WRITABLE entities in the read schema (SELECT it, ORDER BY it); keep other system fields hidden; read-only entities unchanged. P3 collision guard for user/CSV fields sharing a system field name. Harden is_entity_writable with getattr. 2. Write executor called the CRUD endpoint with the entity NAME, but .../EntityService/entity/{id}/insert requires the GUID id ("not valid" 400). Fix: handler maps entity name -> id before executing, restores the friendly name on the result. Verified on staging (dataservicetest/DataFabricFQS): with both fixes the refund flow's insert + 3 updates all persist (read-back confirmed). The ontology compiles, activates, and correctly governs tool selection (RefundRequest insert-only; Order/Risk/Contact update; Customer read-only, never written). 
POC harness (scripts/): - poc_refund_setup.sh / poc_refund_teardown.sh — create+seed / delete the 5 refund entities, emit ontology + entity-set (referenceKey=GUID) + ids - poc_refund_drive.py — drive the real write handler with the ontology active, verify by read-back (deterministic; no LLM) - run_agent_with_ontology.py — full LLM-in-the-loop variant; gains --agenthub-config (LLM-gateway licensing OpCode; without it the gateway 403s) and recursion_limit - POC_README.md — env setup + the three run levels + the known agent-loop gap (create_agent does not auto-execute the terminal write batch; that is runtime plumbing, not the ontology/write tool) Tests: 740 passed. New: read-schema PK retention (writable vs read-only, other-system-fields-hidden, collision-not-duplicated, rowid-free ORDER BY) and name->id translation for CRUD. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/POC_README.md | 96 ++++++++++ scripts/poc_refund_drive.py | 172 ++++++++++++++++++ scripts/poc_refund_setup.sh | 100 ++++++++++ scripts/poc_refund_teardown.sh | 11 ++ scripts/run_agent_with_ontology.py | 22 ++- .../datafabric_prompt_builder.py | 54 +++++- .../tools/datafabric_tool/datafabric_tool.py | 21 ++- .../tools/datafabric_tool/write_validation.py | 8 +- .../test_refund_agent_integ.py | 2 + .../datafabric_tool/test_write_integration.py | 42 +++++ .../tools/test_datafabric_prompt_builder.py | 87 ++++++++- 11 files changed, 602 insertions(+), 13 deletions(-) create mode 100644 scripts/POC_README.md create mode 100644 scripts/poc_refund_drive.py create mode 100755 scripts/poc_refund_setup.sh create mode 100755 scripts/poc_refund_teardown.sh diff --git a/scripts/POC_README.md b/scripts/POC_README.md new file mode 100644 index 000000000..be94800b8 --- /dev/null +++ b/scripts/POC_README.md @@ -0,0 +1,96 @@ +# Data Fabric Ontology Write POC + +Demonstrates an OWL ontology driving native Data Fabric **writes** through the +agent tooling: the ontology compiles into a structured intermediate 
+representation, activates in the write handler, governs which entities/operations +are allowed, and the resulting writes persist to Data Fabric. + +Hero case: a contact-center **refund** agent over 5 entities — `Customer` +(read-only), `Contact`, `PurchaseOrder`, `CustomerRisk`, `RefundRequest`. + +## Components + +| File | Role | +|------|------| +| `src/.../datafabric_tool/ontology_compiler.py` | OWL Turtle → `CompiledOntology` (entity_access, measure/state/reference fields, HITL, relationships) | +| `src/.../datafabric_tool/compiled_ontology.py` | the IR model | +| `src/.../datafabric_tool/datafabric_tool.py` | write tool + handler; fetches+compiles ontology, maps entity name→id for CRUD | +| `src/.../datafabric_tool/write_validation.py` | writability + mutation-intent validation (ontology-constrained) | +| `src/.../datafabric_tool/datafabric_prompt_builder.py` | read schema; retains the primary key for writable entities | +| `scripts/poc_refund_setup.sh` | create + seed staging entities, emit ontology + entity-set + ids | +| `scripts/poc_refund_drive.py` | drive the real write handler with the ontology active, verify by read-back | +| `scripts/poc_refund_teardown.sh` | delete the POC entities | +| `scripts/run_agent_with_ontology.py` | full LLM-in-the-loop variant (see "Known gap") | + +## Prerequisites + +```bash +# 1. CLI auth to the target tenant (entity create/seed/verify) +uip login + +# 2. SDK env vars — the Python SDK reads these (separate from the CLI's auth). +# Source the access token from a logged-in session; do NOT hardcode it. +export UIPATH_ACCESS_TOKEN="$(python3 -c "import json,os;print(json.load(open(os.path.expanduser('~/.uipath/.auth.json')))['access_token'])")" +export UIPATH_URL="https://<host>/<org>/<tenant>" +export UIPATH_ORGANIZATION_ID="<organization-id>" +export UIPATH_TENANT_ID="<tenant-id>" +``` + +The access token is short-lived (~1h); re-export after re-login. + +## Run + +### A.
Ontology compiles + activates (offline, no staging) + +```bash +uv run python scripts/run_agent_with_ontology.py \ + --ontology ../../../df-agent-os/roadmap/p1-owl-write-extension.ttl \ + --entity-set scripts/sample_refund_entity_set.json \ + --prompt x --dry-run +``` + +Prints the extracted ontology facts without any network call. + +### B. Ontology governs writes that persist (live staging) — the working POC + +```bash +bash scripts/poc_refund_setup.sh ./poc_out # create + seed +uv run python scripts/poc_refund_drive.py ./poc_out # drive writes, verify +bash scripts/poc_refund_teardown.sh ./poc_out # clean up +``` + +`poc_refund_drive.py` prints `ontology ACTIVE`, runs insert RefundRequest + +update Order/CustomerRisk/Contact through the real handler, and verifies all +four mutations by read-back. + +### C. Full LLM agent picks the tools (live) + +```bash +set -a; source ./poc_out/refund_ids.env; set +a +uv run python scripts/run_agent_with_ontology.py \ + --ontology ./poc_out/refund_ontology.ttl \ + --entity-set ./poc_out/refund_entity_set.json \ + --system-prompt scripts/sample_refund_sop.txt \ + --model gpt-4.1-2025-04-14 --agenthub-config agentsplayground \ + --prompt "Process the refund for contact ${CONTACT_ID}. Order id ${ORDER_ID}, CustomerRisk id ${RISK_ID}, Customer id ${CUSTOMER_ID}. ..." +``` + +The LLM reads, decides, and emits ontology-correct write calls (insert on +RefundRequest, update on the writable entities, never on read-only Customer). + +## Known gap + +In path **C**, the standalone `create_agent` harness terminates on control-flow +tools and the gateway returns tool calls in the OpenAI Responses format; the +terminal write batch is *planned* but not auto-executed by this harness. That is +agent-runtime plumbing, not the ontology or the write tool — path **B** confirms +the writes themselves land. The production `uipath_agents` runtime drives the +tool-execution loop and is the place to validate path C end-to-end. 
+ +## Notes + +- Status fields are plain STRING (the `choice-set-values` endpoint was + unreliable on staging); the ontology still models `OrderStatus` as a + `StateField`. Swap to ChoiceSet fields when the endpoint is stable. +- The seeded entities are not FK-linked (simplified scenario); pass the record + ids explicitly in path C rather than relying on relationship discovery. diff --git a/scripts/poc_refund_drive.py b/scripts/poc_refund_drive.py new file mode 100644 index 000000000..23ac08c57 --- /dev/null +++ b/scripts/poc_refund_drive.py @@ -0,0 +1,172 @@ +"""Ontology Write POC — drive the real write tool against staging. + +Loads the POC artifacts produced by ``poc_refund_setup.sh``, injects the +ontology via the same bridge the CLI uses (monkeypatching the not-yet-shipped +``EntitiesService.get_ontology_file_async``), builds the real Data Fabric +tools, then runs the refund flow by invoking the write tool handler directly — +the exact callable an agent's tool node calls. Proves: ontology compile + +inject -> write validation -> EntitiesService CRUD -> records persisted -> +verified by read-back. + +This is the deterministic counterpart to ``run_agent_with_ontology.py`` (which +puts an LLM in the loop). Use this to confirm the writes actually land. + +Prereq: UIPATH_* env vars set (see scripts/POC_README.md), and + ``poc_refund_setup.sh`` already run. +Usage: uv run python scripts/poc_refund_drive.py [OUT_DIR] (default: ./poc_out) +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from pathlib import Path + +import uipath.platform.entities._entities_service as es_mod + +OUT = Path(sys.argv[1] if len(sys.argv) > 1 else "./poc_out") +ONTOLOGY_TTL = (OUT / "refund_ontology.ttl").read_text() + + +async def _get_ontology_file_async(self, file_type, *args, **kwargs): # noqa: ANN001 + """Bridge: return the POC ontology for the 'owl' file type. 
+ + Stands in for the platform method that has not shipped yet, so the + handler's own _maybe_compile_ontology picks it up naturally. + """ + return ONTOLOGY_TTL if file_type == "owl" else None + + +es_mod.EntitiesService.get_ontology_file_async = _get_ontology_file_async + +from uipath.agent.models.agent import ( # noqa: E402 + AgentContextResourceConfig, + AgentContextType, +) +from uipath.platform.entities import DataFabricEntityItem # noqa: E402 + +from uipath_langchain.agent.tools.datafabric_tool import ( # noqa: E402 + create_datafabric_tools, +) + + +def _load_ids() -> dict[str, str]: + ids: dict[str, str] = {} + for line in (OUT / "refund_ids.env").read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#") and "=" in line: + k, v = line.split("=", 1) + ids[k] = v + return ids + + +class _NoLLM: + """Stub LLM — only needed to build the read tool; never invoked here.""" + + def bind_tools(self, *args, **kwargs): # noqa: ANN001, ANN002, ANN003 + return self + + +async def main() -> int: + ids = _load_ids() + items = [ + DataFabricEntityItem.model_validate(d) + for d in json.loads((OUT / "refund_entity_set.json").read_text()) + ] + by_suffix = {it.name.rsplit("_", 1)[1]: it.name for it in items} + + resource = AgentContextResourceConfig( + name="refund_context", + description="Refund processing context", + context_type=AgentContextType.DATA_FABRIC_ENTITY_SET, + entity_set=items, + ) + tools = create_datafabric_tools(resource, _NoLLM()) # type: ignore[arg-type] + handler = next(t for t in tools if t.name.endswith("_write")).coroutine + + await handler._ensure_initialized() + onto = handler._compiled_ontology + print("=== ONTOLOGY STATUS ===") + if onto and not onto.is_empty(): + print( + " ACTIVE — entity_access:", + {k: sorted(v) for k, v in onto.entity_access.items()}, + ) + else: + print(" INACTIVE (metadata-only)") + + # SOP decision is fixed for the seeded scenario (Delivered, score 2, $200). 
+ amt, score, ltv = 200.0, 2, 5000.0 + print( + f"\nDECIDE: order Delivered, risk {score} < 3, amount {amt} <= 500 -> APPROVE\n" + ) + + async def write(**kw): # noqa: ANN003 + out = json.loads(await handler(**kw)) + print( + f" {kw['operation']:6} {kw['entity_key'].rsplit('_', 1)[1]:14} -> success={out['success']}" + ) + return out + + print("=== WRITES (real handler, ontology validating) ===") + await write( + entity_key=by_suffix["RefundRequest"], + operation="insert", + fields={ + "ApprovedAmount": amt, + "Reason": "Auto-approved: low risk", + "OrderRef": ids["ORDER_ID"], + "CustomerRef": ids["CUSTOMER_ID"], + "RefundStatus": "Pending", + }, + ) + await write( + entity_key=by_suffix["PurchaseOrder"], + operation="update", + record_id=ids["ORDER_ID"], + fields={"OrderStatus": "Returned"}, + ) + await write( + entity_key=by_suffix["CustomerRisk"], + operation="update", + record_id=ids["RISK_ID"], + fields={"RiskScore": score + 1, "LifetimeValue": ltv - amt}, + ) + await write( + entity_key=by_suffix["Contact"], + operation="update", + record_id=ids["CONTACT_ID"], + fields={"Resolution": "Approved"}, + ) + + # Verify by read-back through the resolved service. 
+ from uipath.platform import UiPath + + svc = (await UiPath().entities.resolve_entity_set_async(items)).entities_service + + def g(rec, k): # noqa: ANN001 + return rec.get(k) if isinstance(rec, dict) else getattr(rec, k, None) + + order = await svc.get_record_async(by_suffix["PurchaseOrder"], ids["ORDER_ID"]) + risk = await svc.get_record_async(by_suffix["CustomerRisk"], ids["RISK_ID"]) + contact = await svc.get_record_async(by_suffix["Contact"], ids["CONTACT_ID"]) + + print("\n=== VERIFY (read-back) ===") + checks = [ + ("Order.OrderStatus", g(order, "OrderStatus"), "Returned"), + ("Risk.RiskScore", int(g(risk, "RiskScore")), 3), + ("Risk.LifetimeValue", float(g(risk, "LifetimeValue")), 4800.0), + ("Contact.Resolution", g(contact, "Resolution"), "Approved"), + ] + ok = 0 + for label, actual, expected in checks: + good = actual == expected + ok += good + print(f" {'OK ' if good else 'XX '} {label} = {actual} (expected {expected})") + print(f"\n=== {ok}/{len(checks)} verified ===") + return 0 if ok == len(checks) else 1 + + +if __name__ == "__main__": + raise SystemExit(asyncio.run(main())) diff --git a/scripts/poc_refund_setup.sh b/scripts/poc_refund_setup.sh new file mode 100755 index 000000000..22d29f9f3 --- /dev/null +++ b/scripts/poc_refund_setup.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# ============================================================================= +# Ontology Write POC — staging setup +# +# Creates the contact-center refund entities on the logged-in Data Fabric +# tenant, seeds one refund scenario, and emits three artifacts into OUT: +# - refund_entity_set.json (DataFabricEntityItem list; referenceKey = GUID) +# - refund_ontology.ttl (OWL with df:entityKey matching the real names) +# - refund_ids.env (seeded record ids + entity ids for teardown) +# +# Status fields are plain STRING (the choice-set-values endpoint is currently +# unreliable on staging); the ontology still models OrderStatus as a StateField. 
+# +# Prereq: `uip login` to the target tenant. +# Usage: bash scripts/poc_refund_setup.sh [OUT_DIR] (default: ./poc_out) +# Teardown: bash scripts/poc_refund_teardown.sh [OUT_DIR] +# ============================================================================= +set -eo pipefail +OUT="${1:-./poc_out}" +mkdir -p "$OUT" +P="Rfnd$(date +%H%M%S)" +idof() { python3 -c "import json,sys; print(json.load(sys.stdin)['Data']['Id'])"; } + +echo "=== Entities (prefix $P) ===" +CUST_E=$(uip df entities create "${P}_Customer" --body "{\"displayName\":\"${P} Customer\",\"fields\":[{\"fieldName\":\"CustomerName\",\"type\":\"STRING\",\"isRequired\":true,\"lengthLimit\":200},{\"fieldName\":\"AccountTier\",\"type\":\"STRING\",\"lengthLimit\":50}]}" --output json 2>/dev/null | idof) +ORD_E=$(uip df entities create "${P}_PurchaseOrder" --body "{\"displayName\":\"${P} Order\",\"fields\":[{\"fieldName\":\"OrderNumber\",\"type\":\"STRING\",\"isRequired\":true,\"lengthLimit\":50},{\"fieldName\":\"TotalAmount\",\"type\":\"DECIMAL\",\"decimalPrecision\":2},{\"fieldName\":\"OrderStatus\",\"type\":\"STRING\",\"lengthLimit\":50}]}" --output json 2>/dev/null | idof) +RISK_E=$(uip df entities create "${P}_CustomerRisk" --body "{\"displayName\":\"${P} Risk\",\"fields\":[{\"fieldName\":\"RiskScore\",\"type\":\"INTEGER\"},{\"fieldName\":\"LifetimeValue\",\"type\":\"DECIMAL\",\"decimalPrecision\":2}]}" --output json 2>/dev/null | idof) +CONT_E=$(uip df entities create "${P}_Contact" --body "{\"displayName\":\"${P} Contact\",\"fields\":[{\"fieldName\":\"ContactReason\",\"type\":\"STRING\",\"lengthLimit\":50},{\"fieldName\":\"RefundAmount\",\"type\":\"DECIMAL\",\"decimalPrecision\":2},{\"fieldName\":\"OrderRef\",\"type\":\"STRING\",\"lengthLimit\":100},{\"fieldName\":\"Resolution\",\"type\":\"STRING\",\"lengthLimit\":50}]}" --output json 2>/dev/null | idof) +RFND_E=$(uip df entities create "${P}_RefundRequest" --body "{\"displayName\":\"${P} 
Refund\",\"fields\":[{\"fieldName\":\"ApprovedAmount\",\"type\":\"DECIMAL\",\"decimalPrecision\":2,\"isRequired\":true},{\"fieldName\":\"Reason\",\"type\":\"STRING\",\"isRequired\":true,\"lengthLimit\":500},{\"fieldName\":\"OrderRef\",\"type\":\"STRING\",\"lengthLimit\":100},{\"fieldName\":\"CustomerRef\",\"type\":\"STRING\",\"lengthLimit\":100},{\"fieldName\":\"RefundStatus\",\"type\":\"STRING\",\"lengthLimit\":50}]}" --output json 2>/dev/null | idof) + +echo "=== Seed records ===" +CUST_R=$(uip df records insert "$CUST_E" --body '{"CustomerName":"Sarah Chen","AccountTier":"Gold"}' --output json 2>/dev/null | idof) +ORD_R=$(uip df records insert "$ORD_E" --body '{"OrderNumber":"ORD001","TotalAmount":200.00,"OrderStatus":"Delivered"}' --output json 2>/dev/null | idof) +RISK_R=$(uip df records insert "$RISK_E" --body '{"RiskScore":2,"LifetimeValue":5000.00}' --output json 2>/dev/null | idof) +CONT_R=$(uip df records insert "$CONT_E" --body "{\"ContactReason\":\"Refund\",\"RefundAmount\":200.00,\"OrderRef\":\"$ORD_R\",\"Resolution\":\"Open\"}" --output json 2>/dev/null | idof) + +F="00000000-0000-0000-0000-000000000000" +# referenceKey = entity GUID — resolve_entity_set_async looks up by this value, +# and the CRUD endpoints require the id, not the entity name. +cat > "$OUT/refund_entity_set.json" < "$OUT/refund_ontology.ttl" < . +@prefix ex: . +@prefix owl: . +@prefix rdfs: . + +ex:Customer a owl:Class ; rdfs:subClassOf df:ReadableEntity ; df:entityKey "${P}_Customer" . +ex:Contact a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "${P}_Contact" ; df:allowsOperation "update" . +ex:Order a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "${P}_PurchaseOrder" ; df:allowsOperation "update" . +ex:Risk a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "${P}_CustomerRisk" ; df:allowsOperation "update" . +ex:Refund a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "${P}_RefundRequest" ; df:allowsOperation "insert" . 
+ +ex:field_Order_Status a df:StateField ; df:fieldKey "OrderStatus" ; df:choiceSetKey "OrderStatusValues" . +ex:field_Risk_Score a df:MeasureField ; df:fieldKey "RiskScore" ; df:measureSemantics "additive" . +ex:field_Risk_LTV a df:MeasureField ; df:fieldKey "LifetimeValue" ; df:measureSemantics "additive" . +ex:field_Refund_Order a df:ReferenceField ; df:fieldKey "OrderRef" ; df:referencesEntity ex:Order . +ex:field_Refund_Customer a df:ReferenceField ; df:fieldKey "CustomerRef" ; df:referencesEntity ex:Customer . + +ex:Order df:hasField ex:field_Order_Status . +ex:Risk df:hasField ex:field_Risk_Score , ex:field_Risk_LTV . +ex:Refund df:hasField ex:field_Refund_Order , ex:field_Refund_Customer . + +ex:CreateRefund a df:InsertAction ; df:writeOperation "insert" ; df:targetEntity ex:Refund ; df:requiresHITL false . +ex:UpdateOrder a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Order ; df:requiresHITL false . +ex:UpdateRisk a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Risk ; df:requiresHITL false . +ex:UpdateContact a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Contact ; df:requiresHITL false . + +ex:Refund df:hasAction ex:CreateRefund . +ex:Order df:hasAction ex:UpdateOrder . +ex:Risk df:hasAction ex:UpdateRisk . +ex:Contact df:hasAction ex:UpdateContact . + +ex:Contact df:relatedEntity ex:Customer , ex:Order . +ex:Refund df:relatedEntity ex:Order , ex:Customer . +ex:Risk df:relatedEntity ex:Customer . 
+TTL + +cat > "$OUT/refund_ids.env" </dev/null \ + | python3 -c "import json,sys;print(json.load(sys.stdin).get('Code','?'))" 2>/dev/null || echo "skip $e" +done +echo "teardown done" diff --git a/scripts/run_agent_with_ontology.py b/scripts/run_agent_with_ontology.py index 088a4feef..c04043f9a 100644 --- a/scripts/run_agent_with_ontology.py +++ b/scripts/run_agent_with_ontology.py @@ -245,7 +245,12 @@ async def _async_main(args: argparse.Namespace) -> int: from uipath_langchain.chat import get_chat_model try: - llm = get_chat_model(args.model) + # agenthub_config carries the AgentHub OpCode that routes LLM + # licensing on the gateway. Without it the call defaults to an + # unlicensed product path and the gateway returns 403 "License + # not available for LLM usage". "agentsplayground" uses the + # developer's debug/playground quota — appropriate for a local run. + llm = get_chat_model(args.model, agenthub_config=args.agenthub_config) except Exception as exc: raise _AuthOrNetworkError( f"could not construct chat model {args.model!r}: {exc}" @@ -335,7 +340,13 @@ async def _async_main(args: argparse.Namespace) -> int: try: graph = create_agent(llm, tools, messages).compile() - result = await graph.ainvoke({"messages": [HumanMessage(content=args.prompt)]}) + # The refund flow needs many steps (several reads, a decision, then 4 + # writes). The default recursion_limit (25) can be exhausted before the + # terminal write batch executes, leaving the writes only *planned*. 
+ result = await graph.ainvoke( + {"messages": [HumanMessage(content=args.prompt)]}, + config={"recursion_limit": 80}, + ) except Exception as exc: print( "\n--- Agent run failed ---\n" @@ -385,6 +396,13 @@ def _build_arg_parser() -> argparse.ArgumentParser: default=DEFAULT_MODEL, help=f"UiPath-gateway model name (default: {DEFAULT_MODEL}).", ) + parser.add_argument( + "--agenthub-config", + default="agentsplayground", + help="AgentHub OpCode for LLM-gateway licensing routing " + "(default: agentsplayground — uses the developer's playground quota). " + "Without a valid value the gateway returns 403 'License not available'.", + ) parser.add_argument( "--system-prompt", default=None, diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index a497e0f3f..066f2bc54 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -34,10 +34,29 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: field_schemas: list[FieldSchema] = [] numeric_field: str | None = None text_field: str | None = None + pk_field: str | None = None + + # System fields (CreateTime, UpdatedBy, ...) are analytical noise and stay + # hidden. The one exception is the primary key on a *writable* entity: the + # agent needs it to retrieve record ids (the record_id for update/delete) + # and as a stable column to ORDER BY. Read-only entities keep all system + # fields hidden. 
+ writable = is_entity_writable(entity) + seen_names: set[str] = set() for field in entity.fields or []: - if field.is_hidden_field or field.is_system_field: + is_pk = bool(getattr(field, "is_primary_key", False)) + if field.is_hidden_field: + continue + if field.is_system_field and not (writable and is_pk): + continue + # P3 collision guard: when a user/CSV field shares a system field's + # name (e.g. an imported "Id"), keep the first occurrence rather than + # emitting a duplicate column row. Full disambiguation is P3 work. + if field.name in seen_names: continue + seen_names.add(field.name) + type_name = field.sql_type.name if field.sql_type else "unknown" fs = FieldSchema( name=field.name, @@ -51,6 +70,8 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: ) field_schemas.append(fs) + if is_pk and writable and pk_field is None: + pk_field = fs.name if not numeric_field and fs.is_numeric: numeric_field = fs.name if not text_field and fs.is_text: @@ -62,13 +83,28 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: group_field = text_field or (field_names[0] if field_names else "Category") agg_field = numeric_field or (field_names[1] if len(field_names) > 1 else "Amount") filter_field = text_field or (field_names[0] if field_names else "Name") - fields_sample = ", ".join(field_names[:5]) if field_names else "*" - count_col = field_names[0] if field_names else "id" + count_col = pk_field or (field_names[0] if field_names else "id") + + # Put the primary key first in the projection so record-level reads return + # the id the agent reuses as record_id when writing. + if pk_field: + ordered = [pk_field] + [n for n in field_names if n != pk_field] + else: + ordered = field_names + fields_sample = ", ".join(ordered[:5]) if ordered else "*" + # A stable sort column for paginated reads. Prefer the primary key; this + # prevents the SQL-gen model from falling back to a non-existent pseudo + # column (e.g. 
"rowid") when a query needs ORDER BY + LIMIT. + stable_sort = pk_field or (field_names[0] if field_names else None) query_patterns = [ QueryPattern( intent="Show all", - sql=f"SELECT {fields_sample} FROM {table} LIMIT 100", + sql=( + f"SELECT {fields_sample} FROM {table} ORDER BY {stable_sort} LIMIT 100" + if stable_sort + else f"SELECT {fields_sample} FROM {table} LIMIT 100" + ), ), QueryPattern( intent="Find by X", @@ -91,6 +127,16 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: sql=f"SELECT SUM({agg_field}) as total FROM {table}", ), ] + # For writable entities, give the model an explicit record-lookup pattern so + # it knows how to fetch a single row's id before an update/delete. + if pk_field: + query_patterns.insert( + 1, + QueryPattern( + intent="Get a record's id to update/delete it", + sql=f"SELECT {fields_sample} FROM {table} WHERE {filter_field} = 'value' LIMIT 1", + ), + ) schema = EntitySchema( id=entity.id, diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py index 324e861e1..bf367f2b0 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py @@ -168,6 +168,7 @@ def __init__( ) -> None: self._entity_set = entity_set self._write_schemas: dict[str, EntityWriteSchema] | None = None + self._entity_id_by_key: dict[str, str] = {} self._write_tool_description: str | None = None self._compiled_ontology: CompiledOntology | None = None self._executor: Any | None = None @@ -195,6 +196,11 @@ async def _ensure_initialized(self) -> None: ) self._write_schemas = {} + # The LLM addresses entities by name (matching the read schema and + # the write tool description), but the EntitiesService CRUD endpoints + # require the entity's GUID id. Keep a name -> id map to translate at + # execution time. 
+ self._entity_id_by_key = {} for entity in resolution.entities: if not is_entity_writable(entity): continue @@ -204,6 +210,8 @@ async def _ensure_initialized(self) -> None: display_name=entity.display_name or entity.name, writable_fields=writable, ) + if entity.id: + self._entity_id_by_key[entity.name] = entity.id # Optional ontology layer: fetch + compile the OWL ontology if the # platform exposes get_ontology_file_async. This method may only @@ -307,11 +315,20 @@ async def __call__( } ) - # Execute + # Execute. The LLM addresses the entity by name, but the CRUD endpoints + # require the entity's GUID id — translate before executing, then + # restore the friendly name on the result for the model. from .write_executor import WriteExecutor assert isinstance(self._executor, WriteExecutor) - result = await self._executor.execute(intent) + resolved_key = self._entity_id_by_key.get(intent.entity_key, intent.entity_key) + exec_intent = ( + intent.model_copy(update={"entity_key": resolved_key}) + if resolved_key != intent.entity_key + else intent + ) + result = await self._executor.execute(exec_intent) + result.entity_key = intent.entity_key return result.model_dump_json() diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py index 67a2d64c2..5ce4e4e25 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py @@ -26,11 +26,13 @@ def is_entity_writable(entity: Entity) -> bool: (with external_fields), ChoiceSets, SystemEntities, and InternalEntities are not writable through this path. """ - # Only "Entity" type is writable (not ChoiceSet, SystemEntity, InternalEntity) - if entity.entity_type != "Entity": + # Only "Entity" type is writable (not ChoiceSet, SystemEntity, InternalEntity). 
+ # Use getattr so partial/edge entity objects degrade to "not writable" + # (the safe default) instead of raising. + if getattr(entity, "entity_type", None) != "Entity": return False # Federated entities have external_fields — writes go to source system, not DF - if entity.external_fields: + if getattr(entity, "external_fields", None): return False return True diff --git a/tests/agent/tools/datafabric_tool/test_refund_agent_integ.py b/tests/agent/tools/datafabric_tool/test_refund_agent_integ.py index 8eae25e27..6b64eb152 100644 --- a/tests/agent/tools/datafabric_tool/test_refund_agent_integ.py +++ b/tests/agent/tools/datafabric_tool/test_refund_agent_integ.py @@ -68,6 +68,8 @@ def _make_entity( """Create a mock Entity object.""" entity = MagicMock() entity.name = name + # id == name here so the handler's name->id translation is an identity. + entity.id = name entity.display_name = display_name or name entity.fields = fields entity.entity_type = entity_type diff --git a/tests/agent/tools/datafabric_tool/test_write_integration.py b/tests/agent/tools/datafabric_tool/test_write_integration.py index 495016b1a..5bdc18eb4 100644 --- a/tests/agent/tools/datafabric_tool/test_write_integration.py +++ b/tests/agent/tools/datafabric_tool/test_write_integration.py @@ -64,6 +64,9 @@ def _make_entity( """Create a mock Entity object with .name, .display_name, .fields.""" entity = MagicMock() entity.name = name + # In these tests the entity id equals the name so the handler's name->id + # translation is an identity (a distinct-id case is covered separately). 
+ entity.id = name entity.display_name = name entity.fields = fields entity.entity_type = entity_type @@ -311,6 +314,45 @@ async def test_update_with_record_id_and_fields_calls_api( "Orders", "rec-1", {"Amount": 150} ) + @pytest.mark.asyncio + async def test_entity_name_translated_to_id_for_crud( + self, mock_record: MagicMock + ) -> None: + """The LLM addresses the entity by name, but the CRUD call must use the + entity's GUID id. The handler translates name -> id before executing.""" + orders = _make_entity("Orders", [_make_field("OrderName")]) + orders.id = "orders-guid-123" # id distinct from the name + resolution = _make_resolution([orders]) + resolution.entities_service.insert_record_async = AsyncMock( + return_value=mock_record + ) + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) + result = json.loads( + await tools[1].ainvoke( + { + "entity_key": "Orders", # name, as the LLM sees it + "operation": "insert", + "fields": {"OrderName": "X"}, + } + ) + ) + + assert result["success"] is True + # Executor called with the GUID id, not the entity name. + resolution.entities_service.insert_record_async.assert_awaited_once_with( + "orders-guid-123", {"OrderName": "X"} + ) + # The result still reports the friendly name back to the model. 
+ assert result["entity_key"] == "Orders" + @pytest.mark.asyncio async def test_delete_with_record_id_calls_api( self, mock_entities: list[MagicMock] diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 47034b144..8483f89d6 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -1,10 +1,13 @@ from types import SimpleNamespace -from uipath_langchain.agent.tools.datafabric_tool.datafabric_prompt_builder import build +from uipath_langchain.agent.tools.datafabric_tool.datafabric_prompt_builder import ( + build, + build_entity_context, +) def _fake_field(**overrides): - return SimpleNamespace( + defaults = dict( name="status", display_name="Status", sql_type=SimpleNamespace(name="varchar"), @@ -19,11 +22,29 @@ def _fake_field(**overrides): is_unique=False, is_hidden_field=False, is_system_field=False, + is_primary_key=False, + ) + defaults.update(overrides) + return SimpleNamespace(**defaults) + + +def _pk_field(**overrides): + """A system-managed primary key field (e.g. the platform ``Id``).""" + return _fake_field( + name="Id", + display_name="Id", + sql_type=SimpleNamespace(name="uniqueidentifier"), + description="Record id", + is_system_field=True, + is_primary_key=True, + is_unique=True, **overrides, ) def _fake_entity(*fields, **overrides): + overrides.setdefault("entity_type", "Entity") + overrides.setdefault("external_fields", None) return SimpleNamespace( id="entity-1", name="Ticket", @@ -57,3 +78,65 @@ def test_build_includes_domain_guidance_in_rendered_prompt(): assert "## Domain Guidance" in prompt assert "Use business-friendly ticket language." 
in prompt + + +def _field_names(ctx): + return [f.name for f in ctx.entity_schema.fields] + + +def test_writable_entity_retains_primary_key(): + """Writable entity surfaces the system primary key so the agent can fetch + record ids (record_id for writes) — clearing the rowid/record_id gap.""" + ctx = build_entity_context(_fake_entity(_pk_field(), _fake_field())) + names = _field_names(ctx) + assert "Id" in names, "primary key must be surfaced for writable entities" + # Id is first in every projection so record-level reads return it. + show_all = next(p for p in ctx.query_patterns if p.intent == "Show all") + assert "SELECT Id" in show_all.sql + # A real column to ORDER BY — never the non-existent 'rowid' pseudo-column. + assert "ORDER BY Id" in show_all.sql + assert "rowid" not in show_all.sql + # An explicit record-lookup pattern is offered for writes. + assert any("update/delete" in p.intent for p in ctx.query_patterns) + + +def test_readonly_entity_excludes_system_primary_key(): + """Federated/read-only entities keep all system fields hidden — no write + means no need for identity-for-mutation.""" + ctx = build_entity_context( + _fake_entity(_pk_field(), _fake_field(), external_fields=[object()]) + ) + names = _field_names(ctx) + assert "Id" not in names + assert "status" in names + # No record-lookup pattern for a non-writable entity. 
+ assert not any("update/delete" in p.intent for p in ctx.query_patterns) + + +def test_other_system_fields_stay_hidden_on_writable_entity(): + """Only the primary key is surfaced; other system fields remain noise.""" + ctx = build_entity_context( + _fake_entity( + _pk_field(), + _fake_field(name="CreateTime", is_system_field=True, is_primary_key=False), + _fake_field(), + ) + ) + names = _field_names(ctx) + assert "Id" in names + assert "CreateTime" not in names + assert "status" in names + + +def test_system_pk_name_collision_with_user_field_not_duplicated(): + """P3 collision guard: a user/CSV field sharing the system PK name must not + produce a duplicate column row.""" + ctx = build_entity_context( + _fake_entity( + _fake_field(name="Id", is_system_field=False, is_primary_key=False), + _pk_field(), # system Id with the same name + _fake_field(), + ) + ) + names = _field_names(ctx) + assert names.count("Id") == 1 From 90dffe6f51a4690c354250adeac91449bd24bf85 Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Mon, 29 Jun 2026 08:18:58 +0530 Subject: [PATCH 5/6] feat(datafabric ontology): first-class read-only entities, enforcement, debug, read-flow wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the ontology layer per review feedback: 1. df:ReadableEntity is now first-class. CompiledOntology gains `known_entities` (every df:entityKey the ontology declares) plus is_known / is_writable / is_read_only helpers. Previously a read-only entity was indistinguishable from one the ontology never mentioned — both were merely absent from entity_access. 2. Read-only is enforced, not advisory. validate_mutation_intent rejects a write to an entity the ontology knows but grants no write ops; the write handler prunes such entities from write_schemas so they never appear in the write tool description. (Verified: Customer is excluded and a direct update is rejected.) 3. Debug output. 
CompiledOntology.to_human_readable() renders a human-readable IR (entities + access modes, measure/state/reference field semantics, relationships); module-level format_ontology_debug(owl, compiled) pairs that IR with the raw OWL Turtle. The combined block is logged at DEBUG in the fetch/compile path and printed by both POC scripts during a run. 4. Ontology wired into the READ flow (reads still go through the existing NL-to-SQL path — ontology enriches, does not restrict). Shared maybe_fetch_and_compile_ontology helper used by both handlers; the read handler threads CompiledOntology into DataFabricGraph.create -> datafabric_prompt_builder, which emits an "## Ontology Context" section (access modes, relationships, FK/reference targets, state-value sources) for schema linking (P5). Also: poc_refund_drive.py verification read-back now addresses entities by GUID (get_record_async requires the id, not the name). Validated live on staging (dataservicetest/addyTest): debug IR shows Customer READ-ONLY; Customer pruned from writes + write rejected; refund flow insert + 3 updates persist, 4/4 verified. 752 tests pass, ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 1 + scripts/poc_refund_drive.py | 16 +++- scripts/run_agent_with_ontology.py | 36 ++++++++ .../datafabric_tool/compiled_ontology.py | 84 ++++++++++++++++++- .../datafabric_prompt_builder.py | 78 +++++++++++++++++ .../datafabric_tool/datafabric_subgraph.py | 16 +++- .../tools/datafabric_tool/datafabric_tool.py | 57 +++++-------- .../agent/tools/datafabric_tool/models.py | 1 + .../datafabric_tool/ontology_compiler.py | 72 +++++++++++++++- .../tools/datafabric_tool/write_validation.py | 26 ++++-- .../datafabric_tool/test_ontology_compiler.py | 59 +++++++++++++ .../datafabric_tool/test_write_integration.py | 83 ++++++++++++++++++ .../datafabric_tool/test_write_validation.py | 34 ++++++++ .../tools/test_datafabric_prompt_builder.py | 44 ++++++++++ 14 files changed, 558 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index 6d92a6dbb..a1449137b 100644 --- a/.gitignore +++ b/.gitignore @@ -195,3 +195,4 @@ samples/**/entry-points.json samples/**/uv.lock testcases/**/uv.lock +poc_out/ diff --git a/scripts/poc_refund_drive.py b/scripts/poc_refund_drive.py index 23ac08c57..44f5407ce 100644 --- a/scripts/poc_refund_drive.py +++ b/scripts/poc_refund_drive.py @@ -93,6 +93,13 @@ async def main() -> int: " ACTIVE — entity_access:", {k: sorted(v) for k, v in onto.entity_access.items()}, ) + # Always dump the raw OWL + human-readable IR for the POC. 
+ from uipath_langchain.agent.tools.datafabric_tool.ontology_compiler import ( + format_ontology_debug, + ) + + print() + print(format_ontology_debug(ONTOLOGY_TTL, onto)) else: print(" INACTIVE (metadata-only)") @@ -148,9 +155,12 @@ async def write(**kw): # noqa: ANN003 def g(rec, k): # noqa: ANN001 return rec.get(k) if isinstance(rec, dict) else getattr(rec, k, None) - order = await svc.get_record_async(by_suffix["PurchaseOrder"], ids["ORDER_ID"]) - risk = await svc.get_record_async(by_suffix["CustomerRisk"], ids["RISK_ID"]) - contact = await svc.get_record_async(by_suffix["Contact"], ids["CONTACT_ID"]) + # get_record_async addresses the entity by its GUID id, not its name + # (same rule the write executor follows). + id_by_suffix = {it.name.rsplit("_", 1)[1]: it.id for it in items} + order = await svc.get_record_async(id_by_suffix["PurchaseOrder"], ids["ORDER_ID"]) + risk = await svc.get_record_async(id_by_suffix["CustomerRisk"], ids["RISK_ID"]) + contact = await svc.get_record_async(id_by_suffix["Contact"], ids["CONTACT_ID"]) print("\n=== VERIFY (read-back) ===") checks = [ diff --git a/scripts/run_agent_with_ontology.py b/scripts/run_agent_with_ontology.py index c04043f9a..fd5162c31 100644 --- a/scripts/run_agent_with_ontology.py +++ b/scripts/run_agent_with_ontology.py @@ -141,6 +141,30 @@ def _fmt_set_map(m: dict[str, Any]) -> str: print(_fmt_set_map(compiled.entity_relationships)) +def _print_ontology_debug( + owl_turtle: str, compiled: Any, *, debug_ontology: bool +) -> None: + """Print the human-readable IR and, when enabled, the raw OWL Turtle. + + Uses ``ontology_compiler.format_ontology_debug`` so the CLI output mirrors + exactly what the runtime emits to debug logs. + """ + if compiled is None: + return + from uipath_langchain.agent.tools.datafabric_tool.ontology_compiler import ( + format_ontology_debug, + ) + + if debug_ontology: + # Full block: raw OWL + human-readable IR. 
+ print() + print(format_ontology_debug(owl_turtle, compiled)) + else: + # Human-readable IR only (no raw-OWL dump). + print("\n=== COMPILED ONTOLOGY (human-readable IR) ===") + print(compiled.to_human_readable()) + + def _load_entity_set(path: Path) -> list[Any]: """Load a JSON list of DataFabricEntityItem dicts into model instances.""" import json @@ -201,6 +225,10 @@ async def _async_main(args: argparse.Namespace) -> int: _print_ontology_facts( standalone_compiled, header="ONTOLOGY FACTS (standalone compile)" ) + # Richer human-readable IR (and, when --debug-ontology, the raw OWL too). + _print_ontology_debug( + ttl_text, standalone_compiled, debug_ontology=args.debug_ontology + ) # --- Step 2: load entity set. -------------------------------------------- try: @@ -309,6 +337,7 @@ async def _async_main(args: argparse.Namespace) -> int: ): print("\nontology ACTIVE -- handler compiled and is using the ontology.") _print_ontology_facts(compiled, header="ONTOLOGY FACTS (active in handler)") + _print_ontology_debug(ttl_text, compiled, debug_ontology=args.debug_ontology) else: print( "\nontology INACTIVE (fell back to metadata-only). The handler did " @@ -414,6 +443,13 @@ def _build_arg_parser() -> argparse.ArgumentParser: default="datafabric", help="Name for the Data Fabric context resource (default: datafabric).", ) + parser.add_argument( + "--debug-ontology", + action=argparse.BooleanOptionalAction, + default=True, + help="Dump the raw OWL Turtle alongside the human-readable compiled IR " + "(default: on). 
Use --no-debug-ontology to print only the IR.", + ) parser.add_argument( "--dry-run", action="store_true", diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py b/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py index 0cf0b596f..20a94622e 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/compiled_ontology.py @@ -32,6 +32,11 @@ class CompiledOntology(BaseModel): valid (if sparse) ``CompiledOntology`` rather than raising. """ + known_entities: set[str] = Field(default_factory=set) + """Every entity_key the ontology declares (has ``df:entityKey``), readable + or writable. A read-only ``df:ReadableEntity`` is "known but not in + ``entity_access``"; an entity absent here is unknown to the ontology.""" + entity_access: dict[str, set[str]] = Field(default_factory=dict) """entity_key -> set of allowed operations, e.g. ``{"insert", "update"}``.""" @@ -53,10 +58,87 @@ class CompiledOntology(BaseModel): def is_empty(self) -> bool: """True if no ontology facts were extracted (graceful-fallback signal).""" return not ( - self.entity_access + self.known_entities + or self.entity_access or self.measure_fields or self.state_fields or self.reference_fields or self.hitl_operations or self.entity_relationships ) + + def is_known(self, entity_key: str) -> bool: + """True if the ontology declares this entity (readable or writable).""" + return entity_key in self.known_entities + + def is_writable(self, entity_key: str) -> bool: + """True if the ontology grants any write access to this entity.""" + return entity_key in self.entity_access + + def is_read_only(self, entity_key: str) -> bool: + """True if the entity is declared by the ontology but grants no writes. + + A ``df:ReadableEntity`` is "known but not in ``entity_access``". 
+ """ + return ( + entity_key in self.known_entities and entity_key not in self.entity_access + ) + + def to_human_readable(self) -> str: + """Render a compact, grouped, human-readable summary of the IR. + + Sections: entity access modes (with HITL ops), field semantics + (measure / state / reference), and entity relationships. Intended + for debug logs and the ontology CLI scripts. + """ + lines: list[str] = [] + + # -- Entities (access mode + HITL) -- + lines.append("Entities:") + if self.known_entities: + for ek in sorted(self.known_entities): + ops = self.entity_access.get(ek) + if ops is not None: + mode = ( + f"WRITABLE [{','.join(sorted(ops))}]" + if ops + else "WRITABLE [no ops declared]" + ) + else: + mode = "READ-ONLY" + line = f" - {ek}: {mode}" + hitl = self.hitl_operations.get(ek) + if hitl: + line += f" (HITL: {','.join(sorted(hitl))})" + lines.append(line) + else: + lines.append(" (none)") + + # -- Field semantics -- + lines.append("Field semantics:") + if self.measure_fields: + lines.append(" Measure fields (additive / replacement):") + for k in sorted(self.measure_fields): + lines.append(f" - {k}: {self.measure_fields[k]}") + if self.state_fields: + lines.append(" State fields (choiceset / state-machine):") + for k in sorted(self.state_fields): + src = self.state_fields[k] or "(unspecified)" + lines.append(f" - {k}: {src}") + if self.reference_fields: + lines.append(" Reference fields (-> target entity):") + for k in sorted(self.reference_fields): + lines.append(f" - {k} -> {self.reference_fields[k]}") + if not (self.measure_fields or self.state_fields or self.reference_fields): + lines.append(" (none)") + + # -- Relationships -- + lines.append("Relationships:") + if self.entity_relationships: + for ek in sorted(self.entity_relationships): + targets = ", ".join(self.entity_relationships[ek]) + lines.append(f" - {ek} -> {targets}") + else: + lines.append(" (none)") + + return "\n".join(lines) diff --git 
a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 066f2bc54..9da176c55 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -13,6 +13,7 @@ from uipath.platform.entities import Entity +from .compiled_ontology import CompiledOntology from .datafabric_prompts import SQL_CONSTRAINTS from .models import ( EntitySchema, @@ -149,11 +150,71 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: return EntitySQLContext(entity_schema=schema, query_patterns=query_patterns) +def format_ontology_context(compiled_ontology: CompiledOntology) -> str: + """Render read-side schema-linking enrichment from a compiled ontology. + + This is informational context for the NL-to-SQL model (the P5 goal). It + surfaces per-entity access modes, entity relationships, reference/FK fields + (to guide join-path selection), and state fields (with their valid-value + source). It does NOT restrict reads. + + Args: + compiled_ontology: The compiled OWL ontology IR. + + Returns: + A markdown ``## Ontology Context`` section, or an empty string when the + ontology carries no facts. + """ + if compiled_ontology is None or compiled_ontology.is_empty(): + return "" + + lines: list[str] = ["## Ontology Context", ""] + lines.append( + "Ontology-derived schema-linking hints (informational; does not " + "restrict what you may read):" + ) + lines.append("") + + # Per-entity access mode (purely informational for the read model). + if compiled_ontology.known_entities: + lines.append("**Entity access modes:**") + for ek in sorted(compiled_ontology.known_entities): + mode = "WRITABLE" if compiled_ontology.is_writable(ek) else "READ-ONLY" + lines.append(f"- {ek}: {mode}") + lines.append("") + + # Entity relationships (entity -> related entities). 
+ if compiled_ontology.entity_relationships: + lines.append("**Entity relationships (entity -> related entities):**") + for ek in sorted(compiled_ontology.entity_relationships): + targets = ", ".join(compiled_ontology.entity_relationships[ek]) + lines.append(f"- {ek} -> {targets}") + lines.append("") + + # Reference / FK fields (guide join-path selection). + if compiled_ontology.reference_fields: + lines.append("**Reference / foreign-key fields (field -> target entity):**") + for k in sorted(compiled_ontology.reference_fields): + lines.append(f"- {k} -> {compiled_ontology.reference_fields[k]}") + lines.append("") + + # State fields and their valid-value source. + if compiled_ontology.state_fields: + lines.append("**State fields (field -> valid-value source):**") + for k in sorted(compiled_ontology.state_fields): + src = compiled_ontology.state_fields[k] or "(unspecified)" + lines.append(f"- {k} -> {src}") + lines.append("") + + return "\n".join(lines).rstrip() + "\n" + + def build_sql_context( entities: list[Entity], resource_description: str = "", base_system_prompt: str = "", prompt_version: str | None = None, + compiled_ontology: CompiledOntology | None = None, ) -> SQLContext: """Build the full SQL context from entities, prompts, and constraints. 
@@ -173,11 +234,18 @@ def build_sql_context( ) rendered_prompt = version.render(ctx) + ontology_context = ( + format_ontology_context(compiled_ontology) + if compiled_ontology is not None + else "" + ) + return SQLContext( base_system_prompt=base_system_prompt or None, resource_description=None, sql_expert_system_prompt=rendered_prompt, constraints=SQL_CONSTRAINTS, + ontology_context=ontology_context or None, entity_contexts=[build_entity_context(e) for e in entities], ) @@ -210,6 +278,10 @@ def format_sql_context(ctx: SQLContext) -> str: lines.append(ctx.resource_description) lines.append("") + if ctx.ontology_context: + lines.append(ctx.ontology_context.rstrip()) + lines.append("") + lines.append("## All available Data Fabric Entities") lines.append("") @@ -245,6 +317,7 @@ def build( resource_description: str = "", base_system_prompt: str = "", prompt_version: str | None = None, + compiled_ontology: CompiledOntology | None = None, ) -> str: """Build the full SQL prompt text for the inner sub-graph LLM. @@ -258,6 +331,10 @@ def build( base_system_prompt: Optional system prompt from the outer agent. prompt_version: Optional version key (e.g. ``"v0"``, ``"v1"``). Defaults to the registry's default. + compiled_ontology: Optional compiled OWL ontology. When present an + ``## Ontology Context`` section is appended with read-side + schema-linking enrichment (relationships, FK targets, state-value + sources, access modes). Purely informational — never restricts reads. Returns: Formatted prompt string for the inner LLM system message. 
@@ -270,6 +347,7 @@ def build( resource_description, base_system_prompt, prompt_version=prompt_version, + compiled_ontology=compiled_ontology, ) return format_sql_context(ctx) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py index 591227962..53053fb85 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_subgraph.py @@ -33,6 +33,7 @@ from ..datafabric_query_tool import DataFabricQueryTool from . import datafabric_prompt_builder +from .compiled_ontology import CompiledOntology from .models import DataFabricExecuteSqlInput logger = logging.getLogger(__name__) @@ -88,6 +89,7 @@ def __init__( max_iterations: int = 25, resource_description: str = "", base_system_prompt: str = "", + compiled_ontology: CompiledOntology | None = None, ) -> None: self._max_iterations = max_iterations self._execute_sql_tool = self._create_execute_sql_tool( @@ -95,7 +97,10 @@ def __init__( ) self._system_message = SystemMessage( content=datafabric_prompt_builder.build( - entities, resource_description, base_system_prompt + entities, + resource_description, + base_system_prompt, + compiled_ontology=compiled_ontology, ) ) self._inner_llm = llm.model_copy(update={"disable_streaming": True}).bind_tools( @@ -226,8 +231,14 @@ def create( max_iterations: int = 25, resource_description: str = "", base_system_prompt: str = "", + compiled_ontology: CompiledOntology | None = None, ) -> CompiledStateGraph[Any]: - """Create and return a compiled Data Fabric sub-graph.""" + """Create and return a compiled Data Fabric sub-graph. + + When *compiled_ontology* is supplied it enriches the read prompt with + schema-linking context (relationships, FK targets, state-value sources, + access modes). It never restricts reads. 
+ """ graph = DataFabricGraph( llm, entities, @@ -235,5 +246,6 @@ def create( max_iterations, resource_description, base_system_prompt, + compiled_ontology=compiled_ontology, ) return graph.compiled_graph diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py index bf367f2b0..a44e8d18f 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py @@ -33,6 +33,7 @@ DataFabricWriteInput, EntityWriteSchema, ) +from .ontology_compiler import maybe_fetch_and_compile_ontology from .write_schema_builder import build_write_tool_description from .write_validation import ( derive_writable_fields, @@ -65,6 +66,7 @@ def __init__( self._resource_description = resource_description self._base_system_prompt = base_system_prompt self._compiled: CompiledStateGraph[Any] | None = None + self._compiled_ontology: CompiledOntology | None = None self._init_lock = asyncio.Lock() async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]: @@ -91,12 +93,18 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]: "No Data Fabric entity schemas could be fetched. " "Check entity identifiers and permissions." ) + # Optional ontology layer enriches the read prompt (schema linking, + # P5). It does NOT restrict reads — best-effort and may be None. + self._compiled_ontology = await maybe_fetch_and_compile_ontology( + resolution.entities_service + ) self._compiled = DataFabricGraph.create( llm=self._llm, entities=resolution.entities, entities_service=resolution.entities_service, resource_description=self._resource_description, base_system_prompt=self._base_system_prompt, + compiled_ontology=self._compiled_ontology, ) return self._compiled @@ -217,10 +225,23 @@ async def _ensure_initialized(self) -> None: # platform exposes get_ontology_file_async. 
This method may only # exist on a feature branch — if it is absent we degrade gracefully # to the metadata-only write path (compiled_ontology stays None). - self._compiled_ontology = await self._maybe_compile_ontology( + self._compiled_ontology = await maybe_fetch_and_compile_ontology( resolution.entities_service ) + # Prune any entity the ontology marks read-only (a df:ReadableEntity) + # so the write tool description never advertises it (e.g. Customer). + # When the ontology is absent (None) nothing is pruned. + if self._compiled_ontology is not None: + read_only = [ + name + for name in self._write_schemas + if self._compiled_ontology.is_read_only(name) + ] + for name in read_only: + del self._write_schemas[name] + self._entity_id_by_key.pop(name, None) + entity_access = ( self._compiled_ontology.entity_access if self._compiled_ontology @@ -233,40 +254,6 @@ async def _ensure_initialized(self) -> None: self._executor = WriteExecutor(resolution.entities_service) - async def _maybe_compile_ontology( - self, entities_service: Any - ) -> CompiledOntology | None: - """Best-effort fetch + compile of the optional OWL ontology. - - Returns the compiled ontology, or ``None`` when no ontology is - available or the platform package does not expose the fetch method. - Never raises — any failure degrades to the metadata-only path. - """ - get_ontology = getattr(entities_service, "get_ontology_file_async", None) - if not callable(get_ontology): - logger.debug( - "EntitiesService has no get_ontology_file_async; " - "skipping ontology compilation (metadata-only writes)." 
- ) - return None - - from .ontology_compiler import compile_ontology - - try: - owl_turtle = await get_ontology("owl") - if not owl_turtle: - logger.debug("No OWL ontology returned; metadata-only writes.") - return None - compiled = compile_ontology(owl_turtle) - logger.debug( - "Compiled ontology with %d writable entities.", - len(compiled.entity_access), - ) - return compiled - except Exception as exc: # graceful no-op on any fetch/parse failure - logger.debug("Ontology fetch/compile skipped: %s", exc) - return None - async def __call__( self, entity_key: str, diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/models.py b/src/uipath_langchain/agent/tools/datafabric_tool/models.py index 30db0d3f7..0555f4de8 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/models.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/models.py @@ -72,6 +72,7 @@ class SQLContext(BaseModel): resource_description: str | None = None sql_expert_system_prompt: str | None = None constraints: str | None = None + ontology_context: str | None = None entity_contexts: list[EntitySQLContext] diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py index 27ebbd2ec..c48008020 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/ontology_compiler.py @@ -297,7 +297,13 @@ def _compound_key(field_uri: URIRef) -> str | None: if obj_key not in targets: targets.append(obj_key) + # Every entity carrying df:entityKey is "known" to the ontology — this + # makes df:ReadableEntity first-class: a read-only entity is known but not + # in entity_access (distinguishable from "unknown to the ontology"). 
+ known_entities = set(entity_key_by_uri.values()) + compiled = CompiledOntology( + known_entities=known_entities, entity_access=entity_access, measure_fields=measure_fields, state_fields=state_fields, @@ -306,8 +312,9 @@ def _compound_key(field_uri: URIRef) -> str | None: entity_relationships=entity_relationships, ) logger.debug( - "Compiled ontology: %d writable entities, %d measure, %d state, " - "%d reference fields, %d HITL entities", + "Compiled ontology: %d known entities, %d writable, %d measure, " + "%d state, %d reference fields, %d HITL entities", + len(compiled.known_entities), len(compiled.entity_access), len(compiled.measure_fields), len(compiled.state_fields), @@ -315,3 +322,64 @@ def _compound_key(field_uri: URIRef) -> str | None: len(compiled.hitl_operations), ) return compiled + + +def format_ontology_debug(owl_turtle: str, compiled: CompiledOntology) -> str: + """Render a debug block with both the raw OWL and the compiled IR. + + Returns a single string containing two clearly-headed sections: the raw + OWL Turtle source and the human-readable compiled IR. Useful for debug + logs and the ontology CLI scripts. + + Args: + owl_turtle: The raw OWL Turtle source the ontology was compiled from. + compiled: The compiled ontology IR. + + Returns: + A multi-section debug string. + """ + return ( + "=== RAW ONTOLOGY (OWL Turtle) ===\n" + f"{owl_turtle.strip()}\n" + "\n=== COMPILED ONTOLOGY (human-readable IR) ===\n" + f"{compiled.to_human_readable()}" + ) + + +async def maybe_fetch_and_compile_ontology( + entities_service: object, +) -> CompiledOntology | None: + """Best-effort fetch + compile of the optional OWL ontology. + + Shared by both the Data Fabric read and write handlers. Looks for + ``entities_service.get_ontology_file_async`` (which may only exist on a + feature branch), fetches the OWL file, compiles it, and emits a debug log + of the raw + compiled IR. Never raises: any absence/failure degrades to + ``None`` (the metadata-only path). 
+ + Args: + entities_service: The resolved EntitiesService instance. + + Returns: + The compiled ontology, or ``None`` when no ontology is available or + the platform package does not expose the fetch method. + """ + get_ontology = getattr(entities_service, "get_ontology_file_async", None) + if not callable(get_ontology): + logger.debug( + "EntitiesService has no get_ontology_file_async; " + "skipping ontology compilation (metadata-only)." + ) + return None + + try: + owl_turtle = await get_ontology("owl") + if not owl_turtle: + logger.debug("No OWL ontology returned; metadata-only.") + return None + compiled = compile_ontology(owl_turtle) + logger.debug(format_ontology_debug(owl_turtle, compiled)) + return compiled + except Exception as exc: # graceful no-op on any fetch/parse failure + logger.debug("Ontology fetch/compile skipped: %s", exc) + return None diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py index 5ce4e4e25..4f1c759d5 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/write_validation.py @@ -92,8 +92,11 @@ def validate_mutation_intent( When a *compiled_ontology* is supplied (the optional OWL ontology layer, see ``ontology_compiler``), the operation is additionally checked against the ontology's per-entity access modes: an operation not listed in - ``entity_access[entity_key]`` is rejected. When the ontology is ``None`` - the metadata-only behaviour is unchanged. + ``entity_access[entity_key]`` is rejected, and a write to an entity the + ontology declares but grants no write operations (a read-only + ``df:ReadableEntity``) is rejected with a read-only message. An entity + unknown to the ontology falls back to metadata-only validation. When the + ontology is ``None`` the metadata-only behaviour is unchanged. Args: intent: The write operation intent to validate. 
@@ -123,13 +126,24 @@ def validate_mutation_intent( # Only enforced when the ontology actually carries an access entry for # this entity (graceful fallback when the ontology is partial/absent). if compiled_ontology is not None: - allowed_ops = compiled_ontology.entity_access.get(intent.entity_key) - if allowed_ops is not None and op.value not in allowed_ops: + ek = intent.entity_key + allowed_ops = compiled_ontology.entity_access.get(ek) + if allowed_ops is not None: + if op.value not in allowed_ops: + errors.append( + f"Operation '{op.value}' is not allowed on '{ek}' " + f"by the ontology. Allowed operation(s): {sorted(allowed_ops)}" + ) + return errors + elif compiled_ontology.is_known(ek): + # The ontology declares this entity but grants no write operations + # -> read-only (df:ReadableEntity). errors.append( - f"Operation '{op.value}' is not allowed on '{intent.entity_key}' " - f"by the ontology. Allowed operation(s): {sorted(allowed_ops)}" + f"Entity '{ek}' is read-only per the ontology " + f"(no write operations declared)." ) return errors + # else: entity unknown to the ontology -> metadata-only fallback. 
# TODO(state-machine): when compiled_ontology.state_fields covers a field # being written, validate that the new value is a legal transition from diff --git a/tests/agent/tools/datafabric_tool/test_ontology_compiler.py b/tests/agent/tools/datafabric_tool/test_ontology_compiler.py index 37dedaa28..00a3479d9 100644 --- a/tests/agent/tools/datafabric_tool/test_ontology_compiler.py +++ b/tests/agent/tools/datafabric_tool/test_ontology_compiler.py @@ -10,6 +10,7 @@ from uipath_langchain.agent.tools.datafabric_tool.ontology_compiler import ( OntologyCompileError, compile_ontology, + format_ontology_debug, ) # A small refund-domain ontology in the .ttl dialect (subClassOf + actions + @@ -141,6 +142,64 @@ def test_entity_relationships(self, compiled: CompiledOntology) -> None: assert set(rels) == {"Order", "Customer"} assert compiled.entity_relationships["CustomerRisk"] == ["Customer"] + def test_known_entities_populated(self, compiled: CompiledOntology) -> None: + # Every entity carrying df:entityKey is known, readable or writable. + assert compiled.known_entities == { + "Customer", + "RefundRequest", + "Order", + "CustomerRisk", + } + + def test_is_known(self, compiled: CompiledOntology) -> None: + assert compiled.is_known("Customer") is True + assert compiled.is_known("RefundRequest") is True + assert compiled.is_known("NotAnEntity") is False + + def test_is_read_only_for_readable_entity(self, compiled: CompiledOntology) -> None: + # Customer is df:ReadableEntity -> known but not writable -> read-only. + assert compiled.is_read_only("Customer") is True + assert compiled.is_writable("Customer") is False + + def test_is_read_only_false_for_writable_entity( + self, compiled: CompiledOntology + ) -> None: + assert compiled.is_read_only("RefundRequest") is False + assert compiled.is_writable("RefundRequest") is True + # An entity unknown to the ontology is not read-only (it's just unknown). 
+ assert compiled.is_read_only("NotAnEntity") is False + + +class TestHumanReadableAndDebug: + """to_human_readable / format_ontology_debug smoke tests.""" + + @pytest.fixture + def compiled(self) -> CompiledOntology: + return compile_ontology(REFUND_OWL) + + def test_human_readable_contains_entities_and_modes( + self, compiled: CompiledOntology + ) -> None: + text = compiled.to_human_readable() + assert "Customer" in text + assert "RefundRequest" in text + assert "READ-ONLY" in text + assert "WRITABLE" in text + # A relationship line should be present. + assert "RefundRequest ->" in text + + def test_format_ontology_debug_has_both_sections( + self, compiled: CompiledOntology + ) -> None: + block = format_ontology_debug(REFUND_OWL, compiled) + assert "=== RAW ONTOLOGY (OWL Turtle) ===" in block + assert "=== COMPILED ONTOLOGY (human-readable IR) ===" in block + # Raw OWL content is present. + assert "df:entityKey" in block + # Human-readable facts are present. + assert "READ-ONLY" in block + assert "WRITABLE" in block + class TestRfcDialect: """The RFC §4.1 dialect: rdf:type df:WritableEntity + df:allowsOperation.""" diff --git a/tests/agent/tools/datafabric_tool/test_write_integration.py b/tests/agent/tools/datafabric_tool/test_write_integration.py index 5bdc18eb4..8ca57c815 100644 --- a/tests/agent/tools/datafabric_tool/test_write_integration.py +++ b/tests/agent/tools/datafabric_tool/test_write_integration.py @@ -582,6 +582,89 @@ async def test_choiceset_entity_not_writable(self) -> None: assert any("not configured for writes" in e for e in result["errors"]) +# --------------------------------------------------------------------------- +# 5b. Ontology prunes read-only entities from the write tool description +# --------------------------------------------------------------------------- + + +# Refund-set ontology: Customer is read-only (df:ReadableEntity); the rest are +# writable (df:WritableEntity) with action-derived operations. 
+_REFUND_PRUNE_OWL = """ +@prefix df: . +@prefix ex: . +@prefix owl: . +@prefix rdfs: . + +ex:Customer a owl:Class ; rdfs:subClassOf df:ReadableEntity ; df:entityKey "Customer" . +ex:RefundRequest a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "RefundRequest" . +ex:Order a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "Order" . +ex:Risk a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "Risk" . +ex:Contact a owl:Class ; rdfs:subClassOf df:WritableEntity ; df:entityKey "Contact" . + +ex:CreateRefund a df:InsertAction ; df:writeOperation "insert" ; df:targetEntity ex:RefundRequest . +ex:UpdateOrder a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Order . +ex:UpdateRisk a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Risk . +ex:UpdateContact a df:UpdateAction ; df:writeOperation "update" ; df:targetEntity ex:Contact . +""" + + +class TestOntologyPrunesReadOnlyFromWriteDescription: + """The write tool description must not advertise read-only entities.""" + + @pytest.fixture + def refund_entities(self) -> list[MagicMock]: + """All five entities resolve as native/writable from metadata alone. + + Customer is metadata-writable but the ontology marks it read-only, so + it must be pruned from the write schemas/description. + """ + return [ + _make_entity("Customer", [_make_field("Name", is_required=True)]), + _make_entity("RefundRequest", [_make_field("Amount", is_required=True)]), + _make_entity("Order", [_make_field("Status")]), + _make_entity("Risk", [_make_field("Score", sql_type_name="int")]), + _make_entity("Contact", [_make_field("Resolution")]), + ] + + @pytest.mark.asyncio + async def test_read_only_customer_pruned_from_write_schemas( + self, refund_entities: list[MagicMock] + ) -> None: + resolution = _make_resolution(refund_entities) + # Inject the ontology via the fetch hook the handler looks for. 
+ resolution.entities_service.get_ontology_file_async = AsyncMock( + return_value=_REFUND_PRUNE_OWL + ) + + with patch("uipath.platform.UiPath") as mock_uipath_cls: + mock_sdk = MagicMock() + mock_sdk.entities.resolve_entity_set_async = AsyncMock( + return_value=resolution + ) + mock_uipath_cls.return_value = mock_sdk + + entity_items = [ + {"id": n, "name": n, "folderId": "f1"} + for n in ["Customer", "RefundRequest", "Order", "Risk", "Contact"] + ] + resource = _make_context_resource(entity_items=entity_items) + tools = create_datafabric_tools(resource, _mock_llm()) + write_handler = tools[1].coroutine + + await write_handler._ensure_initialized() + + schemas = write_handler._write_schemas + assert "Customer" not in schemas # read-only -> pruned + assert set(schemas.keys()) == {"RefundRequest", "Order", "Risk", "Contact"} + + description = write_handler._write_tool_description + assert "Customer" not in description + assert "RefundRequest" in description + assert "Order" in description + assert "Risk" in description + assert "Contact" in description + + # --------------------------------------------------------------------------- # 6. Read tool is unchanged (renumbered from 5) # --------------------------------------------------------------------------- diff --git a/tests/agent/tools/datafabric_tool/test_write_validation.py b/tests/agent/tools/datafabric_tool/test_write_validation.py index 43c0770eb..4cbe56377 100644 --- a/tests/agent/tools/datafabric_tool/test_write_validation.py +++ b/tests/agent/tools/datafabric_tool/test_write_validation.py @@ -517,3 +517,37 @@ def test_ontology_without_entry_for_entity_does_not_constrain(self) -> None: ) errors = validate_mutation_intent(intent, self._schema(), ontology) assert errors == [] + + def test_ontology_read_only_entity_rejected(self) -> None: + """Entity known to the ontology but with no write ops -> read-only reject.""" + # Orders is declared (known) but not in entity_access -> read-only. 
+ ontology = CompiledOntology( + known_entities={"Orders", "RefundRequest"}, + entity_access={"RefundRequest": {"insert"}}, + ) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.update, + record_id="rec-1", + fields={"Amount": 10}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + assert len(errors) == 1 + assert "read-only" in errors[0] + + def test_ontology_unknown_entity_falls_back_to_metadata(self) -> None: + """Entity unknown to the ontology (not in known_entities) is NOT rejected + on a read-only basis — it falls back to metadata validation.""" + # Orders is neither in known_entities nor entity_access -> unknown. + ontology = CompiledOntology( + known_entities={"RefundRequest"}, + entity_access={"RefundRequest": {"insert"}}, + ) + intent = DataFabricWriteInput( + entity_key="Orders", + operation=EntityWriteOperation.insert, + fields={"OrderName": "Test", "Amount": 50}, + ) + errors = validate_mutation_intent(intent, self._schema(), ontology) + # Passes metadata validation (no read-only rejection applied). 
+ assert errors == [] diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 8483f89d6..d0c7c2bd1 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -140,3 +140,47 @@ def test_system_pk_name_collision_with_user_field_not_duplicated(): ) names = _field_names(ctx) assert names.count("Id") == 1 + + +def test_build_appends_ontology_context_when_compiled_ontology_passed(): + """When a compiled ontology is supplied, the read prompt gains an + '## Ontology Context' section with relationships and reference targets.""" + from uipath_langchain.agent.tools.datafabric_tool.compiled_ontology import ( + CompiledOntology, + ) + + ontology = CompiledOntology( + known_entities={"Ticket", "Customer"}, + entity_access={"Ticket": {"update"}}, + reference_fields={"Ticket.CustomerId": "Customer"}, + state_fields={"Ticket.status": "TicketStatusChoiceSet"}, + entity_relationships={"Ticket": ["Customer"]}, + ) + prompt = build([_fake_entity(_fake_field())], compiled_ontology=ontology) + + assert "## Ontology Context" in prompt + # Access modes surfaced (informational). + assert "READ-ONLY" in prompt + assert "WRITABLE" in prompt + # A relationship line. + assert "Ticket -> Customer" in prompt + # A reference-target (FK) line for join-path guidance. + assert "Ticket.CustomerId -> Customer" in prompt + # A state-value-source line. 
+ assert "TicketStatusChoiceSet" in prompt + + +def test_build_without_ontology_has_no_ontology_context(): + """When compiled_ontology is None the prompt is unchanged (no section).""" + prompt = build([_fake_entity(_fake_field())]) + assert "## Ontology Context" not in prompt + + +def test_build_with_empty_ontology_has_no_ontology_context(): + """An empty compiled ontology adds no section.""" + from uipath_langchain.agent.tools.datafabric_tool.compiled_ontology import ( + CompiledOntology, + ) + + prompt = build([_fake_entity(_fake_field())], compiled_ontology=CompiledOntology()) + assert "## Ontology Context" not in prompt From fd373f3fe1ef83b6b5086e78a65c0cf8f9c4516b Mon Sep 17 00:00:00 2001 From: Harshit Rohatgi Date: Mon, 29 Jun 2026 09:13:24 +0530 Subject: [PATCH 6/6] feat(datafabric ontology): full LLM-loop writes + run-script tracing Makes the full LLM-in-the-loop refund flow persist writes end-to-end (verified on staging dataservicetest/DataFabricFQS: insert RefundRequest + update Order/CustomerRisk/Contact all success, read-back confirmed). Root cause of the prior "writes planned but not dispatched": the write tool hardcoded `require_conversational_confirmation: True`, whose tool-node gate calls request_approval -> @durable_interrupt, suspending the graph for human approval. In a non-conversational/coded agent (no human/checkpointer) the graph suspended at the first write and ainvoke returned without executing it. - datafabric_tool.py: drop the unconditional `require_conversational_confirmation` from the write tool metadata. HITL confirmation is still applied per-resource for conversational agents by tool_factory; it is no longer forced on coded agents (where it can only deadlock). Deterministic guardrails remain: writability checks, ontology op-validation, field allowlist, read-only enforcement. 
- run_agent_with_ontology.py: add --trace (DEBUG logging surfaces the inner NL->SQL generated SQL per read), --api-flavor (default chat-completions), and print tool RESPONSES (not just calls) so reads/writes are visible. - test: assert the write tool no longer hardcodes the confirmation flag. 752 tests pass, ruff clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/run_agent_with_ontology.py | 56 ++++++++++++++++++- .../tools/datafabric_tool/datafabric_tool.py | 1 - .../datafabric_tool/test_write_integration.py | 8 ++- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/scripts/run_agent_with_ontology.py b/scripts/run_agent_with_ontology.py index fd5162c31..eed7b345e 100644 --- a/scripts/run_agent_with_ontology.py +++ b/scripts/run_agent_with_ontology.py @@ -52,6 +52,7 @@ import argparse import asyncio +import logging import sys from pathlib import Path from typing import Any @@ -278,7 +279,18 @@ async def _async_main(args: argparse.Namespace) -> int: # unlicensed product path and the gateway returns 403 "License # not available for LLM usage". "agentsplayground" uses the # developer's debug/playground quota — appropriate for a local run. - llm = get_chat_model(args.model, agenthub_config=args.agenthub_config) + # api_flavor forces the gateway/LangChain to use a specific API. + # The Responses API returns the terminal tool batch as raw + # function_call items that don't reliably map to LangChain + # .tool_calls — so the agent loop plans the writes but never + # dispatches them. Forcing 'chat-completions' yields standard + # tool_calls that the router executes. 
+ flavor = args.api_flavor or None + llm = get_chat_model( + args.model, + agenthub_config=args.agenthub_config, + **({"api_flavor": flavor} if flavor else {}), + ) except Exception as exc: raise _AuthOrNetworkError( f"could not construct chat model {args.model!r}: {exc}" @@ -386,18 +398,32 @@ async def _async_main(args: argparse.Namespace) -> int: ) return 1 - print("\n=== AGENT RUN RESULT ===") + print("\n=== AGENT RUN RESULT (calls + responses, in order) ===") result_messages = result.get("messages", []) if isinstance(result, dict) else [] for msg in result_messages: + # Tool CALLS the model emitted (the NL query for reads, the structured + # intent for writes). tool_calls = getattr(msg, "tool_calls", None) if tool_calls: for call in tool_calls: name = call.get("name") if isinstance(call, dict) else None cargs = call.get("args") if isinstance(call, dict) else None - print(f"[tool call] {name}({cargs})") + print(f"\n[tool call] {name}({cargs})") + # Tool RESPONSES (ToolMessage) — for query_datafabric this is the + # natural-language answer derived from the executed SQL; for the write + # tool it's the WriteResult JSON. + if msg.__class__.__name__ == "ToolMessage": + tool_name = getattr(msg, "name", "?") + content = getattr(msg, "content", "") + print(f"[tool response: {tool_name}]\n{content}") final = result_messages[-1] if result_messages else None print("\n=== FINAL MESSAGE ===") print(getattr(final, "content", final) if final is not None else "(no messages)") + if not args.trace: + print( + "\n(tip: re-run with --trace to see the generated SQL the inner " + "NL->SQL subgraph produced for each read.)" + ) return 0 @@ -457,11 +483,35 @@ def _build_arg_parser() -> argparse.ArgumentParser: "print the ontology facts and the generated write tool description, " "then exit. Degrades gracefully offline.", ) + parser.add_argument( + "--api-flavor", + default="chat-completions", + help="LLM gateway API flavor. 
Default 'chat-completions' yields " + "standard tool_calls the agent loop can dispatch. Pass '' to let the " + "gateway pick (may select 'responses', whose terminal tool batch is " + "not reliably executed by the standalone harness).", + ) + parser.add_argument( + "--trace", + action="store_true", + help="Enable DEBUG logging for the Data Fabric tool so the inner " + "NL->SQL subgraph prints each generated SQL statement " + "('execute_sql called with SQL: ...') and the read/write calls.", + ) return parser def main() -> int: args = _build_arg_parser().parse_args() + if args.trace: + # Surface the inner subgraph's generated SQL + tool invocations. + logging.basicConfig( + level=logging.DEBUG, + format="%(levelname)s %(name)s: %(message)s", + ) + logging.getLogger("uipath_langchain.agent.tools.datafabric_tool").setLevel( + logging.DEBUG + ) return asyncio.run(_async_main(args)) diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py index a44e8d18f..6b63566f8 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_tool.py @@ -442,7 +442,6 @@ def create_datafabric_tools( coroutine=write_handler, metadata={ "tool_type": "datafabric_write", - "require_conversational_confirmation": True, }, ) diff --git a/tests/agent/tools/datafabric_tool/test_write_integration.py b/tests/agent/tools/datafabric_tool/test_write_integration.py index 8ca57c815..cff75c99a 100644 --- a/tests/agent/tools/datafabric_tool/test_write_integration.py +++ b/tests/agent/tools/datafabric_tool/test_write_integration.py @@ -171,11 +171,15 @@ def test_tool_type_is_datafabric_write(self) -> None: assert write_tool.metadata is not None assert write_tool.metadata["tool_type"] == "datafabric_write" - def test_require_conversational_confirmation_is_true(self) -> None: + def 
test_no_unconditional_conversational_confirmation(self) -> None: + """The write tool does not hardcode the HITL confirmation gate. It is + applied per-resource for conversational agents by tool_factory, not + unconditionally here (which would suspend non-conversational/coded + agents on an interrupt with nothing to approve it).""" tools = create_datafabric_tools(_make_context_resource(), _mock_llm()) write_tool = tools[1] assert write_tool.metadata is not None - assert write_tool.metadata["require_conversational_confirmation"] is True + assert "require_conversational_confirmation" not in write_tool.metadata # ---------------------------------------------------------------------------