diff --git a/packages/uipath-pydantic-ai/pyproject.toml b/packages/uipath-pydantic-ai/pyproject.toml index 5b51024b..960fc98f 100644 --- a/packages/uipath-pydantic-ai/pyproject.toml +++ b/packages/uipath-pydantic-ai/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "pydantic-ai>=1.63.0, <2.0.0", "openinference-instrumentation-pydantic-ai>=0.1.12", "uipath>=2.10.2, <2.11.0", + "uipath-core>=0.5.18, <0.7.0", "uipath-runtime>=0.11.0, <0.12.0", ] classifiers = [ diff --git a/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/__init__.py b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/__init__.py new file mode 100644 index 00000000..3c9199ed --- /dev/null +++ b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/__init__.py @@ -0,0 +1,21 @@ +"""Governance integration for ``uipath-pydantic-ai``. + +Exposes :func:`install_governance` — wraps a ``pydantic_ai.Agent``'s ``model`` +with a :class:`GovernanceModel` that brackets every model call with governance +(BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL). Wired into a run by passing +an ``evaluator`` to :class:`UiPathPydanticAIRuntimeFactory`; the factory calls +:func:`install_governance` on the resolved agent. + +Importing this module has no side effects: no adapter is registered, no global +state is mutated. +""" + +from __future__ import annotations + +from .model import GovernanceCallbacks, GovernanceModel, install_governance + +__all__ = [ + "GovernanceCallbacks", + "GovernanceModel", + "install_governance", +] \ No newline at end of file diff --git a/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/model.py b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/model.py new file mode 100644 index 00000000..7b061703 --- /dev/null +++ b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/governance/model.py @@ -0,0 +1,441 @@ +"""Pydantic AI governance model wrapper for UiPath. + +Pydantic AI has the thinnest hook surface of the supported frameworks — there +is no per-agent callback or middleware system. But *everything* an agent does +flows through its ``Model``: the LLM request, the model's tool-call requests +(``ToolCallPart`` in the response), and the tool results fed back on the next +turn (``ToolReturnPart`` in the request). So this adapter governs by wrapping +``agent.model`` with a :class:`GovernanceModel` (a ``pydantic_ai`` ``WrapperModel``) +that brackets every model call: + +- BEFORE_MODEL — the latest request message's text (user prompt or tool result + being fed back), before delegating to the wrapped model. +- AFTER_TOOL — any ``ToolReturnPart`` in that latest request message. +- AFTER_MODEL — the ``TextPart`` content of the model's response. +- TOOL_CALL — each ``ToolCallPart`` the model emits (tool name + arguments). + +Both the non-streaming ``request`` and the streaming ``request_stream`` paths +are covered (the runtime uses ``agent.run`` and ``agent.iter`` respectively). + +Because the wrap is installed on ``agent.model`` in place, +:func:`install_governance` returns the **original agent**. + +Chain-level boundaries (BEFORE_AGENT / AFTER_AGENT) are owned by the +governance host and are intentionally not fired here. + +The evaluator protocol comes from ``uipath-core``; this package contributes +only the Pydantic-AI-specific wiring. Governance is installed by the runtime +factory: passing an ``evaluator`` to ``new_runtime`` calls +:func:`install_governance` on the resolved agent. No adapter registry, no +entry point, no import-time side effects. + +Audit emission and enforcement (raising :class:`GovernanceBlockException` on +DENY) are owned by the evaluator. The wrapper only extracts payloads and calls +the matching ``evaluate_*`` method; :class:`GovernanceBlockException` propagates +(aborting the run), anything else is logged and swallowed. +""" + +from __future__ import annotations + +import json +import logging +from contextlib import asynccontextmanager +from typing import Any, AsyncIterator, Dict, Iterable, List + +from pydantic_ai import Agent +from pydantic_ai.messages import ( + BuiltinToolCallPart, + BuiltinToolReturnPart, + ModelRequest, + TextPart, + ToolCallPart, + ToolReturnPart, + UserPromptPart, +) +from pydantic_ai.models import Model, ModelRequestParameters, StreamedResponse +from pydantic_ai.models.wrapper import WrapperModel +from pydantic_ai.settings import ModelSettings +from uipath.core.adapters import EvaluatorProtocol +from uipath.core.governance.exceptions import GovernanceBlockException + +logger = logging.getLogger(__name__) + +# Cap on the text blob passed to BEFORE_MODEL / AFTER_MODEL governance +# evaluation. Sized to match the runtime side and the other adapters. +_BEFORE_MODEL_TEXT_CAP = 64000 + + +def install_governance( + agent: Agent, + evaluator: EvaluatorProtocol, + *, + agent_name: str, + session_id: str, +) -> Agent: + """Wrap ``agent.model`` with a :class:`GovernanceModel` (mutated in place). + + Returns the original ``agent``. Idempotent: an already-wrapped model is + left untouched. If the agent has no concrete ``Model`` bound (the model is + supplied per-run), there is nothing to wrap and a warning is logged. + + Called by :class:`UiPathPydanticAIRuntimeFactory` when an ``evaluator`` + is supplied to ``new_runtime``. + """ + model = getattr(agent, "model", None) + if isinstance(model, GovernanceModel): + return agent # idempotent — already governed + if not isinstance(model, Model): + logger.warning( + "install_governance: agent has no bound Model to wrap (got %s); " + "model-layer governance will not fire", + type(model).__name__, + ) + return agent + callbacks = GovernanceCallbacks( + evaluator=evaluator, agent_name=agent_name, session_id=session_id + ) + # ``agent.model`` is a property whose setter stores ``_model``; the agent + # re-reads ``self.model`` on every run (``_get_model``), so this in-place + # wrap takes effect for all subsequent runs with no stale reference. A model + # supplied per-run (``agent.run(model=...)``) bypasses this wrap — that path + # is governed by whatever model the caller passes, not by us. + agent.model = GovernanceModel(model, callbacks) + logger.debug("Wrapped Pydantic AI agent model with governance") + return agent + + +class GovernanceModel(WrapperModel): + """A ``WrapperModel`` that brackets every model call with governance.""" + + def __init__(self, wrapped: Model, callbacks: "GovernanceCallbacks") -> None: + super().__init__(wrapped) + self._callbacks = callbacks + + async def request( + self, + messages: List[Any], + model_settings: ModelSettings | None, + model_request_parameters: ModelRequestParameters, + ) -> Any: + self._callbacks.on_request(messages) + response = await super().request( + messages, model_settings, model_request_parameters + ) + self._callbacks.on_response(response) + return response + + @asynccontextmanager + async def request_stream( + self, + messages: List[Any], + model_settings: ModelSettings | None, + model_request_parameters: ModelRequestParameters, + run_context: Any = None, + ) -> AsyncIterator[StreamedResponse]: + self._callbacks.on_request(messages) + async with super().request_stream( + messages, model_settings, model_request_parameters, run_context + ) as stream: + try: + yield stream + finally: + # Once the caller has consumed the stream the final response is + # assembled — govern it the same as the non-streaming path. This + # runs in a ``finally`` so AFTER_MODEL / TOOL_CALL still fire + # even if the consumer's ``async for`` raised partway through. + # + # Streaming governance is inherently post-hoc: the tokens have + # already been streamed to the caller by the time the response + # is complete, so a DENY here aborts the run but cannot un-send + # what was already emitted. The block exception still + # propagates; any other governance error is logged and swallowed + # so a governance bug can't break the run. + try: + self._callbacks.on_response(stream.get()) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning( + "after-stream governance check failed (continuing): %s", e + ) + + +class GovernanceCallbacks: + """Holds the evaluator + per-attach state, called by :class:`GovernanceModel`. + + :class:`GovernanceBlockException` is re-raised (it aborts the run); + anything else is logged and swallowed so a governance bug never breaks an + agent run. + """ + + def __init__( + self, + evaluator: EvaluatorProtocol, + agent_name: str, + session_id: str, + ) -> None: + self._evaluator = evaluator + self._agent_name = agent_name + self._session_id = session_id + # ``trace_id`` is intentionally NOT held here. A single uuid minted at + # install time would be identical for every call. Trace correlation is + # owned by the layer below (OTel span / HTTP resolve at call time), + # matching the LangChain adapter. + self._session_state: Dict[str, Any] = {"tool_calls": 0, "llm_calls": 0} + + # ----- before the model call -------------------------------------- + + def on_request(self, messages: Any) -> None: + """Fire BEFORE_MODEL (latest message text) + AFTER_TOOL (tool returns). + + Only the latest request message is scanned, so a tool result / prompt + is not re-evaluated on every subsequent model call (the full history is + re-sent each turn for context). + """ + latest = self._latest_request(messages) + if latest is None: + self._before_model("") + return + parts = getattr(latest, "parts", None) or [] + self._before_model(self._parts_input_text(parts)) + for part in parts: + if isinstance(part, ToolReturnPart): + self._after_tool( + _tool_name(part), + part.content, + tool_call_id=getattr(part, "tool_call_id", None), + ) + + # ----- after the model call --------------------------------------- + + def on_response(self, response: Any) -> None: + """Fire AFTER_MODEL (response text) + TOOL_CALL / AFTER_TOOL parts. + + A provider-executed **built-in** tool carries both its call + (``BuiltinToolCallPart``) and its result (``BuiltinToolReturnPart``) + inline in the model response, so AFTER_TOOL for built-in tools is fired + here — symmetric with the built-in TOOL_CALL — rather than on the next + request (where only user-tool ``ToolReturnPart``s arrive). + """ + parts = getattr(response, "parts", None) or [] + self._after_model(self._response_text(parts)) + for part in parts: + if isinstance(part, (ToolCallPart, BuiltinToolCallPart)): + self._tool_call( + _tool_name(part), + part.args, + tool_call_id=getattr(part, "tool_call_id", None), + ) + elif isinstance(part, BuiltinToolReturnPart): + self._after_tool( + _tool_name(part), + part.content, + tool_call_id=getattr(part, "tool_call_id", None), + ) + + # ----- individual evaluate_* wrappers (block-propagate, else swallow) -- + + def _before_model(self, text: str) -> None: + try: + self._evaluator.evaluate_before_model( + model_input=text, + agent_name=self._agent_name, + runtime_id=self._session_id, + ) + # Count only calls that passed governance — a DENY raises above, so + # a blocked call must not inflate the counter. + self._session_state["llm_calls"] = ( + self._session_state.get("llm_calls", 0) + 1 + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("before_model governance check failed (continuing): %s", e) + + def _after_model(self, text: str) -> None: + try: + self._evaluator.evaluate_after_model( + model_output=text, + agent_name=self._agent_name, + runtime_id=self._session_id, + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("after_model governance check failed (continuing): %s", e) + + def _tool_call( + self, tool_name: str, args: Any, tool_call_id: str | None = None + ) -> None: + try: + self._evaluator.evaluate_tool_call( + tool_name=tool_name, + tool_args=_coerce_args(args), + agent_name=self._agent_name, + runtime_id=self._session_id, + session_state=self._session_state, + tool_call_id=tool_call_id, + ) + # Count only calls that passed governance; the evaluator saw the + # count of prior tool calls, and a DENY raises before this bump. + self._session_state["tool_calls"] = ( + self._session_state.get("tool_calls", 0) + 1 + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("tool_call governance check failed (continuing): %s", e) + + def _after_tool( + self, tool_name: str, content: Any, tool_call_id: str | None = None + ) -> None: + try: + self._evaluator.evaluate_after_tool( + tool_name=tool_name, + tool_result="" if content is None else _stringify(content), + agent_name=self._agent_name, + runtime_id=self._session_id, + tool_call_id=tool_call_id, + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("after_tool governance check failed (continuing): %s", e) + + # ----- text extraction -------------------------------------------- + + @staticmethod + def _latest_request(messages: Any) -> Any: + """Return the most recent ``ModelRequest`` message, or ``None``. + + Scans from the end for the last ``ModelRequest`` rather than blindly + taking ``messages[-1]``: the history can end with a ``ModelResponse`` + (e.g. mid tool-call round-trips), and treating that as request input + would scan the wrong side of the exchange. + """ + if not messages or not isinstance(messages, (list, tuple)): + return None + for message in reversed(messages): + if isinstance(message, ModelRequest): + return message + return None + + @classmethod + def _parts_input_text(cls, parts: Any) -> str: + """Join governance-relevant input text from a request message's parts. + + Covers user prompts and tool-return content (the model's input on a + follow-up turn). Joined with a running cap so an oversized tool-return + can't build a multi-megabyte string before the final slice. + """ + + def _pieces() -> Iterable[str]: + for part in parts: + if isinstance(part, UserPromptPart): + yield _content_text(part.content) + elif isinstance(part, ToolReturnPart): + yield _stringify(part.content) + + return _join_within_cap(_pieces(), _BEFORE_MODEL_TEXT_CAP) + + @classmethod + def _response_text(cls, parts: Any) -> str: + """Join ``TextPart`` content from a model response's parts (running cap).""" + + def _pieces() -> Iterable[str]: + for part in parts: + if isinstance(part, TextPart) and part.content: + yield part.content + + return _join_within_cap(_pieces(), _BEFORE_MODEL_TEXT_CAP) + + +# -------------------------------------------------------------------------- +# Helpers +# -------------------------------------------------------------------------- + + +def _join_within_cap(pieces: Iterable[str], cap: int) -> str: + """Join non-empty ``pieces`` with newlines, stopping once ``cap`` is hit. + + Bounds the work (and the allocation) to ``cap`` characters instead of + building the full string and slicing afterwards. + """ + collected: List[str] = [] + remaining = cap + for piece in pieces: + if remaining <= 0: + break + if not piece: + continue + collected.append(piece[:remaining]) + remaining -= len(piece) + 1 + return "\n".join(collected)[:cap] + + +def _content_text(content: Any) -> str: + """Render a ``UserPromptPart.content`` (str or list of items) as text.""" + if content is None: + return "" + if isinstance(content, str): + return content[:_BEFORE_MODEL_TEXT_CAP] + if isinstance(content, (list, tuple)): + + def _pieces() -> Iterable[str]: + for item in content: + if isinstance(item, str): + yield item + else: + text = getattr(item, "text", None) + if isinstance(text, str): + yield text + + return _join_within_cap(_pieces(), _BEFORE_MODEL_TEXT_CAP) + return _stringify(content) + + +def _tool_name(part: Any) -> str: + """Return ``part.tool_name`` or ``"unknown"``, logging the fallback. + + A missing tool name means TOOL_CALL / AFTER_TOOL can't be attributed to a + real tool, so surface it rather than silently reporting ``"unknown"``. + """ + name = getattr(part, "tool_name", None) + if name: + return name + logger.warning( + "governance: %s carries no tool_name; reporting 'unknown'", + type(part).__name__, + ) + return "unknown" + + +def _coerce_args(args: Any) -> Dict[str, Any]: + """Normalise ``ToolCallPart.args`` (dict / JSON string / None) to a dict.""" + if args is None: + return {} + if isinstance(args, dict): + return args + if isinstance(args, str): + try: + parsed = json.loads(args) + return parsed if isinstance(parsed, dict) else {"_": parsed} + except (TypeError, ValueError): + # Preserve the raw string so an arg-based policy can still scan it; + # a malformed payload must not be a way to slip past governance. + return {"_raw": args} + return {} + + +def _stringify(value: Any, cap: int = _BEFORE_MODEL_TEXT_CAP) -> str: + """Render a dict / object payload as compact, scannable text, capped. + + Bounded by ``cap`` so an oversized tool result / return content can't hand + a multi-megabyte string to the evaluator. + """ + if isinstance(value, str): + return value[:cap] + try: + return json.dumps(value, default=str, ensure_ascii=False)[:cap] + except (TypeError, ValueError): + return str(value)[:cap] diff --git a/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/runtime/factory.py b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/runtime/factory.py index c415baef..593291ab 100644 --- a/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/runtime/factory.py +++ b/packages/uipath-pydantic-ai/src/uipath_pydantic_ai/runtime/factory.py @@ -4,6 +4,7 @@ from typing import Any from pydantic_ai import Agent +from uipath.core.adapters import EvaluatorProtocol from uipath.runtime import ( UiPathRuntimeContext, UiPathRuntimeFactorySettings, @@ -12,6 +13,7 @@ ) from uipath.runtime.errors import UiPathErrorCategory +from uipath_pydantic_ai.governance import install_governance from uipath_pydantic_ai.runtime.config import PydanticAiConfig from uipath_pydantic_ai.runtime.errors import ( UiPathPydanticAIErrorCode, @@ -215,6 +217,7 @@ async def _create_runtime_instance( agent: Agent, runtime_id: str, entrypoint: str, + evaluator: EvaluatorProtocol | None = None, ) -> UiPathRuntimeProtocol: """ Create a runtime instance from an agent. @@ -223,10 +226,20 @@ async def _create_runtime_instance( agent: The PydanticAI Agent runtime_id: Unique identifier for the runtime instance entrypoint: Agent entrypoint name + evaluator: When supplied, governance is installed on the agent's + model in place via :func:`install_governance`. Returns: Configured runtime instance """ + if evaluator is not None: + install_governance( + agent, + evaluator, + agent_name=entrypoint, + session_id=runtime_id, + ) + return UiPathPydanticAIRuntime( agent=agent, runtime_id=runtime_id, @@ -242,7 +255,9 @@ async def new_runtime( Args: entrypoint: Agent name from pydantic_ai.json runtime_id: Unique identifier for the runtime instance - **kwargs: Additional keyword arguments (unused) + **kwargs: Forwarded factory kwargs. Recognized: ``evaluator`` + (``EvaluatorProtocol``) — when present, governance is installed + on the agent's model via :func:`install_governance`. Returns: Configured runtime instance with agent @@ -252,6 +267,7 @@ async def new_runtime( return await self._create_runtime_instance( agent=agent, runtime_id=runtime_id, + evaluator=kwargs.get("evaluator"), entrypoint=entrypoint, ) diff --git a/packages/uipath-pydantic-ai/tests/governance/__init__.py b/packages/uipath-pydantic-ai/tests/governance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packages/uipath-pydantic-ai/tests/governance/test_model.py b/packages/uipath-pydantic-ai/tests/governance/test_model.py new file mode 100644 index 00000000..d8b3cf6c --- /dev/null +++ b/packages/uipath-pydantic-ai/tests/governance/test_model.py @@ -0,0 +1,522 @@ +"""Unit tests for the Pydantic AI governance model wrapper. + +These tests use real ``pydantic_ai`` message parts (``UserPromptPart`` etc.) +so the part-extraction logic is exercised against the actual types, plus the +adapter's model-wrapping attach/detach against a real ``Agent`` (driven by the +offline ``TestModel``). + +The package is configured with ``asyncio_mode = "auto"``, so ``async def`` +tests run without an explicit marker. +""" + +from __future__ import annotations + +import logging +from types import SimpleNamespace +from typing import Any, List + +import pytest +from pydantic_ai import Agent +from pydantic_ai.messages import ( + BuiltinToolCallPart, + BuiltinToolReturnPart, + ModelRequest, + ModelResponse, + TextPart, + ToolCallPart, + ToolReturnPart, + UserPromptPart, +) +from pydantic_ai.models.test import TestModel +from uipath.core.governance.exceptions import GovernanceBlockException + +from uipath_pydantic_ai.governance.model import ( + _BEFORE_MODEL_TEXT_CAP, + GovernanceCallbacks, + GovernanceModel, + _coerce_args, + install_governance, +) + +# -------------------------------------------------------------------------- +# Fakes +# -------------------------------------------------------------------------- + + +class FakeEvaluator: + """Records evaluate_* calls; optionally BLOCKs on a named hook.""" + + def __init__(self, block_on: str | None = None) -> None: + self.block_on = block_on + self.calls: List[tuple[str, dict[str, Any]]] = [] + + def _record(self, hook: str, **kwargs: Any) -> None: + self.calls.append((hook, kwargs)) + if self.block_on == hook: + raise GovernanceBlockException("blocked") # type: ignore[call-arg] + + def evaluate_before_agent(self, *args: Any, **kwargs: Any) -> Any: + self._record("before_agent", **kwargs) + + def evaluate_after_agent(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_agent", **kwargs) + + def evaluate_before_model(self, *args: Any, **kwargs: Any) -> Any: + self._record("before_model", **kwargs) + + def evaluate_after_model(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_model", **kwargs) + + def evaluate_tool_call(self, *args: Any, **kwargs: Any) -> Any: + self._record("tool_call", **kwargs) + + def evaluate_after_tool(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_tool", **kwargs) + + +def _make_callbacks(ev: FakeEvaluator) -> GovernanceCallbacks: + return GovernanceCallbacks(evaluator=ev, agent_name="agent-1", session_id="sess-1") + + +def _hooks(ev: FakeEvaluator) -> List[str]: + return [h for h, _ in ev.calls] + + +# -------------------------------------------------------------------------- +# install_governance +# -------------------------------------------------------------------------- + + +def test_install_governance_wraps_model(): + agent = Agent(model=TestModel()) + returned = install_governance( + agent, FakeEvaluator(), agent_name="x", session_id="s" + ) + assert returned is agent + assert isinstance(agent.model, GovernanceModel) + + +def test_install_governance_is_idempotent(): + agent = Agent(model=TestModel()) + ev = FakeEvaluator() + install_governance(agent, ev, agent_name="x", session_id="s") + wrapped = agent.model + install_governance(agent, ev, agent_name="x", session_id="s") + assert agent.model is wrapped # not double-wrapped + + +def test_install_governance_warns_when_no_bound_model(caplog): + agent = Agent() # no model bound + with caplog.at_level(logging.WARNING): + install_governance(agent, FakeEvaluator(), agent_name="x", session_id="s") + assert any("no bound Model" in r.message for r in caplog.records) + + +# -------------------------------------------------------------------------- +# Factory wiring — the evaluator kwarg drives install_governance +# -------------------------------------------------------------------------- + + +def _factory_without_init(): + """A factory instance that skips __init__ (avoids config/IO).""" + from uipath_pydantic_ai.runtime.factory import UiPathPydanticAIRuntimeFactory + + return UiPathPydanticAIRuntimeFactory.__new__(UiPathPydanticAIRuntimeFactory) + + +async def test_factory_installs_governance_when_evaluator_supplied(monkeypatch): + from uipath_pydantic_ai.runtime import factory as factory_mod + + monkeypatch.setattr( + factory_mod, "UiPathPydanticAIRuntime", lambda **kw: SimpleNamespace(**kw) + ) + agent = Agent(model=TestModel()) + await _factory_without_init()._create_runtime_instance( + agent=agent, runtime_id="r", entrypoint="e", evaluator=FakeEvaluator() + ) + assert isinstance(agent.model, GovernanceModel) + + +async def test_factory_skips_governance_without_evaluator(monkeypatch): + from uipath_pydantic_ai.runtime import factory as factory_mod + + monkeypatch.setattr( + factory_mod, "UiPathPydanticAIRuntime", lambda **kw: SimpleNamespace(**kw) + ) + agent = Agent(model=TestModel()) + original = agent.model + await _factory_without_init()._create_runtime_instance( + agent=agent, runtime_id="r", entrypoint="e" + ) + assert agent.model is original + + +# -------------------------------------------------------------------------- +# on_request → BEFORE_MODEL + AFTER_TOOL +# -------------------------------------------------------------------------- + + +def test_on_request_fires_before_model_with_latest_user_prompt(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + messages = [ + ModelRequest(parts=[UserPromptPart(content="old turn")]), + ModelRequest(parts=[UserPromptPart(content="the question")]), + ] + cb.on_request(messages) + assert _hooks(ev) == ["before_model"] + assert ev.calls[0][1]["model_input"] == "the question" + + +def test_on_request_fires_after_tool_for_tool_return(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + messages = [ + ModelRequest( + parts=[ + ToolReturnPart( + tool_name="lookup", content={"balance": "1000"}, tool_call_id="c1" + ) + ] + ) + ] + cb.on_request(messages) + # both BEFORE_MODEL (tool result is the model's new input) and AFTER_TOOL fire + assert "before_model" in _hooks(ev) + after_tool = [kw for h, kw in ev.calls if h == "after_tool"] + assert after_tool and after_tool[0]["tool_name"] == "lookup" + assert "1000" in after_tool[0]["tool_result"] + + +def test_on_request_caps_text(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + huge = "x" * (_BEFORE_MODEL_TEXT_CAP + 5000) + cb.on_request([ModelRequest(parts=[UserPromptPart(content=huge)])]) + assert len(ev.calls[0][1]["model_input"]) <= _BEFORE_MODEL_TEXT_CAP + + +def test_on_request_empty(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.on_request([]) + assert ev.calls[0][1]["model_input"] == "" + + +# -------------------------------------------------------------------------- +# on_response → AFTER_MODEL + TOOL_CALL +# -------------------------------------------------------------------------- + + +def test_on_response_fires_after_model_and_tool_call(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + response = ModelResponse( + parts=[ + TextPart(content="thinking out loud"), + ToolCallPart(tool_name="transfer", args={"amount": 50}, tool_call_id="c1"), + ] + ) + cb.on_response(response) + assert "after_model" in _hooks(ev) and "tool_call" in _hooks(ev) + after_model = [kw for h, kw in ev.calls if h == "after_model"][0] + assert after_model["model_output"] == "thinking out loud" + tool_call = [kw for h, kw in ev.calls if h == "tool_call"][0] + assert tool_call["tool_name"] == "transfer" + assert tool_call["tool_args"] == {"amount": 50} + assert tool_call["session_state"]["tool_calls"] == 1 + + +def test_on_response_fires_after_tool_for_builtin_tool_result(): + """A provider-executed built-in tool carries its result inline in the + response (BuiltinToolReturnPart); AFTER_TOOL must fire for it — symmetric + with the built-in TOOL_CALL.""" + ev = FakeEvaluator() + cb = _make_callbacks(ev) + response = ModelResponse( + parts=[ + BuiltinToolCallPart( + tool_name="web_search", args={"q": "x"}, tool_call_id="b1" + ), + BuiltinToolReturnPart( + tool_name="web_search", content={"results": "found"}, tool_call_id="b1" + ), + ] + ) + cb.on_response(response) + hooks = _hooks(ev) + assert "tool_call" in hooks and "after_tool" in hooks + after_tool = [kw for h, kw in ev.calls if h == "after_tool"][0] + assert after_tool["tool_name"] == "web_search" + assert after_tool["tool_call_id"] == "b1" + assert "found" in after_tool["tool_result"] + + +def test_on_response_coerces_json_string_args(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + response = ModelResponse( + parts=[ToolCallPart(tool_name="t", args='{"x": 1}', tool_call_id="c1")] + ) + cb.on_response(response) + tool_call = [kw for h, kw in ev.calls if h == "tool_call"][0] + assert tool_call["tool_args"] == {"x": 1} + + +# -------------------------------------------------------------------------- +# GovernanceModel.request brackets a wrapped model +# -------------------------------------------------------------------------- + + +async def test_governance_model_request_brackets_call(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + order: List[str] = [] + + class FakeWrapped: + async def request(self, messages, settings, params): + order.append("MODEL_CALL") + return ModelResponse(parts=[TextPart(content="Your balance is 1000.")]) + + gm = GovernanceModel.__new__(GovernanceModel) # bypass WrapperModel init + gm.wrapped = FakeWrapped() # type: ignore[assignment] # test double, not a real Model + gm._callbacks = cb + messages = [ModelRequest(parts=[UserPromptPart(content="What is my balance?")])] + await gm.request(messages, None, None) # type: ignore[arg-type] + + assert order == ["MODEL_CALL"] + assert _hooks(ev) == ["before_model", "after_model"] + assert ev.calls[0][1]["model_input"] == "What is my balance?" + assert ev.calls[1][1]["model_output"] == "Your balance is 1000." + + +async def test_governance_model_request_stream_block_propagates(): + # A DENY during the after-stream check must abort the run, exactly like the + # non-streaming request() path — it must not be swallowed by the catch-all. + from contextlib import asynccontextmanager + from types import SimpleNamespace + + cb = _make_callbacks(FakeEvaluator(block_on="tool_call")) + denied = ModelResponse( + parts=[ToolCallPart(tool_name="t", args={}, tool_call_id="c1")] + ) + + class FakeWrapped: + @asynccontextmanager + async def request_stream(self, *_a, **_k): + yield SimpleNamespace(get=lambda: denied) + + gm = GovernanceModel.__new__(GovernanceModel) # bypass WrapperModel init + gm.wrapped = FakeWrapped() # type: ignore[assignment] # test double, not a real Model + gm._callbacks = cb + messages = [ModelRequest(parts=[UserPromptPart(content="hi")])] + with pytest.raises(GovernanceBlockException): + async with gm.request_stream(messages, None, None) as stream: # type: ignore[arg-type] + assert stream is not None + + +async def test_governance_model_request_stream_governs_finalized_response(): + """Streaming happy path: BEFORE_MODEL fires up front, and AFTER_MODEL runs + on the finalized response assembled after the caller consumes the stream.""" + from contextlib import asynccontextmanager + from types import SimpleNamespace + + ev = FakeEvaluator() + cb = _make_callbacks(ev) + final = ModelResponse(parts=[TextPart(content="the streamed answer")]) + + class FakeWrapped: + @asynccontextmanager + async def request_stream(self, *_a, **_k): + yield SimpleNamespace(get=lambda: final) + + gm = GovernanceModel.__new__(GovernanceModel) + gm.wrapped = FakeWrapped() # type: ignore[assignment] # test double, not a real Model + gm._callbacks = cb + messages = [ModelRequest(parts=[UserPromptPart(content="the question")])] + + async with gm.request_stream(messages, None, None) as stream: # type: ignore[arg-type] + # BEFORE_MODEL already fired; AFTER_MODEL deferred until the stream + # context exits (final response is assembled). + assert _hooks(ev) == ["before_model"] + assert stream is not None + + assert _hooks(ev) == ["before_model", "after_model"] + assert ev.calls[-1][1]["model_output"] == "the streamed answer" + + +async def test_governance_model_request_stream_governs_even_if_consumer_raises(): + """If the consumer's ``async for`` raises, AFTER_MODEL still fires (finally), + and the consumer's exception propagates.""" + from contextlib import asynccontextmanager + from types import SimpleNamespace + + ev = FakeEvaluator() + cb = _make_callbacks(ev) + final = ModelResponse(parts=[TextPart(content="partial answer")]) + + class FakeWrapped: + @asynccontextmanager + async def request_stream(self, *_a, **_k): + yield SimpleNamespace(get=lambda: final) + + gm = GovernanceModel.__new__(GovernanceModel) + gm.wrapped = FakeWrapped() # type: ignore[assignment] # test double, not a real Model + gm._callbacks = cb + messages = [ModelRequest(parts=[UserPromptPart(content="hi")])] + + with pytest.raises(RuntimeError, match="consumer blew up"): + async with gm.request_stream(messages, None, None): # type: ignore[arg-type] + raise RuntimeError("consumer blew up") + + assert "after_model" in _hooks(ev) # ran despite the consumer error + + +def test_latest_request_skips_trailing_model_response(): + """History can end with a ModelResponse (mid tool round-trip); BEFORE_MODEL + must scan the last ModelRequest, not the response.""" + ev = FakeEvaluator() + cb = _make_callbacks(ev) + messages = [ + ModelRequest(parts=[UserPromptPart(content="the real question")]), + ModelResponse(parts=[TextPart(content="assistant reply")]), + ] + cb.on_request(messages) + assert ev.calls[0][1]["model_input"] == "the real question" + + +def test_tool_call_and_after_tool_pass_tool_call_id(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.on_response( + ModelResponse( + parts=[ToolCallPart(tool_name="t", args={}, tool_call_id="call-42")] + ) + ) + tool_call = [kw for h, kw in ev.calls if h == "tool_call"][0] + assert tool_call["tool_call_id"] == "call-42" + + ev2 = FakeEvaluator() + cb2 = _make_callbacks(ev2) + cb2.on_request( + [ + ModelRequest( + parts=[ + ToolReturnPart(tool_name="t", content="ok", tool_call_id="call-7") + ] + ) + ] + ) + after_tool = [kw for h, kw in ev2.calls if h == "after_tool"][0] + assert after_tool["tool_call_id"] == "call-7" + + +# -------------------------------------------------------------------------- +# helpers + enforcement +# -------------------------------------------------------------------------- + + +def test_coerce_args_variants(): + assert _coerce_args({"a": 1}) == {"a": 1} + assert _coerce_args('{"a": 1}') == {"a": 1} + assert _coerce_args(None) == {} + # malformed JSON is preserved (not dropped) so arg-based policies can scan it + assert _coerce_args("not json") == {"_raw": "not json"} + + +def test_block_in_before_model_propagates(): + cb = _make_callbacks(FakeEvaluator(block_on="before_model")) + with pytest.raises(GovernanceBlockException): + cb.on_request([ModelRequest(parts=[UserPromptPart(content="hi")])]) + + +def test_block_in_tool_call_propagates(): + cb = _make_callbacks(FakeEvaluator(block_on="tool_call")) + with pytest.raises(GovernanceBlockException): + cb.on_response( + ModelResponse( + parts=[ToolCallPart(tool_name="t", args={}, tool_call_id="c1")] + ) + ) + + +def test_non_block_exception_is_swallowed(caplog): + class Boom: + def evaluate_before_model(self, **_: Any) -> None: + raise RuntimeError("evaluator bug") + + cb = GovernanceCallbacks(evaluator=Boom(), agent_name="a", session_id="s") # type: ignore[arg-type] + with caplog.at_level(logging.WARNING): + cb.on_request([ModelRequest(parts=[UserPromptPart(content="x")])]) + assert any("governance check failed" in r.message for r in caplog.records) + + +# -------------------------------------------------------------------------- +# coverage: swallow paths on every hook + extraction/helper edges +# -------------------------------------------------------------------------- + + +class _Boom: + """Evaluator whose every evaluate_* raises a non-block error.""" + + def __getattr__(self, _name: str) -> Any: + def _raise(*_a: Any, **_k: Any) -> None: + raise RuntimeError("evaluator bug") + + return _raise + + +@pytest.mark.parametrize( + "invoke", + [ + lambda cb: cb.on_response( + ModelResponse( + parts=[ + TextPart(content="x"), + ToolCallPart(tool_name="t", args={}, tool_call_id="c"), + ] + ) + ), + lambda cb: cb.on_request( + [ + ModelRequest( + parts=[ToolReturnPart(tool_name="t", content="r", tool_call_id="c")] + ) + ] + ), + ], +) +def test_response_and_tool_paths_swallow_non_block_errors(invoke, caplog): + cb = GovernanceCallbacks(evaluator=_Boom(), agent_name="a", session_id="s") # type: ignore[arg-type] + with caplog.at_level(logging.WARNING): + invoke(cb) # must NOT raise — a governance bug can't break the run + assert any("governance check failed" in r.message for r in caplog.records) + + +def test_on_request_with_only_model_response_scans_empty(): + # history ending with (only) a ModelResponse → no ModelRequest → empty input + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.on_request([ModelResponse(parts=[TextPart(content="assistant")])]) + assert ev.calls[0][1]["model_input"] == "" + + +def test_extraction_and_helper_edges(): + from uipath_pydantic_ai.governance.model import ( + _content_text, + _stringify, + _tool_name, + ) + + # _content_text: None / str / list-of-(str|obj) / bare object + assert _content_text(None) == "" + assert _content_text("plain") == "plain" + assert "a" in _content_text(["a", SimpleNamespace(text="b")]) + assert isinstance(_content_text(SimpleNamespace()), str) + # _stringify: str passthrough + circular-ref fallback (no crash) + assert _stringify("hi") == "hi" + circular: dict[str, Any] = {} + circular["self"] = circular + assert isinstance(_stringify(circular), str) + # _tool_name: missing name → warns + "unknown" + assert _tool_name(SimpleNamespace()) == "unknown" diff --git a/packages/uipath-pydantic-ai/uv.lock b/packages/uipath-pydantic-ai/uv.lock index 64691c63..e02e7469 100644 --- a/packages/uipath-pydantic-ai/uv.lock +++ b/packages/uipath-pydantic-ai/uv.lock @@ -3654,16 +3654,16 @@ wheels = [ [[package]] name = "uipath-core" -version = "0.5.18" +version = "0.5.28" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-instrumentation" }, { name = "opentelemetry-sdk" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/14/b1/d4e555a1a2ccf298195a5f2968e538b0cea8592b3e03f43fc12b178d6c69/uipath_core-0.5.18.tar.gz", hash = "sha256:63ebe8bdb818ca30a4bc9ab0ea8171315680691429931282939359ce039401ab", size = 131988, upload-time = "2026-06-08T14:04:49.688Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/f9/8d2f1d98cbebbcf059cf4561f38f34ad4cd58423e4f15cad22bd297a2563/uipath_core-0.5.28.tar.gz", hash = "sha256:942987f6b612c64f93d612ad7b242276ed75f129fdd8f25bc71c24ec8887e388", size = 130578, upload-time = "2026-06-30T14:04:48.841Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/57/de/1a820b33f7bff4565d7649772bc54c88480ac7e70f707097f7da37d05157/uipath_core-0.5.18-py3-none-any.whl", hash = "sha256:351d6faeecfc6a0acea93182e01526f39c04a77e09fa0444be5f4fb580463f5a", size = 54572, upload-time = "2026-06-08T14:04:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/e8/1e/385bb166232a57ebe938cc57ad2717f350bc922bb5d2ce31af84306b7569/uipath_core-0.5.28-py3-none-any.whl", hash = "sha256:b952a46a21710073cbc16d6d5684e9aa645c107f57a636b778cfb94aa81a1e48", size = 54980, upload-time = "2026-06-30T14:04:47.374Z" }, ] [[package]] @@ -3691,6 +3691,7 @@ dependencies = [ { name = "openinference-instrumentation-pydantic-ai" }, { name = "pydantic-ai" }, { name = "uipath" }, + { name = "uipath-core" }, { name = "uipath-runtime" }, ] @@ -3710,6 +3711,7 @@ requires-dist = [ { name = "openinference-instrumentation-pydantic-ai", specifier = ">=0.1.12" }, { name = "pydantic-ai", specifier = ">=1.63.0,<2.0.0" }, { name = "uipath", specifier = ">=2.10.2,<2.11.0" }, + { name = "uipath-core", specifier = ">=0.5.18,<0.7.0" }, { name = "uipath-runtime", specifier = ">=0.11.0,<0.12.0" }, ]