diff --git a/packages/uipath-openai-agents/pyproject.toml b/packages/uipath-openai-agents/pyproject.toml index 329eb4c8..7a88aeae 100644 --- a/packages/uipath-openai-agents/pyproject.toml +++ b/packages/uipath-openai-agents/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "openai-agents>=0.6.5", "openinference-instrumentation-openai-agents>=1.4.0", "uipath>=2.10.0, <2.11.0", + "uipath-core>=0.5.18, <0.7.0", "uipath-runtime>=0.11.0, <0.12.0", ] classifiers = [ diff --git a/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py new file mode 100644 index 00000000..45725a16 --- /dev/null +++ b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py @@ -0,0 +1,20 @@ +"""Governance integration for ``uipath-openai-agents``. + +Exposes :func:`install_governance` — installs the OpenAI-Agents-specific inner +hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) onto an agent's native +``hooks`` slot. Wired into a run by passing an ``evaluator`` to +:class:`UiPathOpenAIAgentRuntimeFactory`; the factory calls +:func:`install_governance` on the resolved agent. + +Importing this module has no side effects: no adapter is registered, no global +state is mutated. +""" + +from __future__ import annotations + +from .hooks import GovernanceAgentHooks, install_governance + +__all__ = [ + "GovernanceAgentHooks", + "install_governance", +] diff --git a/packages/uipath-openai-agents/src/uipath_openai_agents/governance/hooks.py b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/hooks.py new file mode 100644 index 00000000..3b8ba3ca --- /dev/null +++ b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/hooks.py @@ -0,0 +1,473 @@ +"""OpenAI Agents governance hooks for UiPath. + +Provides governance for OpenAI Agents SDK agents (``agents.Agent`` and any +graph of agents reachable via ``handoffs``). 
Like the Google ADK integration — +and unlike the LangChain one, which wraps a ``Runnable`` and intercepts +``invoke`` / ``ainvoke`` — OpenAI Agents are executed by ``Runner.run`` / +``Runner.run_streamed``, which hold their **own** reference to the agent +object. Replacing ``runtime.agent`` with a proxy would never reach the +``Runner``. So :func:`install_governance` installs governance directly onto +each agent's native ``hooks`` attribute (an :class:`agents.AgentHooks`), +mutating it in place: + +- ``on_llm_start`` → BEFORE_MODEL +- ``on_llm_end`` → AFTER_MODEL +- ``on_tool_start`` → TOOL_CALL +- ``on_tool_end`` → AFTER_TOOL + +Because the mutation is in place, :func:`install_governance` returns the +**original agent** (hooks installed) rather than a wrapping proxy. +``agents.Agent`` validates that ``hooks`` is an ``AgentHooks`` instance, so +:class:`GovernanceAgentHooks` subclasses it (the ADK integration could +duck-type its callbacks; here the SDK type-checks the slot). + +``agent.hooks`` holds a **single** ``AgentHooks`` (not a list, as in ADK), so +when an agent already carries user hooks we *chain*: governance runs first, +then the previously-installed hooks. + +Chain-level boundaries (BEFORE_AGENT / AFTER_AGENT) are owned by the +governance host, so they are not fired here — that would duplicate every +boundary evaluation. (The SDK's per-agent ``on_start`` / ``on_end`` are +pass-through-only here for that reason.) + +The evaluator protocol comes from ``uipath-core``; this package contributes +only the OpenAI-Agents-specific wiring. Governance is installed by the runtime +factory: passing an ``evaluator`` to +:class:`UiPathOpenAIAgentRuntimeFactory.new_runtime` calls +:func:`install_governance` on the resolved agent. No adapter registry, no +entry point, no import-time side effects. + +Audit emission and enforcement (raising :class:`GovernanceBlockException` on +DENY) are owned by the evaluator itself. 
Each hook only extracts the relevant +payload and calls the matching ``evaluate_*`` method; +:class:`GovernanceBlockException` is allowed to propagate (it aborts the +``Runner`` run), anything else is logged and swallowed so a governance bug +never breaks an agent run. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any, Dict, List + +from agents import Agent, AgentHooks +from uipath.core.adapters import EvaluatorProtocol +from uipath.core.governance.exceptions import GovernanceBlockException + +logger = logging.getLogger(__name__) + +# Cap on the text blob passed to BEFORE_MODEL / AFTER_MODEL governance +# evaluation. Sized to match the governance host and the other adapters so +# scan-time budgets are consistent across hooks. A long conversation history is +# governed at the LLM layer by scanning only the latest request content, not the +# full prompt — see :func:`_latest_input_text`. +_BEFORE_MODEL_TEXT_CAP = 64000 + +# Hard cap on how many nodes the handoff-graph walk visits, guarding against +# cyclic or pathologically deep agent graphs. Hitting it is logged, not silent. +_MAX_GRAPH_NODES = 1000 + + +def install_governance( + agent: Agent, + evaluator: EvaluatorProtocol, + *, + agent_name: str, + session_id: str, +) -> Agent: + """Install governance hooks on the agent graph (mutated in place). + + Walks every agent reachable through ``handoffs`` and installs a + :class:`GovernanceAgentHooks` on each one's ``hooks`` slot, chaining to any + pre-existing hooks. Returns the original ``agent`` — the ``Runner`` already + holds this reference, so in-place mutation is what wires governance into + execution. Idempotent: an already-governed agent is left untouched. + + Called by :class:`UiPathOpenAIAgentRuntimeFactory` when an ``evaluator`` + is supplied to ``new_runtime``. 
+ """ + agents = _iter_agents(agent) + installed = 0 + for node in agents: + if isinstance(getattr(node, "hooks", None), GovernanceAgentHooks): + continue # idempotent — already governed + prev = getattr(node, "hooks", None) + node.hooks = GovernanceAgentHooks( + evaluator=evaluator, + agent_name=agent_name, + session_id=session_id, + inner=prev, + ) + installed += 1 + if not agents: + logger.warning( + "install_governance found no Agent in %s — deep hooks will not fire", + type(agent).__name__, + ) + else: + logger.debug("Installed governance hooks on %d OpenAI agent(s)", installed) + return agent + + +def _iter_agents(root: Any) -> List[Any]: + """Return every ``Agent`` reachable through the ``handoffs`` graph. + + A node qualifies only if it is a real :class:`agents.Agent`. The SDK + type-checks the ``hooks`` slot, so duck-typing on ``hasattr(node, "hooks")`` + could let non-Agent objects through — we isinstance-check instead. Handoff + targets may be ``Agent`` instances or ``Handoff`` objects that carry the + target on ``.agent``; both are followed so a multi-agent app is governed end + to end. Cycles and pathological depth are bounded by an id-visited set and a + hard cap (``_MAX_GRAPH_NODES``), which logs rather than silently truncating. + + Not walked: agents reachable only as tools (``agent.as_tool()``) or embedded + in input/output guardrail functions — the SDK closes over those behind + opaque callables, so they are governed by their own runtime rather than this + graph walk. 
+ """ + found: List[Any] = [] + seen: set[int] = set() + stack: List[Any] = [root] + capped = False + while stack: + if len(seen) >= _MAX_GRAPH_NODES: + capped = True + break + node = stack.pop() + if node is None or id(node) in seen: + continue + seen.add(id(node)) + if isinstance(node, Agent): + found.append(node) + handoffs = getattr(node, "handoffs", None) + if isinstance(handoffs, (list, tuple)): + for h in handoffs: + # A Handoff wraps its target agent on ``.agent``; a bare Agent + # is itself the target. + stack.append(getattr(h, "agent", h)) + if capped: + logger.warning( + "install_governance stopped walking the agent graph at the %d-node " + "cap; agents beyond it will not be governed", + _MAX_GRAPH_NODES, + ) + return found + + +class GovernanceAgentHooks(AgentHooks): # type: ignore[type-arg] + """Per-agent ``AgentHooks`` bound to one governance evaluator. + + The evaluator owns audit emission and DENY-raising. Each hook extracts the + relevant payload, calls the matching ``evaluate_*`` method, and returns + ``None``. :class:`GovernanceBlockException` is allowed to propagate — it + aborts the ``Runner`` run — anything else is logged and swallowed. + + When the agent already carried an ``AgentHooks`` (``inner``), governance + runs first and then delegates to it, so user hooks keep working. + """ + + def __init__( + self, + evaluator: EvaluatorProtocol, + agent_name: str, + session_id: str, + inner: Any = None, + ) -> None: + self._evaluator = evaluator + self._agent_name = agent_name + self._session_id = session_id + self._inner = inner + # ``trace_id`` is intentionally NOT held here. A single uuid minted at + # install time would be identical for every model/tool call and would + # diverge across handoff nodes (each carries its own hooks). Trace + # correlation is owned by the layer below: OTel-backed sinks read the + # live span on the caller's thread, HTTP consumers resolve the canonical + # id at call time. This matches the LangChain adapter. 
+ self._session_state: Dict[str, Any] = {"tool_calls": 0, "llm_calls": 0} + + def _resolve_agent_name(self, agent: Any) -> str: + """Prefer the live executing agent's name over the install-time name. + + After a handoff the running node may differ from the graph entrypoint + the factory named us with; reporting the actual agent gives governance + accurate attribution. Falls back to the install-time name. + """ + name = getattr(agent, "name", None) + return name if isinstance(name, str) and name else self._agent_name + + # ----- Model hooks ----------------------------------------------------- + + async def on_llm_start( + self, + context: Any, + agent: Any, + system_prompt: Any, + input_items: Any, + ) -> None: + """Evaluate BEFORE_MODEL rules immediately before the LLM call. + + Scans only the **latest input item** — not the full history. The model + still receives the entire history (this hook does not mutate the + request); the evaluator focuses on the new content the agent is about + to respond to. Without this scoping, a violation in an earlier turn + would re-fire on every subsequent model call because that text stays in + the prompt for context. + """ + try: + model_input = _latest_input_text(input_items) + self._evaluator.evaluate_before_model( + model_input=model_input, + agent_name=self._resolve_agent_name(agent), + runtime_id=self._session_id, + ) + # Count only calls that passed governance — a DENY raises above, so + # a blocked call must not inflate the counter. 
+ self._session_state["llm_calls"] = ( + self._session_state.get("llm_calls", 0) + 1 + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 - governance must not break the run + logger.warning("on_llm_start governance check failed (continuing): %s", e) + await _delegate( + self._inner, "on_llm_start", context, agent, system_prompt, input_items + ) + + async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None: + """Evaluate AFTER_MODEL rules immediately after the LLM response.""" + try: + model_output = _model_response_text(response) + self._evaluator.evaluate_after_model( + model_output=model_output, + agent_name=self._resolve_agent_name(agent), + runtime_id=self._session_id, + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("on_llm_end governance check failed (continuing): %s", e) + await _delegate(self._inner, "on_llm_end", context, agent, response) + + # ----- Tool hooks ------------------------------------------------------ + + async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None: + """Evaluate TOOL_CALL rules immediately before a tool is invoked. + + The OpenAI Agents SDK does not surface tool *arguments* on + ``on_tool_start`` (only the tool itself), so ``tool_args`` is empty + here — argument-shaped rules evaluate at AFTER_TOOL via the result, or + at the model layer where the call's arguments are visible in the output. + """ + try: + tool_name = getattr(tool, "name", None) or "unknown" + self._evaluator.evaluate_tool_call( + tool_name=tool_name, + tool_args={}, + agent_name=self._resolve_agent_name(agent), + runtime_id=self._session_id, + session_state=self._session_state, + ) + # Count only calls that passed governance; the evaluator saw the + # count of prior tool calls, and a DENY raises before this bump. 
+ self._session_state["tool_calls"] = ( + self._session_state.get("tool_calls", 0) + 1 + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("on_tool_start governance check failed (continuing): %s", e) + await _delegate(self._inner, "on_tool_start", context, agent, tool) + + async def on_tool_end( + self, context: Any, agent: Any, tool: Any, result: Any + ) -> None: + """Evaluate AFTER_TOOL rules immediately after a tool is invoked. + + The SDK passes ``tool`` to both ``on_tool_start`` and ``on_tool_end``, + so the name is read directly here — no start→end correlation is needed + (unlike callback frameworks whose end hook omits the tool). + """ + try: + tool_name = getattr(tool, "name", None) or "unknown" + tool_result = "" if result is None else _stringify(result) + self._evaluator.evaluate_after_tool( + tool_name=tool_name, + tool_result=tool_result, + agent_name=self._resolve_agent_name(agent), + runtime_id=self._session_id, + ) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("on_tool_end governance check failed (continuing): %s", e) + await _delegate(self._inner, "on_tool_end", context, agent, tool, result) + + # ----- Pass-through boundaries ---------------------------------------- + # BEFORE_AGENT / AFTER_AGENT are owned by the governance host; here we only + # forward to any wrapped user hooks so their behaviour is preserved. 
+ + async def on_start(self, context: Any, agent: Any) -> None: + await _delegate(self._inner, "on_start", context, agent) + + async def on_end(self, context: Any, agent: Any, output: Any) -> None: + await _delegate(self._inner, "on_end", context, agent, output) + + async def on_handoff(self, context: Any, agent: Any, source: Any) -> None: + await _delegate(self._inner, "on_handoff", context, agent, source) + + +# -------------------------------------------------------------------------- +# Delegation + text extraction (module-level, sync, duck-typed) +# +# Extraction is duck-typed on purpose: the OpenAI Agents SDK's run-item / +# response shapes are not stable public models, so we read attributes +# defensively rather than isinstance-checking SDK types that may move. +# -------------------------------------------------------------------------- + + +async def _delegate(inner: Any, method: str, *args: Any) -> None: + """Call ``getattr(inner, method)(*args)`` if a wrapped hooks object provides it. + + User hooks are best-effort: a failure in a chained hook is logged and + swallowed (it must not abort the run on governance's behalf), except a + :class:`GovernanceBlockException`, which always propagates. + """ + if inner is None: + return + fn = getattr(inner, method, None) + if fn is None: + return + try: + await fn(*args) + except GovernanceBlockException: + raise + except Exception as e: # noqa: BLE001 + logger.warning("chained user hook %s failed (continuing): %s", method, e) + + +def _latest_input_text(input_items: Any) -> str: + """Extract text from the most-recent item in an LLM-call input list. + + ``input_items`` is the full ``list`` of response input items sent to the + model. We take the last entry — the new user message, or the tool + ``function_call_output`` being fed back — and pull its text via + :func:`_item_text`. Returns ``""`` when there is nothing extractable. 
+ """ + if not input_items: + return "" + if isinstance(input_items, (list, tuple)): + return _item_text(input_items[-1]) + return _item_text(input_items) + + +def _item_text(item: Any) -> str: + """Return governance-relevant text from one response input/output item. + + Tolerant of both dict-shaped items (``{"role": ..., "content": ...}``, + ``{"type": "function_call", "name": ..., "arguments": ...}``) and + object-shaped items (``.content`` / ``.text`` / ``.name`` / ``.arguments``). + Content may itself be a string or a list of parts (each a dict with + ``text`` / ``input_text`` / ``output_text`` or an object with ``.text``). + Capped at :data:`_BEFORE_MODEL_TEXT_CAP`. + """ + if item is None: + return "" + if isinstance(item, str): + return item[:_BEFORE_MODEL_TEXT_CAP] + + pieces: List[str] = [] + + # A function/tool call carries its intent in name + arguments. Treat an + # item as a call only when it is explicitly typed ``function_call`` or it + # actually carries arguments — a bare ``name`` on some other item type (a + # named message part) is not a tool call. + name = _get(item, "name") + arguments = _get(item, "arguments") + if name and (_get(item, "type") == "function_call" or arguments is not None): + if isinstance(name, str): + pieces.append(name) + if arguments is not None: + pieces.append(_stringify(arguments)) + + content = _get(item, "content") + if content is not None: + pieces.append(_content_text(content)) + + # Tool result fed back to the model. 
+ output = _get(item, "output") + if output is not None and not pieces: + pieces.append(_stringify(output)) + + text = "\n".join(p for p in pieces if p) + return text[:_BEFORE_MODEL_TEXT_CAP] + + +def _content_text(content: Any) -> str: + """Return text from a message ``content`` (string or list of parts).""" + if isinstance(content, str): + return content + if isinstance(content, (list, tuple)): + out: List[str] = [] + for part in content: + if isinstance(part, str): + out.append(part) + continue + t = ( + _get(part, "text") + or _get(part, "input_text") + or _get(part, "output_text") + ) + if isinstance(t, str) and t: + out.append(t) + return "\n".join(out) + t = _get(content, "text") + return t if isinstance(t, str) else "" + + +def _model_response_text(response: Any) -> str: + """Extract assistant text + tool-call intent from a ``ModelResponse``. + + ``response.output`` is the ``list`` of output items the model produced + (assistant messages and function/tool calls). Each is run through + :func:`_item_text` so both visible replies and tool-call arguments are + governed. Capped at :data:`_BEFORE_MODEL_TEXT_CAP`. + """ + if response is None: + return "" + output = _get(response, "output") + if output is None: + # Some shapes hand back text directly. + return _item_text(response) + items = output if isinstance(output, (list, tuple)) else [output] + collected: List[str] = [] + remaining = _BEFORE_MODEL_TEXT_CAP + for item in items: + if remaining <= 0: + break + piece = _item_text(item) + if piece: + collected.append(piece) + remaining -= len(piece) + 1 + return "\n".join(collected)[:_BEFORE_MODEL_TEXT_CAP] + + +def _get(obj: Any, attr: str) -> Any: + """Read ``attr`` from a dict key or object attribute, else ``None``.""" + if isinstance(obj, dict): + return obj.get(attr) + return getattr(obj, attr, None) + + +def _stringify(value: Any, cap: int = _BEFORE_MODEL_TEXT_CAP) -> str: + """Render a dict / object payload as compact, scannable text, capped. 
+ + The result is bounded by ``cap`` so an oversized tool result or argument + blob can't hand a multi-megabyte string to the evaluator. + """ + if isinstance(value, str): + return value[:cap] + try: + return json.dumps(value, default=str, ensure_ascii=False)[:cap] + except (TypeError, ValueError): + return str(value)[:cap] diff --git a/packages/uipath-openai-agents/src/uipath_openai_agents/runtime/factory.py b/packages/uipath-openai-agents/src/uipath_openai_agents/runtime/factory.py index 226df03d..bbe62473 100644 --- a/packages/uipath-openai-agents/src/uipath_openai_agents/runtime/factory.py +++ b/packages/uipath-openai-agents/src/uipath_openai_agents/runtime/factory.py @@ -5,6 +5,7 @@ from agents import Agent from openinference.instrumentation.openai_agents import OpenAIAgentsInstrumentor +from uipath.core.adapters import EvaluatorProtocol from uipath.runtime import ( UiPathRuntimeContext, UiPathRuntimeFactorySettings, @@ -13,6 +14,7 @@ ) from uipath.runtime.errors import UiPathErrorCategory +from uipath_openai_agents.governance import install_governance from uipath_openai_agents.runtime.agent import OpenAiAgentLoader from uipath_openai_agents.runtime.config import OpenAiAgentsConfig from uipath_openai_agents.runtime.errors import ( @@ -201,6 +203,7 @@ async def _create_runtime_instance( agent: Agent, runtime_id: str, entrypoint: str, + evaluator: EvaluatorProtocol | None = None, ) -> UiPathRuntimeProtocol: """ Create a runtime instance from an agent. @@ -209,10 +212,20 @@ async def _create_runtime_instance( agent: The OpenAI Agent runtime_id: Unique identifier for the runtime instance entrypoint: Agent entrypoint name + evaluator: When supplied, governance hooks are installed on the + agent graph in place via :func:`install_governance`. 
Returns: Configured runtime instance """ + if evaluator is not None: + install_governance( + agent, + evaluator, + agent_name=entrypoint, + session_id=runtime_id, + ) + return UiPathOpenAIAgentRuntime( agent=agent, runtime_id=runtime_id, @@ -228,7 +241,9 @@ async def new_runtime( Args: entrypoint: Agent name from openai_agents.json runtime_id: Unique identifier for the runtime instance - **kwargs: Additional keyword arguments (unused) + **kwargs: Forwarded factory kwargs. Recognized: ``evaluator`` + (``EvaluatorProtocol``) — when present, governance hooks are + installed on the agent via :func:`install_governance`. Returns: Configured runtime instance with agent @@ -239,6 +254,7 @@ async def new_runtime( agent=agent, runtime_id=runtime_id, entrypoint=entrypoint, + evaluator=kwargs.get("evaluator"), ) async def dispose(self) -> None: diff --git a/packages/uipath-openai-agents/tests/governance/__init__.py b/packages/uipath-openai-agents/tests/governance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packages/uipath-openai-agents/tests/governance/test_hooks.py b/packages/uipath-openai-agents/tests/governance/test_hooks.py new file mode 100644 index 00000000..5be3d051 --- /dev/null +++ b/packages/uipath-openai-agents/tests/governance/test_hooks.py @@ -0,0 +1,520 @@ +"""Unit tests for the OpenAI Agents governance hooks. + +``install_governance`` runs against real ``agents.Agent`` objects; everything else +duck-types the OpenAI Agents payloads (response input/output items, tools) +with lightweight fakes so the real code paths are exercised without a live +LLM. ``GovernanceAgentHooks`` subclasses ``agents.AgentHooks`` (the SDK +type-checks ``agent.hooks``), so importing the adapter requires +``openai-agents`` either way. + +The package is configured with ``asyncio_mode = "auto"``, so ``async def`` +tests run without an explicit marker. 
+""" + +from __future__ import annotations + +import logging +from contextlib import contextmanager +from types import SimpleNamespace +from typing import Any, Iterator, List + +import pytest +from agents import Agent +from uipath.core.governance.exceptions import GovernanceBlockException + +from uipath_openai_agents.governance.hooks import ( + _BEFORE_MODEL_TEXT_CAP, + GovernanceAgentHooks, + _content_text, + _item_text, + _latest_input_text, + _model_response_text, + _stringify, + install_governance, +) + +# -------------------------------------------------------------------------- +# Fakes +# -------------------------------------------------------------------------- + + +class FakeEvaluator: + """Records evaluate_* calls; optionally BLOCKs on a named hook.""" + + def __init__(self, block_on: str | None = None) -> None: + self.block_on = block_on + self.calls: List[tuple[str, dict[str, Any]]] = [] + + def _record(self, hook: str, **kwargs: Any) -> None: + self.calls.append((hook, kwargs)) + if self.block_on == hook: + raise GovernanceBlockException("blocked") # type: ignore[call-arg] + + def evaluate_before_agent(self, *args: Any, **kwargs: Any) -> Any: + self._record("before_agent", **kwargs) + + def evaluate_after_agent(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_agent", **kwargs) + + def evaluate_before_model(self, *args: Any, **kwargs: Any) -> Any: + self._record("before_model", **kwargs) + + def evaluate_after_model(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_model", **kwargs) + + def evaluate_tool_call(self, *args: Any, **kwargs: Any) -> Any: + self._record("tool_call", **kwargs) + + def evaluate_after_tool(self, *args: Any, **kwargs: Any) -> Any: + self._record("after_tool", **kwargs) + + +class FakeAgent(Agent): # type: ignore[type-arg] + """A real ``agents.Agent`` — the graph walk isinstance-checks ``Agent``, so + a bare duck-typed stand-in would be (correctly) skipped by + ``install_governance``. 
Subclassing keeps the construction lightweight while + remaining a genuine ``Agent`` instance.""" + + def __init__(self, name: str = "agent", handoffs: List[Any] | None = None): + super().__init__(name=name, handoffs=handoffs or []) + + +class FakeTool: + def __init__(self, name: str): + self.name = name + + +class RecordingHooks: + """A user-supplied AgentHooks-like object that records delegated calls.""" + + def __init__(self) -> None: + self.seen: List[str] = [] + + async def on_llm_start(self, *_a: Any) -> None: + self.seen.append("on_llm_start") + + async def on_llm_end(self, *_a: Any) -> None: + self.seen.append("on_llm_end") + + async def on_tool_start(self, *_a: Any) -> None: + self.seen.append("on_tool_start") + + async def on_tool_end(self, *_a: Any) -> None: + self.seen.append("on_tool_end") + + +def _msg(text: str, role: str = "user") -> dict[str, Any]: + """A response input item carrying plain string content.""" + return {"role": role, "content": text} + + +def _msg_parts(*texts: str, role: str = "user") -> dict[str, Any]: + """A response input item carrying a list of text parts.""" + return {"role": role, "content": [{"type": "input_text", "text": t} for t in texts]} + + +def _function_call(name: str, arguments: str) -> dict[str, Any]: + return {"type": "function_call", "name": name, "arguments": arguments} + + +def _output_message(*texts: str) -> SimpleNamespace: + """A ModelResponse output message item with text parts.""" + parts = [SimpleNamespace(text=t) for t in texts] + return SimpleNamespace(role="assistant", content=parts) + + +def _make_hooks(evaluator: FakeEvaluator, inner: Any = None) -> GovernanceAgentHooks: + return GovernanceAgentHooks( + evaluator=evaluator, agent_name="agent-1", session_id="sess-1", inner=inner + ) + + +# -------------------------------------------------------------------------- +# install_governance +# -------------------------------------------------------------------------- + + +def 
test_install_governance_installs_on_all_agents_in_handoff_graph(): + leaf_a = FakeAgent("a") + leaf_b = FakeAgent("b") + root = FakeAgent("root", handoffs=[leaf_a, leaf_b]) + + returned = install_governance(root, FakeEvaluator(), agent_name="x", session_id="s") + + assert returned is root # original returned, not a proxy + for node in (root, leaf_a, leaf_b): + assert isinstance(node.hooks, GovernanceAgentHooks) + + +def test_install_governance_follows_handoff_wrapper_objects(): + target = FakeAgent("target") + handoff = SimpleNamespace(agent=target) # Handoff-shaped wrapper + root = FakeAgent("root", handoffs=[handoff]) + install_governance(root, FakeEvaluator(), agent_name="x", session_id="s") + assert isinstance(target.hooks, GovernanceAgentHooks) + + +def test_install_governance_is_idempotent(): + agent = FakeAgent() + ev = FakeEvaluator() + install_governance(agent, ev, agent_name="x", session_id="s") + first = agent.hooks + install_governance(agent, ev, agent_name="x", session_id="s") + assert agent.hooks is first # not re-wrapped + + +def test_install_governance_chains_existing_hooks(): + agent = FakeAgent() + user_hooks = RecordingHooks() + agent.hooks = user_hooks # type: ignore[assignment] # test double, not a real AgentHooks + install_governance(agent, FakeEvaluator(), agent_name="x", session_id="s") + assert isinstance(agent.hooks, GovernanceAgentHooks) + assert agent.hooks._inner is user_hooks + + +_HOOKS_LOGGER = "uipath_openai_agents.governance.hooks" + + +@contextmanager +def _capture_hooks_logs(caplog: Any) -> Iterator[None]: + """Attach caplog's handler straight to the hooks logger. + + Some sibling suites configure an ancestor ``uipath*`` logger with + ``propagate=False``, which silently breaks caplog's default root-handler + capture. Attaching directly to the target logger is propagation-independent. 
+    """
+    logger = logging.getLogger(_HOOKS_LOGGER)
+    logger.addHandler(caplog.handler)
+    prev = logger.level
+    logger.setLevel(logging.WARNING)
+    try:
+        yield
+    finally:
+        logger.removeHandler(caplog.handler)
+        logger.setLevel(prev)
+
+
+def test_install_governance_warns_when_no_agent(caplog):
+    with _capture_hooks_logs(caplog):
+        install_governance(object(), FakeEvaluator(), agent_name="x", session_id="s")  # type: ignore[arg-type]
+    assert any("no Agent" in r.message for r in caplog.records)
+
+
+# --------------------------------------------------------------------------
+# on_llm_start (BEFORE_MODEL)
+# --------------------------------------------------------------------------
+
+
+async def test_on_llm_start_scopes_to_latest_item():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    items = [_msg("OLD turn — secret leak here"), _msg("the new question")]  # older turn first, latest last
+    await cb.on_llm_start(None, FakeAgent(), "system", items)
+    hook, kwargs = ev.calls[-1]
+    assert hook == "before_model"
+    assert kwargs["model_input"] == "the new question"
+    assert "OLD turn" not in kwargs["model_input"]
+
+
+async def test_on_llm_start_extracts_list_parts():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg_parts("part one", "part two")])
+    out = ev.calls[-1][1]["model_input"]  # calls[-1] is a (hook, kwargs) pair
+    assert "part one" in out and "part two" in out
+
+
+async def test_on_llm_start_extracts_function_call_when_latest():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    items = [_function_call("lookup", '{"balance": "1000"}')]
+    await cb.on_llm_start(None, FakeAgent(), None, items)
+    out = ev.calls[-1][1]["model_input"]
+    assert "lookup" in out and "1000" in out  # call name and argument value both present
+
+
+async def test_on_llm_start_caps_text():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    huge = "x" * (_BEFORE_MODEL_TEXT_CAP + 5000)  # 5000 characters past the cap
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg(huge)])
+    assert len(ev.calls[-1][1]["model_input"]) <= _BEFORE_MODEL_TEXT_CAP
+
+
+async def test_on_llm_start_empty_input():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_start(None, FakeAgent(), None, [])  # no input items at all
+    assert ev.calls[-1][1]["model_input"] == ""
+
+
+# --------------------------------------------------------------------------
+# on_llm_end (AFTER_MODEL)
+# --------------------------------------------------------------------------
+
+
+async def test_on_llm_end_extracts_text_and_function_call():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    response = SimpleNamespace(
+        output=[
+            _output_message("thinking"),
+            SimpleNamespace(
+                type="function_call",
+                name="submit_answer",
+                arguments='{"content": "final reply"}',
+            ),
+        ]
+    )
+    await cb.on_llm_end(None, FakeAgent(), response)
+    out = ev.calls[-1][1]["model_output"]
+    assert "thinking" in out and "submit_answer" in out and "final reply" in out
+
+
+async def test_on_llm_end_empty_response():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[]))
+    assert ev.calls[-1][1]["model_output"] == ""
+
+
+# --------------------------------------------------------------------------
+# tools
+# --------------------------------------------------------------------------
+
+
+async def test_on_tool_start_passes_name_and_session_state():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_start(None, FakeAgent(), FakeTool("transfer"))
+    hook, kwargs = ev.calls[-1]
+    assert hook == "tool_call"
+    assert kwargs["tool_name"] == "transfer"
+    assert kwargs["tool_args"] == {}  # OpenAI SDK does not surface args here
+    assert kwargs["session_state"]["tool_calls"] == 1
+
+
+async def test_on_tool_end_stringifies_dict_result():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("lookup"), {"x": 1})
+    out = ev.calls[-1][1]["tool_result"]
+    assert "x" in out and "1" in out  # dict key and value both appear in the text
+
+
+async def test_on_tool_end_none_result():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("noop"), None)
+    assert ev.calls[-1][1]["tool_result"] == ""  # None result is reported as ""
+
+
+async def test_reports_live_agent_name_not_install_time_name():
+    """After a handoff the executing agent differs from the graph entrypoint
+    the factory named us with; governance should attribute the live agent."""
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)  # install-time name is "agent-1"
+    await cb.on_llm_start(None, FakeAgent("billing_specialist"), None, [_msg("hi")])
+    assert ev.calls[-1][1]["agent_name"] == "billing_specialist"
+
+
+async def test_blocked_call_does_not_increment_counter():
+    """A DENY raises before the counter bump, so the count is not inflated."""
+    ev = FakeEvaluator(block_on="tool_call")
+    cb = _make_hooks(ev)
+    with pytest.raises(GovernanceBlockException):
+        await cb.on_tool_start(None, FakeAgent(), FakeTool("t"))
+    # evaluator saw the pre-call count (0) and the block prevented the bump
+    assert ev.calls[-1][1]["session_state"]["tool_calls"] == 0
+    assert cb._session_state["tool_calls"] == 0
+
+
+# --------------------------------------------------------------------------
+# chaining to user hooks
+# --------------------------------------------------------------------------
+
+
+async def test_governance_delegates_to_inner_hooks():
+    inner = RecordingHooks()
+    cb = _make_hooks(FakeEvaluator(), inner=inner)
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg("hi")])
+    await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[]))
+    await cb.on_tool_start(None, FakeAgent(), FakeTool("t"))
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {})
+    assert inner.seen == ["on_llm_start", "on_llm_end", "on_tool_start", "on_tool_end"]
+
+
+# --------------------------------------------------------------------------
+# enforcement semantics
+# --------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "hook,invoke",
+    [
+        (
+            "before_model",
+            lambda cb: cb.on_llm_start(None, FakeAgent(), None, [_msg("hi")]),
+        ),
+        (
+            "after_model",
+            lambda cb: cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[])),
+        ),
+        ("tool_call", lambda cb: cb.on_tool_start(None, FakeAgent(), FakeTool("t"))),
+        (
+            "after_tool",
+            lambda cb: cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {"r": 1}),
+        ),
+    ],
+)
+async def test_block_exception_propagates(hook, invoke):
+    cb = _make_hooks(FakeEvaluator(block_on=hook))
+    with pytest.raises(GovernanceBlockException):
+        await invoke(cb)  # the lambda only builds the hook coroutine; it runs here
+
+
+async def test_non_block_exception_is_swallowed(caplog):
+    class Boom:
+        def evaluate_before_model(self, **_: Any) -> None:
+            raise RuntimeError("evaluator bug")
+
+    cb = GovernanceAgentHooks(
+        evaluator=Boom(),  # type: ignore[arg-type]
+        agent_name="a",
+        session_id="s",
+    )
+    with _capture_hooks_logs(caplog):
+        # must NOT raise — a governance bug can't break the agent run
+        await cb.on_llm_start(None, FakeAgent(), None, [_msg("x")])
+    assert any("governance check failed" in r.message for r in caplog.records)
+
+
+async def test_hooks_return_none():
+    # hooks are pass-through (return None) — they never short-circuit the run.
+    # (the inline type: ignores below silence mypy's func-returns-value on the
+    # None-returning hooks; the runtime assert documents the contract.)
+    cb = _make_hooks(FakeEvaluator())
+    assert await cb.on_llm_start(None, FakeAgent(), None, []) is None  # type: ignore[func-returns-value]
+    assert await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[])) is None  # type: ignore[func-returns-value]
+    assert await cb.on_tool_start(None, FakeAgent(), FakeTool("t")) is None  # type: ignore[func-returns-value]
+    assert await cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {}) is None  # type: ignore[func-returns-value]
+
+
+# --------------------------------------------------------------------------
+# coverage: swallow paths on every hook, boundary delegation, extraction edges
+# --------------------------------------------------------------------------
+
+
+class _Boom:
+    """Evaluator whose every evaluate_* raises a non-block error."""
+
+    def __getattr__(self, _name: str) -> Any:  # any missing attribute resolves to a raiser
+        def _raise(*_a: Any, **_k: Any) -> None:
+            raise RuntimeError("evaluator bug")
+
+        return _raise
+
+
+@pytest.mark.parametrize(
+    "invoke",  # on_llm_start is covered by test_non_block_exception_is_swallowed
+    [
+        lambda cb: cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[])),
+        lambda cb: cb.on_tool_start(None, FakeAgent(), FakeTool("t")),
+        lambda cb: cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {"r": 1}),
+    ],
+)
+async def test_model_and_tool_hooks_swallow_non_block_errors(invoke, caplog):
+    cb = GovernanceAgentHooks(evaluator=_Boom(), agent_name="a", session_id="s")  # type: ignore[arg-type]
+    with _capture_hooks_logs(caplog):
+        await invoke(cb)  # must NOT raise — a governance bug can't break the run
+    assert any("governance check failed" in r.message for r in caplog.records)
+
+
+class _InnerBoundary:
+    def __init__(self) -> None:
+        self.seen: List[str] = []  # names of the hooks called, in call order
+
+    async def on_start(self, *_a: Any) -> None:
+        self.seen.append("on_start")
+
+    async def on_end(self, *_a: Any) -> None:
+        self.seen.append("on_end")
+
+    async def on_handoff(self, *_a: Any) -> None:
+        self.seen.append("on_handoff")
+
+
+async def test_boundary_hooks_delegate_to_inner():
+    inner = _InnerBoundary()
+    cb = _make_hooks(FakeEvaluator(), inner=inner)
+    await cb.on_start(None, FakeAgent())
+    await cb.on_end(None, FakeAgent(), "out")
+    await cb.on_handoff(None, FakeAgent(), FakeAgent())
+    assert inner.seen == ["on_start", "on_end", "on_handoff"]
+
+
+async def test_delegate_swallows_inner_hook_error(caplog):
+    class _BadInner:
+        async def on_llm_start(self, *_a: Any) -> None:
+            raise RuntimeError("inner boom")
+
+    cb = _make_hooks(FakeEvaluator(), inner=_BadInner())
+    with _capture_hooks_logs(caplog):
+        await cb.on_llm_start(None, FakeAgent(), None, [_msg("x")])  # must not raise
+    assert any("chained user hook" in r.message for r in caplog.records)
+
+
+def test_extraction_edges():
+    # _stringify: str passthrough; circular ref → str() fallback (not a crash)
+    assert _stringify("hi") == "hi"
+    circular: dict[str, Any] = {}
+    circular["self"] = circular  # the dict now contains itself
+    assert isinstance(_stringify(circular), str)
+    # _item_text: tool-result output-only item
+    assert "42" in _item_text({"output": {"balance": 42}})
+    # _content_text: object exposing .text, and a bare-string part in a list
+    assert _content_text(SimpleNamespace(text="hello")) == "hello"
+    assert "raw" in _content_text(["raw", {"text": "block"}])
+    # _model_response_text: response with no .output → falls back to item text
+    assert _model_response_text(SimpleNamespace(content="direct")) == "direct"
+    # _latest_input_text: single (non-list) item
+    assert _latest_input_text(_msg("solo")) == "solo"
+
+
+# --------------------------------------------------------------------------
+# Factory wiring — the evaluator kwarg drives install_governance
+# --------------------------------------------------------------------------
+
+
+def _factory_without_init():
+    """A factory instance that skips __init__ (avoids SDK instrumentation)."""
+    from uipath_openai_agents.runtime.factory import UiPathOpenAIAgentRuntimeFactory
+
+    return UiPathOpenAIAgentRuntimeFactory.__new__(UiPathOpenAIAgentRuntimeFactory)
+
+
+async def test_factory_installs_governance_when_evaluator_supplied(monkeypatch):
+    from uipath_openai_agents.runtime import factory as factory_mod
+
+    # Stub the runtime so we don't introspect a real Agent.
+    monkeypatch.setattr(
+        factory_mod, "UiPathOpenAIAgentRuntime", lambda **kw: SimpleNamespace(**kw)
+    )
+    agent = FakeAgent()
+    await _factory_without_init()._create_runtime_instance(
+        agent=agent, runtime_id="r", entrypoint="e", evaluator=FakeEvaluator()
+    )
+    assert isinstance(agent.hooks, GovernanceAgentHooks)  # checked on the agent object passed in
+
+
+async def test_factory_skips_governance_without_evaluator(monkeypatch):
+    from uipath_openai_agents.runtime import factory as factory_mod
+
+    monkeypatch.setattr(  # replace the runtime class with a kwargs-capturing namespace
+        factory_mod, "UiPathOpenAIAgentRuntime", lambda **kw: SimpleNamespace(**kw)
+    )
+    agent = FakeAgent()
+    await _factory_without_init()._create_runtime_instance(
+        agent=agent, runtime_id="r", entrypoint="e"
+    )
+    assert agent.hooks is None  # no evaluator kwarg was passed
diff --git a/packages/uipath-openai-agents/uv.lock b/packages/uipath-openai-agents/uv.lock
index ff2970af..192c3081 100644
--- a/packages/uipath-openai-agents/uv.lock
+++ b/packages/uipath-openai-agents/uv.lock
@@ -2334,16 +2334,16 @@ wheels = [
 
 [[package]]
 name = "uipath-core"
-version = "0.5.18"
+version = "0.5.28"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-instrumentation" },
     { name = "opentelemetry-sdk" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/14/b1/d4e555a1a2ccf298195a5f2968e538b0cea8592b3e03f43fc12b178d6c69/uipath_core-0.5.18.tar.gz", hash = "sha256:63ebe8bdb818ca30a4bc9ab0ea8171315680691429931282939359ce039401ab", size = 131988, upload-time = "2026-06-08T14:04:49.688Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4c/f9/8d2f1d98cbebbcf059cf4561f38f34ad4cd58423e4f15cad22bd297a2563/uipath_core-0.5.28.tar.gz", hash = "sha256:942987f6b612c64f93d612ad7b242276ed75f129fdd8f25bc71c24ec8887e388", size = 130578, upload-time = "2026-06-30T14:04:48.841Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/57/de/1a820b33f7bff4565d7649772bc54c88480ac7e70f707097f7da37d05157/uipath_core-0.5.18-py3-none-any.whl", hash = "sha256:351d6faeecfc6a0acea93182e01526f39c04a77e09fa0444be5f4fb580463f5a", size = 54572, upload-time = "2026-06-08T14:04:48.22Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/1e/385bb166232a57ebe938cc57ad2717f350bc922bb5d2ce31af84306b7569/uipath_core-0.5.28-py3-none-any.whl", hash = "sha256:b952a46a21710073cbc16d6d5684e9aa645c107f57a636b778cfb94aa81a1e48", size = 54980, upload-time = "2026-06-30T14:04:47.374Z" },
 ]
 
 [[package]]
@@ -2356,6 +2356,7 @@ dependencies = [
     { name = "openai-agents" },
     { name = "openinference-instrumentation-openai-agents" },
     { name = "uipath" },
+    { name = "uipath-core" },
     { name = "uipath-runtime" },
 ]
 
@@ -2377,6 +2378,7 @@ requires-dist = [
     { name = "openai-agents", specifier = ">=0.6.5" },
     { name = "openinference-instrumentation-openai-agents", specifier = ">=1.4.0" },
     { name = "uipath", specifier = ">=2.10.0,<2.11.0" },
+    { name = "uipath-core", specifier = ">=0.5.18,<0.7.0" },
     { name = "uipath-runtime", specifier = ">=0.11.0,<0.12.0" },
 ]
 