diff --git a/packages/uipath-google-adk/pyproject.toml b/packages/uipath-google-adk/pyproject.toml index 73a696d6..f0d14711 100644 --- a/packages/uipath-google-adk/pyproject.toml +++ b/packages/uipath-google-adk/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "google-adk>=1.25.1", "openinference-instrumentation-google-adk>=0.1.9", "uipath>=2.10.0, <2.11.0", + "uipath-core>=0.5.18, <0.7.0", "uipath-runtime>=0.11.0, <0.12.0", ] classifiers = [ diff --git a/packages/uipath-google-adk/src/uipath_google_adk/governance/__init__.py b/packages/uipath-google-adk/src/uipath_google_adk/governance/__init__.py new file mode 100644 index 00000000..fefee0ce --- /dev/null +++ b/packages/uipath-google-adk/src/uipath_google_adk/governance/__init__.py @@ -0,0 +1,20 @@ +"""Governance integration for ``uipath-google-adk``. + +Exposes :func:`install_governance` — installs governance callbacks +(BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) on every ``LlmAgent`` in an +ADK agent tree's native ``*_callback`` slots. Wired into a run by passing an +``evaluator`` to :class:`UiPathGoogleADKRuntimeFactory`; the factory calls +:func:`install_governance` on the resolved agent. + +Importing this module has no side effects: no adapter is registered, no global +state is mutated. +""" + +from __future__ import annotations + +from .callbacks import GovernanceCallbacks, install_governance + +__all__ = [ + "GovernanceCallbacks", + "install_governance", +] \ No newline at end of file diff --git a/packages/uipath-google-adk/src/uipath_google_adk/governance/callbacks.py b/packages/uipath-google-adk/src/uipath_google_adk/governance/callbacks.py new file mode 100644 index 00000000..5c1aaef7 --- /dev/null +++ b/packages/uipath-google-adk/src/uipath_google_adk/governance/callbacks.py @@ -0,0 +1,492 @@ +"""Google ADK governance callbacks for UiPath. + +Provides governance for Google ADK agents (``google.adk.agents.LlmAgent`` +and any ``BaseAgent`` tree containing them). 
Unlike the LangChain integration +— which wraps a ``Runnable`` and intercepts ``invoke`` / ``ainvoke`` — ADK +agents are executed by a ``Runner`` that holds its **own** reference to +the agent object. Replacing ``runtime.agent`` with a proxy would never +reach the ``Runner``. So :func:`install_governance` installs governance +directly onto each ``LlmAgent``'s native callback attributes, mutating them +in place: + +- ``before_model_callback`` → BEFORE_MODEL +- ``after_model_callback`` → AFTER_MODEL +- ``before_tool_callback`` → TOOL_CALL +- ``after_tool_callback`` → AFTER_TOOL + +Because the mutation is in place, :func:`install_governance` returns the +**original agent** (hooks installed) rather than a wrapping proxy. +Returning a proxy here would also break ADK's own ``isinstance(agent, +LlmAgent)`` checks in output-schema / graph resolution, since ``LlmAgent`` +is a Pydantic model. + +Chain-level boundaries (BEFORE_AGENT / AFTER_AGENT) are intentionally +*not* fired from here — they are owned by the governance host. Firing them +here too would duplicate every boundary evaluation. + +The evaluator protocol comes from ``uipath-core``; this package contributes +only the ADK-specific wiring. Governance is installed by the runtime +factory: passing an ``evaluator`` to ``new_runtime`` calls +:func:`install_governance` on the resolved agent. No adapter registry, no +entry point, no import-time side effects. + +Audit emission and enforcement (raising :class:`GovernanceBlockException` +on DENY) are owned by the evaluator itself. Each callback only extracts +the relevant payload and calls the matching ``evaluate_*`` method; +:class:`GovernanceBlockException` is allowed to propagate (it aborts the +``Runner`` run), anything else is logged and swallowed so a governance +bug never breaks an agent run. 
def _is_governance_callable(fn: Any) -> bool:
    """Report whether ``fn`` is a method bound to a :class:`GovernanceCallbacks`.

    The check looks at ``fn.__self__``; anything without that attribute
    (including ``None``) yields ``False``.
    """
    owner = getattr(fn, "__self__", None)
    return isinstance(owner, GovernanceCallbacks)
+ """ + for attr in (_MODEL_BEFORE, _MODEL_AFTER, _TOOL_BEFORE, _TOOL_AFTER): + existing = getattr(agent, attr, None) + handlers = existing if isinstance(existing, list) else [existing] + for h in handlers: + if _is_governance_callable(h): + return h.__self__ # type: ignore[no-any-return] + return None + + +def _install_callback(agent: Any, attr: str, fn: Any) -> None: + """Prepend ``fn`` to an ADK callback slot, preserving existing handlers. + + ADK accepts a single callable or a ``list`` of callables for each + ``*_callback`` field and runs them in order, stopping early if one + returns a value (a short-circuit). Governance is prepended (runs + first) so it always evaluates — and can BLOCK — before any + user-supplied callback gets a chance to short-circuit the model / + tool call. + + Idempotent: if a governance callback is already present in the slot, + this is a no-op (so a double ``attach`` does not stack duplicates). + """ + existing = getattr(agent, attr, None) + if existing is None: + handlers: List[Any] = [] + elif isinstance(existing, list): + handlers = list(existing) + else: + handlers = [existing] + if any(_is_governance_callable(h) for h in handlers): + return + setattr(agent, attr, [fn, *handlers]) + + +def _iter_llm_agents(root: Any) -> List[Any]: + """Return every ``LlmAgent``-shaped node in the agent tree. + + A node qualifies if it exposes the model-callback surface (duck-typed + via :data:`_MODEL_BEFORE` so we don't hard-require ``LlmAgent`` to be + importable). Container agents (``Sequential`` / ``Parallel`` / ``Loop``) + have no model callbacks themselves but their ``sub_agents`` are walked + so a multi-agent app is governed end to end. + + ``AgentTool``-wrapped agents are also followed: an agent exposed to another + agent as a tool carries its target on ``tool.agent`` and lives in ``tools`` + (not ``sub_agents``), so it would otherwise be missed. 
def install_governance(
    agent: Any,
    evaluator: EvaluatorProtocol,
    *,
    agent_name: str,
    session_id: str,
) -> Any:
    """Install governance callbacks on the agent tree (mutated in place).

    Visits every node returned by :func:`_iter_llm_agents` — nodes reachable
    through ``sub_agents`` and through ``tools`` entries that wrap an agent —
    and prepends governance to each of the four model/tool callback slots,
    preserving existing handlers.

    A node that already carries governance (a cached agent reused for a new
    runtime) keeps its existing :class:`GovernanceCallbacks` instance, which
    is re-pointed at this run via ``rebind``. Its slots are then topped up as
    well, so a slot whose governance handler was dropped after the first
    install is governed again instead of being silently skipped.

    Args:
        agent: Root of the ADK agent tree.
        evaluator: Governance evaluator the callbacks delegate to.
        agent_name: Name reported to the evaluator for every hook.
        session_id: Identifier reported to the evaluator as ``runtime_id``.

    Returns:
        The original ``agent`` object, not a proxy.
    """
    llm_agents = _iter_llm_agents(agent)
    if not llm_agents:
        logger.warning(
            "install_governance found no LlmAgent in %s — deep hooks will not fire",
            type(agent).__name__,
        )
        return agent

    # One instance is shared by all nodes that are governed for the first
    # time in this call; created lazily so a pure re-bind allocates nothing.
    shared: GovernanceCallbacks | None = None
    for node in llm_agents:
        callbacks = _find_governance_callbacks(node)
        if callbacks is not None:
            # Cached agent reused for a new runtime: refresh the evaluator and
            # session/agent so governance attributes to *this* run rather than
            # the first one that installed it.
            callbacks.rebind(
                evaluator=evaluator, agent_name=agent_name, session_id=session_id
            )
        else:
            if shared is None:
                shared = GovernanceCallbacks(
                    evaluator=evaluator,
                    agent_name=agent_name,
                    session_id=session_id,
                )
            callbacks = shared
        # ``_install_callback`` is a no-op for a slot that already holds a
        # governance callable, so running it for every node only fills gaps.
        _install_callback(node, _MODEL_BEFORE, callbacks.before_model)
        _install_callback(node, _MODEL_AFTER, callbacks.after_model)
        _install_callback(node, _TOOL_BEFORE, callbacks.before_tool)
        _install_callback(node, _TOOL_AFTER, callbacks.after_tool)

    logger.debug(
        "Installed governance callbacks on %d ADK LlmAgent(s)",
        len(llm_agents),
    )
    return agent
def rebind(
    self,
    evaluator: EvaluatorProtocol,
    agent_name: str,
    session_id: str,
) -> None:
    """Point this callback set at a different run.

    Used when an agent that already carries these callbacks is reused for a
    fresh ``new_runtime``: the evaluator and both identifiers are replaced
    and the per-run counters start again from zero, so nothing carries over
    from the previous runtime.
    """
    self._evaluator, self._agent_name, self._session_id = (
        evaluator,
        agent_name,
        session_id,
    )
    # A new dict, not an in-place reset.
    self._session_state = dict(tool_calls=0, llm_calls=0)
def after_model(self, callback_context: Any, llm_response: Any) -> None:
    """Run AFTER_MODEL governance on a finished model response.

    A response whose ``partial`` attribute is truthy is ignored, so only the
    final response is handed to the evaluator and streamed chunks are not
    re-scanned one by one.

    Always returns ``None`` so ADK keeps the model's response unchanged.
    :class:`GovernanceBlockException` propagates; any other error is logged
    and swallowed.
    """
    try:
        is_partial = getattr(llm_response, "partial", False)
        if not is_partial:
            output_text = self._content_text(
                getattr(llm_response, "content", None)
            )
            self._evaluator.evaluate_after_model(
                model_output=output_text,
                agent_name=self._agent_name,
                runtime_id=self._session_id,
            )
    except GovernanceBlockException:
        raise
    except Exception as e:
        logger.warning("after_model governance check failed (continuing): %s", e)
    return None
def after_tool(
    self,
    tool: Any,
    args: Dict[str, Any],
    tool_context: Any,
    tool_response: Any,
) -> None:
    """Run AFTER_TOOL governance on a tool's result.

    The tool is identified by its ``name`` attribute (``"unknown"`` when that
    is missing or empty). A ``None`` response is reported as an empty string;
    anything else is rendered through ``_stringify``.

    Always returns ``None`` so ADK keeps the tool's result unchanged.
    :class:`GovernanceBlockException` propagates; any other error is logged
    and swallowed.
    """
    try:
        name = getattr(tool, "name", None)
        if not name:
            name = "unknown"
        if tool_response is None:
            rendered = ""
        else:
            rendered = self._stringify(tool_response)
        self._evaluator.evaluate_after_tool(
            tool_name=name,
            tool_result=rendered,
            agent_name=self._agent_name,
            runtime_id=self._session_id,
        )
    except GovernanceBlockException:
        raise
    except Exception as e:
        logger.warning("after_tool governance check failed (continuing): %s", e)
    return None
@classmethod
def _part_text(cls, part: Any) -> str:
    """Collect the scannable text carried by a single part.

    Up to three sources are read, in this order, and joined with newlines:
    the part's non-empty string ``text``; the ``name`` and stringified
    ``args`` of its ``function_call``; and the stringified ``response`` of
    its ``function_response``. Missing or empty pieces are left out, so a
    part with none of them yields ``""``.
    """
    fragments: List[str] = []

    plain = getattr(part, "text", None)
    if isinstance(plain, str) and plain:
        fragments.append(plain)

    call = getattr(part, "function_call", None)
    if call is not None:
        call_name = getattr(call, "name", "") or ""
        call_args = getattr(call, "args", None)
        if call_name:
            fragments.append(call_name)
        if call_args:
            fragments.append(cls._stringify(call_args))

    tool_reply = getattr(part, "function_response", None)
    if tool_reply is not None:
        payload = getattr(tool_reply, "response", None)
        if payload:
            fragments.append(cls._stringify(payload))

    return "\n".join(filter(None, fragments))
@staticmethod
def _stringify(value: Any, cap: int = _BEFORE_MODEL_TEXT_CAP) -> str:
    """Render a dict / object payload as compact, scannable text, capped.

    Bounded by ``cap`` so an oversized tool result, function-call args blob,
    or function-response can't hand a multi-megabyte string to the
    evaluator.

    This function never raises. The governance callbacks swallow every
    exception other than a block, so an error escaping from here would make
    the payload skip evaluation altogether. A payload that can be neither
    JSON-encoded nor ``str()``-ed (for example one nested past the recursion
    limit, or an object whose ``__str__`` fails) is therefore reported as a
    short placeholder naming its type.

    Args:
        value: Payload to render; a ``str`` is returned as-is (capped).
        cap: Maximum length of the returned string.

    Returns:
        At most ``cap`` characters of text.
    """
    if isinstance(value, str):
        return value[:cap]
    try:
        return json.dumps(value, default=str, ensure_ascii=False)[:cap]
    except Exception:
        # TypeError / ValueError (unsupported keys, circular references),
        # RecursionError (deep nesting) or anything raised by ``default=str``
        # calling a custom ``__str__``: fall back to ``str()`` below.
        pass
    try:
        return str(value)[:cap]
    except Exception:
        # ``str()`` can fail for the same reasons; still give the evaluator
        # something to see rather than aborting the check.
        return f"<unserializable {type(value).__name__}>"[:cap]
from uipath_google_adk.runtime.errors import ( UiPathGoogleADKErrorCode, @@ -209,6 +211,7 @@ async def _create_runtime_instance( agent: BaseAgent, runtime_id: str, entrypoint: str, + evaluator: EvaluatorProtocol | None = None, ) -> UiPathRuntimeProtocol: """ Create a runtime instance from an agent. @@ -217,7 +220,19 @@ async def _create_runtime_instance( retrieves or creates a session for the given runtime_id. Sessions persist across calls, enabling multi-turn conversations where only the current user message is sent each time. + + When ``evaluator`` is supplied, governance callbacks are installed on + the agent tree in place via :func:`install_governance` before the + ``Runner`` is created. """ + if evaluator is not None: + install_governance( + agent, + evaluator, + agent_name=entrypoint, + session_id=runtime_id, + ) + session_service = await self._get_session_service() runner = Runner( agent=agent, @@ -256,7 +271,9 @@ async def new_runtime( Args: entrypoint: Agent name from google_adk.json runtime_id: Unique identifier for the runtime instance - **kwargs: Additional keyword arguments (unused) + **kwargs: Forwarded factory kwargs. Recognized: ``evaluator`` + (``EvaluatorProtocol``) — when present, governance callbacks + are installed on the agent via :func:`install_governance`. 
Returns: Configured runtime instance with agent @@ -267,6 +284,7 @@ async def new_runtime( agent=agent, runtime_id=runtime_id, entrypoint=entrypoint, + evaluator=kwargs.get("evaluator"), ) async def dispose(self) -> None: diff --git a/packages/uipath-google-adk/tests/governance/__init__.py b/packages/uipath-google-adk/tests/governance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packages/uipath-google-adk/tests/governance/test_callbacks.py b/packages/uipath-google-adk/tests/governance/test_callbacks.py new file mode 100644 index 00000000..be0b4aa0 --- /dev/null +++ b/packages/uipath-google-adk/tests/governance/test_callbacks.py @@ -0,0 +1,503 @@ +"""Unit tests for the Google ADK governance callbacks. + +The tests duck-type the agent tree and the ADK payloads rather than building +real ``google.adk`` objects (the code under test finds agents by their +callback attributes, not by ``isinstance``) — lightweight fakes for ``Part`` / +``Content`` / ``LlmRequest`` / ``LlmResponse`` / tool / agent — so the install +and callback code paths are exercised without driving the heavy ADK runtime. 
class FakeEvaluator:
    """Evaluator double: logs every ``evaluate_*`` call, can BLOCK one hook.

    Each call is appended to ``calls`` as ``(hook_name, keyword_arguments)``;
    positional arguments are accepted but not recorded. When ``block_on``
    equals the hook name, the call raises :class:`GovernanceBlockException`
    after being recorded.
    """

    def __init__(self, block_on: str | None = None) -> None:
        self.calls: List[tuple[str, dict[str, Any]]] = []
        self.block_on = block_on

    def _record(self, hook: str, **kwargs: Any) -> None:
        entry = (hook, kwargs)
        self.calls.append(entry)
        if hook == self.block_on:
            raise GovernanceBlockException("blocked")  # type: ignore[call-arg]

    def evaluate_before_agent(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("before_agent", **kwargs)

    def evaluate_after_agent(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("after_agent", **kwargs)

    def evaluate_before_model(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("before_model", **kwargs)

    def evaluate_after_model(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("after_model", **kwargs)

    def evaluate_tool_call(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("tool_call", **kwargs)

    def evaluate_after_tool(self, *args: Any, **kwargs: Any) -> Any:
        return self._record("after_tool", **kwargs)
self.sub_agents = sub_agents or [] + self.tools = tools or [] + + +class FakeAgentTool: + """Stand-in for ``google.adk.tools.agent_tool.AgentTool`` — wraps an agent.""" + + def __init__(self, agent: Any): + self.agent = agent + self.name = getattr(agent, "name", "agent_tool") + + +class FakeContainerAgent: + """Container agent (Sequential/Parallel) with no model callbacks.""" + + def __init__(self, name: str, sub_agents: List[Any]): + self.name = name + self.sub_agents = sub_agents + + +class FakeTool: + def __init__(self, name: str): + self.name = name + + +def _part( + text: str | None = None, + function_call: Any = None, + function_response: Any = None, +) -> SimpleNamespace: + return SimpleNamespace( + text=text, + function_call=function_call, + function_response=function_response, + ) + + +def _content(parts: List[Any], role: str = "user") -> SimpleNamespace: + return SimpleNamespace(role=role, parts=parts) + + +def _make_callbacks(evaluator: FakeEvaluator) -> GovernanceCallbacks: + return GovernanceCallbacks( + evaluator=evaluator, agent_name="agent-1", session_id="sess-1" + ) + + +# -------------------------------------------------------------------------- +# install_governance +# -------------------------------------------------------------------------- + + +def test_install_governance_installs_on_all_llm_agents_in_tree(): + leaf_a = FakeLlmAgent("a") + leaf_b = FakeLlmAgent("b") + root = FakeContainerAgent("root", [leaf_a, leaf_b]) + + returned = install_governance(root, FakeEvaluator(), agent_name="x", session_id="s") + + assert returned is root # original returned, not a proxy + for leaf in (leaf_a, leaf_b): + assert isinstance(leaf.before_model_callback, list) + assert len(leaf.before_model_callback) == 1 + assert leaf.after_model_callback and leaf.before_tool_callback + assert leaf.after_tool_callback + # the container agent has no model-callback surface → must NOT be decorated + assert not hasattr(root, "before_model_callback") + + +def 
test_install_governance_is_idempotent(): + agent = FakeLlmAgent() + ev = FakeEvaluator() + install_governance(agent, ev, agent_name="x", session_id="s") + install_governance(agent, ev, agent_name="x", session_id="s") + assert len(agent.before_model_callback) == 1 + + +def test_install_governance_preserves_existing_callback_and_runs_first(): + def user_cb(*_a, **_k): + return None + + agent = FakeLlmAgent() + agent.before_model_callback = user_cb + install_governance(agent, FakeEvaluator(), agent_name="x", session_id="s") + cbs = agent.before_model_callback + assert isinstance(cbs, list) and len(cbs) == 2 + # governance prepended → runs first + assert getattr(cbs[0], "__self__", None).__class__ is GovernanceCallbacks + assert cbs[1] is user_cb + + +def test_install_governance_warns_when_no_llm_agent(caplog): + container = FakeContainerAgent("root", []) + with caplog.at_level(logging.WARNING): + install_governance(container, FakeEvaluator(), agent_name="x", session_id="s") + assert any("no LlmAgent" in r.message for r in caplog.records) + + +def test_install_governance_follows_agent_tool_wrapped_agents(): + """An agent exposed to another agent via AgentTool lives in ``tools``, not + ``sub_agents`` — it must still be governed.""" + wrapped = FakeLlmAgent("researcher") + root = FakeLlmAgent("root", tools=[FakeAgentTool(wrapped)]) + install_governance(root, FakeEvaluator(), agent_name="x", session_id="s") + assert isinstance(wrapped.before_model_callback, list) + assert len(wrapped.before_model_callback) == 1 + + +def test_install_governance_rebinds_session_on_cached_agent_reuse(): + """The factory caches agents by entrypoint; a second new_runtime reuses the + same agent, so governance metadata must refresh to the new session.""" + agent = FakeLlmAgent() + install_governance(agent, FakeEvaluator(), agent_name="a", session_id="session-1") + gov = agent.before_model_callback[0].__self__ + assert gov._session_id == "session-1" + + ev2 = FakeEvaluator() + 
install_governance(agent, ev2, agent_name="a", session_id="session-2") + # same callback object, not re-stacked, but re-pointed at the new run + assert len(agent.before_model_callback) == 1 + assert agent.before_model_callback[0].__self__ is gov + assert gov._session_id == "session-2" + assert gov._evaluator is ev2 + + +# -------------------------------------------------------------------------- +# Factory wiring — the evaluator kwarg drives install_governance +# -------------------------------------------------------------------------- + + +class _FakeRuntime: + APP_NAME = "app" + USER_ID = "user" + + def __init__(self, **kw: Any) -> None: + pass + + +class _FakeSessionService: + async def get_session(self, **kw: Any) -> Any: + return None + + async def create_session(self, **kw: Any) -> Any: + return object() + + +def _factory_without_init(): + """A factory instance that skips __init__ (avoids config/IO).""" + from uipath_google_adk.runtime.factory import UiPathGoogleADKRuntimeFactory + + return UiPathGoogleADKRuntimeFactory.__new__(UiPathGoogleADKRuntimeFactory) + + +def _stub_factory_runtime(monkeypatch, factory_mod): + """Stub Runner + runtime + session service so only the governance branch runs.""" + monkeypatch.setattr(factory_mod, "Runner", lambda **kw: None) + monkeypatch.setattr(factory_mod, "UiPathGoogleADKRuntime", _FakeRuntime) + + async def _session_service(self): + return _FakeSessionService() + + monkeypatch.setattr( + factory_mod.UiPathGoogleADKRuntimeFactory, + "_get_session_service", + _session_service, + ) + + +async def test_factory_installs_governance_when_evaluator_supplied(monkeypatch): + from uipath_google_adk.runtime import factory as factory_mod + + _stub_factory_runtime(monkeypatch, factory_mod) + agent = FakeLlmAgent() + await _factory_without_init()._create_runtime_instance( + agent=agent, runtime_id="r", entrypoint="e", evaluator=FakeEvaluator() + ) + assert isinstance(agent.before_model_callback, list) + + +async def 
test_factory_skips_governance_without_evaluator(monkeypatch): + from uipath_google_adk.runtime import factory as factory_mod + + _stub_factory_runtime(monkeypatch, factory_mod) + agent = FakeLlmAgent() + await _factory_without_init()._create_runtime_instance( + agent=agent, runtime_id="r", entrypoint="e" + ) + assert agent.before_model_callback is None + + +# -------------------------------------------------------------------------- +# before_model +# -------------------------------------------------------------------------- + + +def test_before_model_scopes_to_latest_content(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + req = SimpleNamespace( + contents=[ + _content([_part(text="OLD turn — secret leak here")]), + _content([_part(text="the new question")]), + ] + ) + cb.before_model(callback_context=None, llm_request=req) + hook, kwargs = ev.calls[-1] + assert hook == "before_model" + assert kwargs["model_input"] == "the new question" + assert "OLD turn" not in kwargs["model_input"] + + +def test_before_model_extracts_function_response_when_latest(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + fr = SimpleNamespace(name="lookup", response={"balance": "1000"}) + req = SimpleNamespace(contents=[_content([_part(function_response=fr)])]) + cb.before_model(callback_context=None, llm_request=req) + assert "1000" in ev.calls[-1][1]["model_input"] + + +def test_before_model_caps_text(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + huge = "x" * (_BEFORE_MODEL_TEXT_CAP + 5000) + req = SimpleNamespace(contents=[_content([_part(text=huge)])]) + cb.before_model(callback_context=None, llm_request=req) + assert len(ev.calls[-1][1]["model_input"]) <= _BEFORE_MODEL_TEXT_CAP + + +def test_before_model_empty_contents(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.before_model(callback_context=None, llm_request=SimpleNamespace(contents=[])) + assert ev.calls[-1][1]["model_input"] == "" + + +# 
-------------------------------------------------------------------------- +# after_model +# -------------------------------------------------------------------------- + + +def test_after_model_skips_partial(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + resp = SimpleNamespace(partial=True, content=_content([_part(text="chunk")])) + cb.after_model(callback_context=None, llm_response=resp) + assert ev.calls == [] + + +def test_after_model_extracts_text_and_function_call(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + fc = SimpleNamespace(name="submit_answer", args={"content": "final reply"}) + resp = SimpleNamespace( + partial=False, + content=_content( + [_part(text="thinking"), _part(function_call=fc)], role="model" + ), + ) + cb.after_model(callback_context=None, llm_response=resp) + out = ev.calls[-1][1]["model_output"] + assert "thinking" in out and "submit_answer" in out and "final reply" in out + + +# -------------------------------------------------------------------------- +# tools +# -------------------------------------------------------------------------- + + +def test_before_tool_passes_args_and_session_state(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.before_tool(FakeTool("transfer"), {"amount": 50}, tool_context=None) + hook, kwargs = ev.calls[-1] + assert hook == "tool_call" + assert kwargs["tool_name"] == "transfer" + assert kwargs["tool_args"] == {"amount": 50} + assert kwargs["session_state"]["tool_calls"] == 1 + + +def test_before_tool_caps_huge_args(): + """A huge arg blob must not reach the evaluator uncapped (contrast with the + small-args case, which passes through unchanged).""" + ev = FakeEvaluator() + cb = _make_callbacks(ev) + huge = "x" * (_BEFORE_MODEL_TEXT_CAP + 5000) + cb.before_tool(FakeTool("t"), {"blob": huge}, tool_context=None) + tool_args = ev.calls[-1][1]["tool_args"] + assert set(tool_args) == {"_truncated"} + assert len(tool_args["_truncated"]) <= _BEFORE_MODEL_TEXT_CAP + + +def 
test_after_tool_stringifies_dict_response(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.after_tool(FakeTool("lookup"), {}, tool_context=None, tool_response={"x": 1}) + out = ev.calls[-1][1]["tool_result"] + assert "x" in out and "1" in out + + +def test_after_tool_none_response(): + ev = FakeEvaluator() + cb = _make_callbacks(ev) + cb.after_tool(FakeTool("noop"), {}, tool_context=None, tool_response=None) + assert ev.calls[-1][1]["tool_result"] == "" + + +def test_blocked_tool_call_does_not_increment_counter(): + """A DENY raises before the counter bump, so the count is not inflated.""" + ev = FakeEvaluator(block_on="tool_call") + cb = _make_callbacks(ev) + with pytest.raises(GovernanceBlockException): + cb.before_tool(FakeTool("t"), {}, tool_context=None) + assert ev.calls[-1][1]["session_state"]["tool_calls"] == 0 + assert cb._session_state["tool_calls"] == 0 + + +# -------------------------------------------------------------------------- +# enforcement semantics +# -------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "hook,invoke", + [ + ( + "before_model", + lambda cb: cb.before_model( + None, SimpleNamespace(contents=[_content([_part(text="hi")])]) + ), + ), + ( + "after_model", + lambda cb: cb.after_model( + None, + SimpleNamespace(partial=False, content=_content([_part(text="o")])), + ), + ), + ("tool_call", lambda cb: cb.before_tool(FakeTool("t"), {}, None)), + ( + "after_tool", + lambda cb: cb.after_tool(FakeTool("t"), {}, None, {"r": 1}), + ), + ], +) +def test_block_exception_propagates(hook, invoke): + cb = _make_callbacks(FakeEvaluator(block_on=hook)) + with pytest.raises(GovernanceBlockException): + invoke(cb) + + +def test_non_block_exception_is_swallowed(caplog): + class Boom: + def evaluate_before_model(self, **_): + raise RuntimeError("evaluator bug") + + cb = GovernanceCallbacks( + evaluator=Boom(), # type: ignore[arg-type] # minimal test double + agent_name="a", + 
session_id="s", + ) + with caplog.at_level(logging.WARNING): + # must NOT raise — a governance bug can't break the agent run + cb.before_model(None, SimpleNamespace(contents=[_content([_part(text="x")])])) + assert any("governance check failed" in r.message for r in caplog.records) + + +def test_callbacks_return_none(): + # callbacks return None (ADK: a None return means "don't override the + # model/tool"); the type: ignores silence mypy's func-returns-value on the + # None-returning callbacks while the asserts document that contract. + cb = _make_callbacks(FakeEvaluator()) + assert cb.before_model(None, SimpleNamespace(contents=[])) is None # type: ignore[func-returns-value] + assert cb.after_model(None, SimpleNamespace(partial=False, content=None)) is None # type: ignore[func-returns-value] + assert cb.before_tool(FakeTool("t"), {}, None) is None # type: ignore[func-returns-value] + assert cb.after_tool(FakeTool("t"), {}, None, {}) is None # type: ignore[func-returns-value] + + +# -------------------------------------------------------------------------- +# coverage: swallow on model/tool callbacks + extraction / helper edges +# -------------------------------------------------------------------------- + + +class _Boom: + """Evaluator whose every evaluate_* raises a non-block error.""" + + def __getattr__(self, _name: str) -> Any: + def _raise(*_a: Any, **_k: Any) -> None: + raise RuntimeError("evaluator bug") + + return _raise + + +@pytest.mark.parametrize( + "invoke", + [ + lambda cb: cb.after_model(None, SimpleNamespace(partial=False, content=None)), + lambda cb: cb.before_tool(FakeTool("t"), {}, None), + lambda cb: cb.after_tool(FakeTool("t"), {}, None, {"r": 1}), + ], +) +def test_model_tool_callbacks_swallow_non_block_errors(invoke, caplog): + cb = GovernanceCallbacks(evaluator=_Boom(), agent_name="a", session_id="s") + with caplog.at_level(logging.WARNING): + invoke(cb) # must NOT raise — a governance bug can't break the run + assert any("governance check 
failed" in r.message for r in caplog.records) + + +def test_content_text_and_helper_edges(): + G = GovernanceCallbacks + # _content_text: None / bare str / list-of-parts / unsupported object + assert G._content_text(None) == "" + assert G._content_text("bare") == "bare" + assert G._content_text(123) == "" + fc = SimpleNamespace(name="lookup", args={"q": "x"}) + fr = SimpleNamespace(response={"ok": 1}) + out = G._content_text( + _content( + [_part(text="hi"), _part(function_call=fc), _part(function_response=fr)] + ) + ) + assert "hi" in out and "lookup" in out and "ok" in out + # _cap_args: non-dict passes through untouched + assert G._cap_args("notdict") == "notdict" # type: ignore[arg-type] + # _stringify: str passthrough + circular-ref fallback (no crash) + assert G._stringify("hi") == "hi" + circular: dict[str, Any] = {} + circular["self"] = circular + assert isinstance(G._stringify(circular), str) diff --git a/packages/uipath-google-adk/uv.lock b/packages/uipath-google-adk/uv.lock index 7028436e..11a9f288 100644 --- a/packages/uipath-google-adk/uv.lock +++ b/packages/uipath-google-adk/uv.lock @@ -3591,16 +3591,16 @@ wheels = [ [[package]] name = "uipath-core" -version = "0.5.18" +version = "0.5.28" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-instrumentation" }, { name = "opentelemetry-sdk" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/14/b1/d4e555a1a2ccf298195a5f2968e538b0cea8592b3e03f43fc12b178d6c69/uipath_core-0.5.18.tar.gz", hash = "sha256:63ebe8bdb818ca30a4bc9ab0ea8171315680691429931282939359ce039401ab", size = 131988, upload-time = "2026-06-08T14:04:49.688Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/f9/8d2f1d98cbebbcf059cf4561f38f34ad4cd58423e4f15cad22bd297a2563/uipath_core-0.5.28.tar.gz", hash = "sha256:942987f6b612c64f93d612ad7b242276ed75f129fdd8f25bc71c24ec8887e388", size = 130578, upload-time = "2026-06-30T14:04:48.841Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/57/de/1a820b33f7bff4565d7649772bc54c88480ac7e70f707097f7da37d05157/uipath_core-0.5.18-py3-none-any.whl", hash = "sha256:351d6faeecfc6a0acea93182e01526f39c04a77e09fa0444be5f4fb580463f5a", size = 54572, upload-time = "2026-06-08T14:04:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/e8/1e/385bb166232a57ebe938cc57ad2717f350bc922bb5d2ce31af84306b7569/uipath_core-0.5.28-py3-none-any.whl", hash = "sha256:b952a46a21710073cbc16d6d5684e9aa645c107f57a636b778cfb94aa81a1e48", size = 54980, upload-time = "2026-06-30T14:04:47.374Z" }, ] [[package]] @@ -3611,6 +3611,7 @@ dependencies = [ { name = "google-adk" }, { name = "openinference-instrumentation-google-adk" }, { name = "uipath" }, + { name = "uipath-core" }, { name = "uipath-runtime" }, ] @@ -3636,6 +3637,7 @@ requires-dist = [ { name = "google-adk", specifier = ">=1.25.1" }, { name = "openinference-instrumentation-google-adk", specifier = ">=0.1.9" }, { name = "uipath", specifier = ">=2.10.0,<2.11.0" }, + { name = "uipath-core", specifier = ">=0.5.18,<0.7.0" }, { name = "uipath-runtime", specifier = ">=0.11.0,<0.12.0" }, ] provides-extras = ["anthropic"]