From 1a6213c608f5b588bbe68a0dc8d5fc78d112c96c Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Tue, 26 May 2026 18:09:19 +0200 Subject: [PATCH 01/12] feat(python): Add local-codeact package with AST validation Add agent-framework-local-codeact alpha package for running LLM-generated Python code in Foundry hosted agents and other sandboxed environments. Key features: - Subprocess execution by default (isolated process) - Optional unsafe in-process mode for debugging - AST-based allow-list code validation - Customizable allowed/blocked imports and builtins - Host tool bridge with framed JSON-lines IPC - File mount system with capture and limits - .NET portability features (python_executable, runner_script) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/PACKAGE_STATUS.md | 1 + python/packages/local_codeact/AGENTS.md | 70 +++ python/packages/local_codeact/LICENSE | 22 + python/packages/local_codeact/README.md | 188 +++++++ .../agent_framework_local_codeact/__init__.py | 27 + .../agent_framework_local_codeact/_bridge.py | 310 ++++++++++++ .../_execute_code_tool.py | 465 ++++++++++++++++++ .../agent_framework_local_codeact/_files.py | 168 +++++++ .../_instructions.py | 129 +++++ .../_provider.py | 123 +++++ .../agent_framework_local_codeact/_runner.py | 210 ++++++++ .../agent_framework_local_codeact/_types.py | 48 ++ .../_validator.py | 448 +++++++++++++++++ .../agent_framework_local_codeact/py.typed | 0 python/packages/local_codeact/pyproject.toml | 103 ++++ .../packages/local_codeact/samples/README.md | 19 + .../samples/foundry_hosted_agent.py | 46 ++ .../samples/local_execute_code.py | 115 +++++ .../tests/local_codeact/test_local_codeact.py | 234 +++++++++ .../tests/local_codeact/test_validator.py | 263 ++++++++++ python/pyproject.toml | 1 + python/uv.lock | 12 + 22 files changed, 3002 insertions(+) create mode 100644 python/packages/local_codeact/AGENTS.md create mode 100644 python/packages/local_codeact/LICENSE create mode 100644 
python/packages/local_codeact/README.md create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/__init__.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_bridge.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_files.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_instructions.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_provider.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_runner.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_types.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/_validator.py create mode 100644 python/packages/local_codeact/agent_framework_local_codeact/py.typed create mode 100644 python/packages/local_codeact/pyproject.toml create mode 100644 python/packages/local_codeact/samples/README.md create mode 100644 python/packages/local_codeact/samples/foundry_hosted_agent.py create mode 100644 python/packages/local_codeact/samples/local_execute_code.py create mode 100644 python/packages/local_codeact/tests/local_codeact/test_local_codeact.py create mode 100644 python/packages/local_codeact/tests/local_codeact/test_validator.py diff --git a/python/PACKAGE_STATUS.md b/python/PACKAGE_STATUS.md index 1f336f1cd8..7d1f45e2fe 100644 --- a/python/PACKAGE_STATUS.md +++ b/python/PACKAGE_STATUS.md @@ -36,6 +36,7 @@ Status is grouped into these buckets: | `agent-framework-github-copilot` | `python/packages/github_copilot` | `beta` | | `agent-framework-hyperlight` | `python/packages/hyperlight` | `beta` | | `agent-framework-lab` | `python/packages/lab` | `beta` | +| `agent-framework-local-codeact` | `python/packages/local_codeact` | `alpha` | | `agent-framework-mem0` | `python/packages/mem0` | `beta` 
| | `agent-framework-monty` | `python/packages/monty` | `alpha` | | `agent-framework-ollama` | `python/packages/ollama` | `beta` | diff --git a/python/packages/local_codeact/AGENTS.md b/python/packages/local_codeact/AGENTS.md new file mode 100644 index 0000000000..80a69192df --- /dev/null +++ b/python/packages/local_codeact/AGENTS.md @@ -0,0 +1,70 @@ +# Local CodeAct Package (agent-framework-local-codeact) + +Local subprocess-backed CodeAct integrations for the Microsoft Agent Framework. + +> [!WARNING] +> This package runs LLM-generated Python in the local environment. It is **not** +> a Python security sandbox. Use it only inside an external sandbox such as a +> Foundry hosted-agent container, VM, or locked-down container runtime. + +## Core Classes + +- **`LocalCodeActProvider`** — `ContextProvider` that injects a run-scoped + `execute_code` tool plus dynamic CodeAct instructions. +- **`LocalExecuteCodeTool`** — `FunctionTool` that validates generated code + against AST allow-lists, then runs it in a local Python subprocess by default. + Same-interpreter execution is available only through + `execution_mode="unsafe_in_process"`. + +## Public API + +```python +from agent_framework_local_codeact import ( + CodeValidationError, + ExecutionMode, + FileMount, + FileMountInput, + LocalCodeActProvider, + LocalExecuteCodeTool, + MountMode, + ProcessExecutionLimits, +) +``` + +## Architecture + +- **`_types.py`** — public types for execution limits, execution mode, and file + mount metadata. +- **`_provider.py`** — provider wrapper around a managed execute-code tool. +- **`_execute_code_tool.py`** — tool management, approval propagation, + subprocess orchestration, state serialization, and output-file capture. +- **`_validator.py`** — AST-based allow-list code validation (blocks `eval`, + `exec`, dangerous imports/builtins, and risky os operations). +- **`_bridge.py`** — parent-side framed IPC and optional unsafe in-process + runner. 
Subprocess mode supports explicit `python_executable` and + `runner_script` configuration so non-Python hosts can launch the same runner + by file path. +- **`_runner.py`** — child-process entry point used by subprocess mode. +- **`_files.py`** — mount normalization and symlink-safe file capture helpers. +- **`_instructions.py`** — dynamic instructions and risk wording. + +## Security posture + +Do not describe this package as sandboxing Python code. AST validation, process +isolation, timeouts, output caps, environment allow-lists, and file capture +limits are defense-in-depth controls only. Host filesystem, network, credentials, +process table, and kernel resources must be isolated by the surrounding +environment. + +## .NET portability notes + +Keep the subprocess JSON-lines protocol stable where possible. A .NET port can +mirror the provider/tool/file/limit surface, but should omit +`unsafe_in_process` and require or strongly encourage an explicit +`python_executable`. If the .NET package bundles the Python runner instead of +requiring a Python wheel install, invoke it through the file-path +`runner_script` pattern rather than relying on `-m agent_framework_local_codeact._runner`. + +The AST validator should be ported to .NET as well (likely using Roslyn for C# +code analysis if the .NET version supports generated C# execution, or a Python +AST parser if it still executes Python). diff --git a/python/packages/local_codeact/LICENSE b/python/packages/local_codeact/LICENSE new file mode 100644 index 0000000000..010ec64d62 --- /dev/null +++ b/python/packages/local_codeact/LICENSE @@ -0,0 +1,22 @@ + MIT License + +Copyright (c) Microsoft Corporation. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE + diff --git a/python/packages/local_codeact/README.md b/python/packages/local_codeact/README.md new file mode 100644 index 0000000000..dabda508ac --- /dev/null +++ b/python/packages/local_codeact/README.md @@ -0,0 +1,188 @@ +# agent-framework-local-codeact + +Local CodeAct integrations for Microsoft Agent Framework. + +> [!WARNING] +> This package runs LLM-generated Python in the local environment. It is **not** +> a Python security sandbox and is not safe for untrusted prompts or code on a +> developer workstation or production host without an external sandbox. + +`agent-framework-local-codeact` is intended for environments that already +provide process, filesystem, network, and credential isolation, especially +Foundry hosted agents. It provides the familiar CodeAct provider pattern used by +the Hyperlight and Monty packages while keeping the implementation local to the +agent container. 
+ +## Install + +```bash +pip install agent-framework-local-codeact --pre +``` + +This is an alpha package and is not included in `agent-framework[all]`. + +## Basic usage + +```python +from agent_framework import Agent +from agent_framework_local_codeact import LocalCodeActProvider, ProcessExecutionLimits + +agent = Agent( + client=..., + instructions="Use execute_code for Python control flow when it helps.", + context_providers=[ + LocalCodeActProvider( + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + # Optional: use a specific interpreter instead of the current one. + # python_executable="/usr/bin/python3", + ) + ], +) +``` + +For Foundry hosted agents, add the provider to the local agent before wrapping +it with `ResponsesHostServer`. + +```python +from agent_framework_foundry_hosting import ResponsesHostServer + +server = ResponsesHostServer(agent) +``` + +## What the package controls + +- Validates generated code against AST allow-lists (allowed imports, builtins, + and operations) before execution. +- Runs generated code in a child Python process by default. +- Uses `sys.executable` by default, or an explicit `python_executable` when + configured. +- Does not inherit host environment variables unless explicitly provided. +- Does not invoke a shell. +- Applies code-size, timeout, stdout, stderr, and result-size limits. +- Allows only provider-owned host tools to be called from generated code. +- Propagates `always_require` approval from managed tools to `execute_code`. +- Captures new or modified files under configured writable mounts while + skipping symlinks. + +These are defense-in-depth controls, not a containment boundary. The AST +validator blocks common dangerous operations (`eval`, `exec`, `import subprocess`, +etc.) but does not make Python execution safe on an unsandboxed host. + +## What the package does not protect + +- Malicious Python working within allowed imports and operations. 
+- Network access unless the surrounding environment blocks it. +- Prompt-injected exfiltration through allowed host tools. +- Resource exhaustion outside the configured limits. +- Log, stdout, stderr, or result poisoning. + +Use Foundry hosted agents, containers, VMs, or equivalent infrastructure as the +actual security boundary. + +## Host tools + +Register host tools on the provider. Generated code calls them with `await`: + +```python +async def add(a: int, b: int) -> int: + return a + b + +provider = LocalCodeActProvider(tools=[add]) +``` + +Inside `execute_code`: + +```python +result = await add(a=2, b=3) +print(result) +``` + +`await call_tool("add", a=2, b=3)` is also available for tool names that are not +valid Python identifiers. + +## Files + +No project directory is exposed by default. If you configure `workspace_root` or +`file_mounts`, generated code receives the direct path to the configured host +directory inside the surrounding sandbox. Mount modes are used for instructions +and output capture; they are not an OS-level filesystem policy. + +Only files under `read-write` mounts are captured after execution. + +## Python interpreter and runner + +Subprocess mode launches Python as: + +```text +<python_executable> -I -m agent_framework_local_codeact._runner +``` + +`python_executable` defaults to the current Python interpreter. If you point it +at a different virtual environment or system Python, that environment must be +able to import `agent_framework_local_codeact._runner`. + +For hosts that cannot rely on a Python package import, such as a future .NET +host bundling the runner itself, pass `runner_script` to execute the runner by +file path instead: + +```python +LocalCodeActProvider( + python_executable="/usr/bin/python3", + runner_script="/app/local_codeact_runner.py", +) +``` + +The framed JSON-lines protocol between parent and runner is intended to be the +cross-language boundary for a .NET implementation.
The .NET version should use +subprocess mode only; same-interpreter execution is Python-specific. + +## Code validation + +Generated code is validated against AST allow-lists before execution: + +- **Allowed imports**: `asyncio`, `pathlib`, `json`, `math`, `datetime`, `time`, + `os` (limited to `os.environ`, `os.path`), and a few others. +- **Blocked imports**: `subprocess`, `sys`, `socket`, `urllib`, `requests`, + `threading`, `multiprocessing`, and others. +- **Blocked builtins**: `eval`, `exec`, `compile`, `__import__`, `globals`, + `locals`, `open`, and others. +- **Blocked os operations**: `os.system`, `os.exec*`, `os.popen`, `os.fork`, + file system modifications outside configured mounts, and others. + +Validation errors are returned as `Content.from_error` with details about which +operations are not allowed. This is defense-in-depth only and does not make +Python execution safe on an unsandboxed host. + +### Customizing allow-lists + +Use custom allow/block lists to adapt validation to your use case: + +```python +from agent_framework_local_codeact import LocalCodeActProvider, LocalExecuteCodeTool + +# Allow specific imports (replaces defaults) +tool = LocalExecuteCodeTool( + allowed_imports={"csv", "json", "pathlib"}, + blocked_imports=set(), # Empty block-list +) + +# Block specific imports (replaces defaults) +provider = LocalCodeActProvider( + blocked_imports={"json", "requests"}, +) + +# Block specific builtins (replaces defaults) +tool = LocalExecuteCodeTool( + blocked_builtins={"len", "sum"}, # Prevent common operations +) +``` + +Custom lists **replace** the defaults entirely (they do not augment them). + +## Unsafe in-process mode + +`execution_mode="unsafe_in_process"` runs generated code with `exec` in the +agent process. This mode is intended only for debugging package behavior because +timeouts cannot stop CPU-bound or blocking code in the same interpreter. It also
It also +does not provide subprocess-only behavior such as an explicit environment map or +working-directory isolation. diff --git a/python/packages/local_codeact/agent_framework_local_codeact/__init__.py b/python/packages/local_codeact/agent_framework_local_codeact/__init__.py new file mode 100644 index 0000000000..d1f00a2d91 --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import importlib.metadata + +from ._execute_code_tool import LocalExecuteCodeTool +from ._provider import LocalCodeActProvider +from ._types import ExecutionMode, FileMount, FileMountInput, MountMode, ProcessExecutionLimits +from ._validator import CodeValidationError + +try: + __version__ = importlib.metadata.version(__name__) +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + +__all__ = [ + "CodeValidationError", + "ExecutionMode", + "FileMount", + "FileMountInput", + "LocalCodeActProvider", + "LocalExecuteCodeTool", + "MountMode", + "ProcessExecutionLimits", + "__version__", +] diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py new file mode 100644 index 0000000000..a1b184a4ab --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py @@ -0,0 +1,310 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Execution bridges for local CodeAct.""" + +from __future__ import annotations + +import ast +import asyncio +import contextlib +import io +import json +import keyword +import os +import subprocess # noqa: S404 - subprocess mode is the default execution strategy for this package. 
+import traceback
+from collections.abc import Mapping, Sequence
+from copy import copy
+from typing import Any, cast
+
+from agent_framework import FunctionTool
+
+from ._types import ProcessExecutionLimits
+
+# asyncio's StreamReader rejects lines longer than its buffer limit, which is
+# 64 KiB unless ``limit`` is passed to ``create_subprocess_exec``.
+_DEFAULT_STREAM_LIMIT = 2**16
+
+
+def _json_safe_mapping(value: Mapping[Any, Any]) -> dict[str, object]:
+    """Return ``value`` as a dict with stringified keys and JSON-safe values."""
+    return {str(key): json_safe(item) for key, item in value.items()}
+
+
+def _json_safe_sequence(value: Sequence[Any]) -> list[object]:
+    """Return ``value`` as a list of JSON-safe items."""
+    return [json_safe(item) for item in value]
+
+
+def json_safe(value: object) -> object:
+    """Return a JSON-safe representation of ``value``.
+
+    Values that ``json.dumps`` already accepts are returned unchanged.
+    Otherwise mappings, lists, and tuples are converted recursively, and
+    anything else falls back to ``repr(value)``.
+    """
+    try:
+        json.dumps(value)
+    except (TypeError, ValueError):
+        if isinstance(value, Mapping):
+            return _json_safe_mapping(cast("Mapping[Any, Any]", value))  # type: ignore[redundant-cast]
+        if isinstance(value, (list, tuple)):
+            return _json_safe_sequence(cast("Sequence[Any]", value))
+        return repr(value)
+    return value
+
+
+class _CappedTextIO(io.TextIOBase):
+    """Write-only text stream that stops storing text once ``limit`` is reached.
+
+    The cap is compared against the ``StringIO`` position, so it is effectively
+    a character count rather than an encoded byte count.
+    """
+
+    def __init__(self, limit: int) -> None:
+        super().__init__()
+        self._limit = max(0, limit)
+        self._buffer = io.StringIO()
+        self.truncated = False
+
+    def writable(self) -> bool:
+        return True
+
+    def write(self, value: str) -> int:
+        """Store as much of ``value`` as still fits and report it as fully written."""
+        text = str(value)
+        current = self._buffer.tell()
+        remaining = max(0, self._limit - current)
+        if remaining:
+            self._buffer.write(text[:remaining])
+        if len(text) > remaining:
+            self.truncated = True
+        # Report the full length so callers such as ``print`` do not retry the dropped tail.
+        return len(text)
+
+    def getvalue(self) -> str:
+        return self._buffer.getvalue()
+
+
+def _build_child_env(env: Mapping[str, str]) -> dict[str, str]:
+    """Build the child environment from ``env`` only.
+
+    On Windows (``os.name == "nt"``) ``SYSTEMROOT``, ``COMSPEC``, and ``PATHEXT``
+    are copied from the parent environment when present and not already set.
+    """
+    child_env = {key: str(value) for key, value in env.items()}
+    if os.name == "nt":
+        for key in ("SYSTEMROOT", "COMSPEC", "PATHEXT"):
+            if key in os.environ and key not in child_env:
+                child_env[key] = os.environ[key]
+    return child_env
+
+
+def _check_result_size(result: Mapping[str, Any], *, limits: ProcessExecutionLimits) -> None:
+    """Raise ``RuntimeError`` when the compact JSON encoding of ``result`` is too large."""
+    encoded = json.dumps(result, separators=(",", ":")).encode("utf-8")
+    if len(encoded) > limits.max_result_bytes:
+        raise RuntimeError("Generated code result exceeded max_result_bytes.")
+
+
+async def _invoke_tool(tool_obj: FunctionTool, kwargs: Mapping[str, Any]) -> Any:
+    """Invoke a shallow copy of ``tool_obj`` with ``kwargs``, skipping argument parsing."""
+    return await copy(tool_obj).invoke(skip_parsing=True, **dict(kwargs))
+
+
+class SubprocessCodeBridge:
+    """Parent-side bridge for subprocess execution and host-tool dispatch."""
+
+    def __init__(
+        self,
+        *,
+        tools: Sequence[FunctionTool],
+        limits: ProcessExecutionLimits,
+        env: Mapping[str, str],
+        cwd: str | None,
+        python_executable: str,
+        runner_script: str | None,
+    ) -> None:
+        self._tools = {tool_obj.name: tool_obj for tool_obj in tools}
+        self._limits = limits
+        self._env = dict(env)
+        self._cwd = cwd
+        self._python_executable = python_executable
+        self._runner_script = runner_script
+
+    async def run(self, code: str) -> dict[str, Any]:
+        """Run generated code in a child Python process.
+
+        Returns:
+            The result mapping sent by the runner in its ``complete`` message.
+
+        Raises:
+            TimeoutError: The exchange exceeded ``limits.timeout_seconds``.
+            RuntimeError: The runner reported an error, exited early, or sent
+                invalid, oversized, or malformed bridge data.
+        """
+        command = [self._python_executable, "-I"]
+        if self._runner_script is None:
+            command.extend(["-m", "agent_framework_local_codeact._runner"])
+        else:
+            command.append(self._runner_script)
+
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            cwd=self._cwd,
+            env=_build_child_env(self._env),
+            # The default 64 KiB reader limit would make readline() fail on a
+            # larger frame before _check_result_size can report it.
+            limit=max(_DEFAULT_STREAM_LIMIT, 2 * self._limits.max_result_bytes + 4096),
+        )
+        try:
+            return await asyncio.wait_for(self._communicate(process, code), timeout=self._limits.timeout_seconds)
+        except asyncio.TimeoutError as exc:
+            # asyncio.TimeoutError is only an alias of the builtin on Python 3.11+.
+            raise TimeoutError(f"Generated code exceeded {self._limits.timeout_seconds} seconds.") from exc
+        finally:
+            # Stop the child on every exit path (timeout, runner error, invalid
+            # result, cancellation); this is a no-op once it has already exited.
+            await self._stop_process(process)
+
+    async def _communicate(self, process: asyncio.subprocess.Process, code: str) -> dict[str, Any]:
+        """Send the request, serve ``tool_call`` messages, and return the final result."""
+        if process.stdin is None or process.stdout is None:
+            raise RuntimeError("Subprocess pipes were not created.")
+        request = {
+            "code": code,
+            "tool_names": list(self._tools),
+            "max_stdout_bytes": self._limits.max_stdout_bytes,
+            "max_stderr_bytes": self._limits.max_stderr_bytes,
+        }
+        process.stdin.write(json.dumps(request, separators=(",", ":")).encode("utf-8") + b"\n")
+        await process.stdin.drain()
+
+        while True:
+            try:
+                line = await process.stdout.readline()
+            except ValueError as exc:
+                # StreamReader.readline raises ValueError when a line exceeds the reader limit.
+                raise RuntimeError("Local CodeAct subprocess emitted an oversized bridge message.") from exc
+            if not line:
+                stderr = await self._read_stderr(process)
+                raise RuntimeError(f"Local CodeAct subprocess exited without a result. stderr: {stderr}")
+            try:
+                message_value: Any = json.loads(line.decode("utf-8"))
+            except (UnicodeDecodeError, json.JSONDecodeError) as exc:
+                await self._stop_process(process)
+                raise RuntimeError("Local CodeAct subprocess emitted invalid bridge data.") from exc
+            if not isinstance(message_value, dict):
+                continue
+            message = cast("dict[str, Any]", message_value)
+            message_type = message.get("type")
+            if message_type == "tool_call":
+                await self._handle_tool_call(process, message)
+                continue
+            if message_type == "complete":
+                result = message.get("result")
+                if not isinstance(result, dict):
+                    raise RuntimeError("Local CodeAct subprocess returned an invalid result.")
+                result_dict = cast("dict[str, Any]", result)
+                _check_result_size(result_dict, limits=self._limits)
+                await process.wait()
+                return dict(result_dict)
+            if message_type == "error":
+                details = str(message.get("traceback") or message.get("message") or "Unknown execution error.")
+                raise RuntimeError(details)
+
+    async def _handle_tool_call(self, process: asyncio.subprocess.Process, message: Mapping[str, Any]) -> None:
+        """Dispatch one ``tool_call`` message and write the response frame to the child."""
+        if process.stdin is None:
+            raise RuntimeError("Subprocess stdin was not created.")
+        call_id = int(message.get("call_id") or 0)
+        name = str(message.get("name") or "")
+        kwargs_value: Any = message.get("kwargs")
+        if kwargs_value is None:
+            kwargs_value = {}
+        response: dict[str, Any]
+        if name not in self._tools:
+            response = {
+                "call_id": call_id,
+                "ok": False,
+                "exc_type": "ValueError",
+                "message": f"Tool {name!r} is not registered.",
+            }
+        elif not isinstance(kwargs_value, Mapping):
+            response = {
+                "call_id": call_id,
+                "ok": False,
+                "exc_type": "TypeError",
+                "message": "Tool kwargs must be a JSON object.",
+            }
+        else:
+            try:
+                result = await _invoke_tool(self._tools[name], cast("Mapping[str, Any]", kwargs_value))
+                response = {"call_id": call_id, "ok": True, "result": json_safe(result)}
+            except Exception as exc:
+                # Tool failures are reported back to the generated code instead of aborting the run.
+                response = {
+                    "call_id": call_id,
+                    "ok": False,
+                    "exc_type": type(exc).__name__,
+                    "message": str(exc),
+                }
+        process.stdin.write(json.dumps(response, separators=(",", ":")).encode("utf-8") + b"\n")
+        await process.stdin.drain()
+
+    async def _read_stderr(self, process: asyncio.subprocess.Process) -> str:
+        """Read up to ``max_stderr_bytes`` of child stderr, decoded leniently."""
+        if process.stderr is None:
+            return ""
+        data = await process.stderr.read(self._limits.max_stderr_bytes)
+        return data.decode("utf-8", errors="replace")
+
+    async def _stop_process(self, process: asyncio.subprocess.Process) -> None:
+        """Terminate the child, escalating to kill if it has not exited within one second."""
+        if process.returncode is not None:
+            return
+        # The child may exit between the returncode check and the signal.
+        with contextlib.suppress(ProcessLookupError):
+            process.terminate()
+        try:
+            await asyncio.wait_for(process.wait(), timeout=1)
+        except asyncio.TimeoutError:
+            with contextlib.suppress(ProcessLookupError):
+                process.kill()
+            await process.wait()
+
+
+class UnsafeInProcessCodeBridge:
+    """Same-interpreter execution bridge for debugging only."""
+
+    def __init__(self, *, tools: Sequence[FunctionTool], limits: ProcessExecutionLimits) -> None:
+        self._tools = {tool_obj.name: tool_obj for tool_obj in tools}
+        self._limits = limits
+
+    async def run(self, code: str) -> dict[str, Any]:
+        """Run generated code in the current interpreter."""
+        return await asyncio.wait_for(self._run_without_timeout_control(code), timeout=self._limits.timeout_seconds)
+
+    async def _run_without_timeout_control(self, code: str) -> dict[str, Any]:
+        """Execute ``code`` as an async function and collect capped output streams."""
+        stdout = _CappedTextIO(self._limits.max_stdout_bytes)
+        stderr = _CappedTextIO(self._limits.max_stderr_bytes)
+
+        async def call_tool(name: str, **kwargs: Any) -> Any:
+            if name not in self._tools:
+                raise ValueError(f"Tool {name!r} is not registered.")
+            return json_safe(await _invoke_tool(self._tools[name], kwargs))
+
+        globals_dict: dict[str, Any] = {
+            "__builtins__": __builtins__,
+            "asyncio": asyncio,
+            "call_tool": call_tool,
+        }
+        # Only tools whose names are usable Python identifiers get a direct callable.
+        for tool_name in self._tools:
+            if tool_name.isidentifier() and not keyword.iskeyword(tool_name):
+                globals_dict[tool_name] = self._make_direct_tool(tool_name)
+
+        compiled, output_present = self._compile_main(code)
+        try:
+            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
+                exec(compiled, globals_dict, globals_dict)  # noqa: S102 - explicit unsafe in-process mode.
+                output = await globals_dict["__local_codeact_main__"]()
+        except Exception:
+            raise RuntimeError(traceback.format_exc(limit=20)) from None
+
+        result = {
+            "stdout": stdout.getvalue(),
+            "stderr": stderr.getvalue(),
+            "stdout_truncated": stdout.truncated,
+            "stderr_truncated": stderr.truncated,
+            "output_present": output_present,
+            "output": json_safe(output),
+        }
+        _check_result_size(result, limits=self._limits)
+        return result
+
+    def _make_direct_tool(self, name: str) -> Any:
+        """Return an async callable that invokes the registered tool ``name`` by keyword."""
+
+        async def _tool(**kwargs: Any) -> Any:
+            if name not in self._tools:
+                raise ValueError(f"Tool {name!r} is not registered.")
+            return json_safe(await _invoke_tool(self._tools[name], kwargs))
+
+        _tool.__name__ = name
+        return _tool
+
+    def _compile_main(self, code: str) -> tuple[Any, bool]:
+        """Wrap ``code`` in ``async def __local_codeact_main__()`` and compile it.
+
+        Returns the code object and whether the last statement was an
+        expression; if so it becomes the function's return value, otherwise
+        ``return None`` is appended.
+        """
+        module = ast.parse(code, mode="exec")
+        body = list(module.body)
+        output_present = bool(body and isinstance(body[-1], ast.Expr))
+        if output_present:
+            last_expr = body[-1]
+            if isinstance(last_expr, ast.Expr):
+                body[-1] = ast.Return(value=last_expr.value)
+        else:
+            body.append(ast.Return(value=ast.Constant(value=None)))
+        async_function_def = cast(Any, ast.AsyncFunctionDef)
+        function = async_function_def(
+            name="__local_codeact_main__",
+            args=ast.arguments(
+                posonlyargs=[],
+                args=[],
+                kwonlyargs=[],
+                kw_defaults=[],
+                defaults=[],
+            ),
+            body=body,
+            decorator_list=[],
+            returns=None,
+            type_comment=None,
+        )
+        wrapped = ast.Module(body=[function], type_ignores=[])
+        ast.fix_missing_locations(wrapped)
+        return compile(wrapped, "", "exec"), output_present
diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py
b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py new file mode 100644 index 0000000000..e8550de51a --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -0,0 +1,465 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""``LocalExecuteCodeTool`` - run Python locally through a CodeAct surface.""" + +from __future__ import annotations + +import json +import sys +import tempfile +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path +from typing import Any, cast + +from agent_framework import Content, FunctionTool +from agent_framework._tools import ApprovalMode, normalize_tools + +from ._bridge import SubprocessCodeBridge, UnsafeInProcessCodeBridge +from ._files import ( + WORKSPACE_MOUNT_PATH, + capture_written_files, + is_file_mount_pair, + normalize_file_mount, + normalize_mount_path, + resolve_existing_directory, + snapshot_writable_mounts, +) +from ._instructions import build_codeact_instructions, build_execute_code_description +from ._types import ExecutionMode, FileMount, FileMountInput, ProcessExecutionLimits +from ._validator import CodeValidationError, validate_code + +EXECUTE_CODE_TOOL_NAME = "execute_code" +EXECUTE_CODE_TOOL_DESCRIPTION = "Execute Python locally in the agent environment." 
+ +EXECUTE_CODE_INPUT_SCHEMA: dict[str, Any] = { + "type": "object", + "title": "_ExecuteCodeInput", + "properties": { + "code": { + "type": "string", + "title": "Code", + "description": "Python code to execute locally in the agent environment.", + }, + }, + "required": ["code"], +} + + +def _collect_tools(*tool_groups: Any) -> list[FunctionTool]: + """Merge tool groups, dropping ``execute_code`` entries and deduping by name.""" + tools_by_name: dict[str, FunctionTool] = {} + for tool_group in tool_groups: + normalized_group = normalize_tools(tool_group) + for tool_obj in normalized_group: + if not isinstance(tool_obj, FunctionTool): + continue + if tool_obj.name == EXECUTE_CODE_TOOL_NAME: + continue + tools_by_name.pop(tool_obj.name, None) + tools_by_name[tool_obj.name] = tool_obj + return list(tools_by_name.values()) + + +def _resolve_execute_code_approval_mode( + *, base_approval_mode: ApprovalMode, tools: Sequence[FunctionTool] +) -> ApprovalMode: + if base_approval_mode == "always_require": + return "always_require" + if any(tool_obj.approval_mode == "always_require" for tool_obj in tools): + return "always_require" + return "never_require" + + +def _validate_code( + code: str, + *, + limits: ProcessExecutionLimits, + allowed_imports: set[str] | None = None, + blocked_imports: set[str] | None = None, + allowed_builtins: set[str] | None = None, + blocked_builtins: set[str] | None = None, +) -> None: + if not isinstance(code, str): + raise TypeError("code must be a string.") + if not code.strip(): + raise ValueError("code must not be empty.") + size = len(code.encode("utf-8")) + if size > limits.max_code_bytes: + raise ValueError(f"code exceeds max_code_bytes ({limits.max_code_bytes}).") + # Validate code against AST allow-lists + validate_code( + code, + allowed_imports=allowed_imports, + blocked_imports=blocked_imports, + allowed_builtins=allowed_builtins, + blocked_builtins=blocked_builtins, + ) + + +def _looks_like_path(value: str) -> bool: + return "/" in 
value or "\\" in value + + +def _normalize_python_executable(value: str | Path | None) -> str: + if value is None: + return sys.executable + raw = str(value).strip() + if not raw: + raise ValueError("python_executable must not be empty.") + candidate = Path(raw).expanduser() + if candidate.is_absolute() or _looks_like_path(raw): + absolute = candidate.absolute() + if not absolute.exists(): + raise ValueError(f"python_executable {raw!r} must point to an existing executable.") + if not absolute.is_file(): + raise ValueError(f"python_executable {raw!r} must point to an executable file.") + return str(absolute) + return raw + + +def _normalize_runner_script(value: str | Path | None) -> Path | None: + if value is None: + return None + try: + resolved = Path(value).expanduser().resolve(strict=True) + except OSError as exc: + raise ValueError(f"runner_script {str(value)!r} must point to an existing Python file.") from exc + if not resolved.is_file(): + raise ValueError(f"runner_script {str(value)!r} must point to an existing Python file.") + return resolved + + +def _build_execution_contents(*, result: Mapping[str, Any]) -> list[Content]: + stdout = str(result.get("stdout") or "").replace("\r\n", "\n") + stderr = str(result.get("stderr") or "").replace("\r\n", "\n") + output_present = bool(result.get("output_present")) + output_value = result.get("output") + stdout_truncated = bool(result.get("stdout_truncated")) + stderr_truncated = bool(result.get("stderr_truncated")) + + outputs: list[Content] = [] + if stdout: + text = stdout + if stdout_truncated: + text = f"{text}\n\n[stdout truncated]" + outputs.append(Content.from_text(text)) + elif stdout_truncated: + outputs.append(Content.from_text("[stdout truncated]")) + + if stderr: + text = stderr + if stderr_truncated: + text = f"{text}\n\n[stderr truncated]" + outputs.append(Content.from_text(text, additional_properties={"stream": "stderr"})) + elif stderr_truncated: + outputs.append(Content.from_text("[stderr 
def __init__(
    self,
    *,
    tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None,
    approval_mode: ApprovalMode | None = None,
    workspace_root: str | Path | None = None,
    file_mounts: FileMountInput | Sequence[FileMountInput] | None = None,
    execution_limits: ProcessExecutionLimits | None = None,
    env: Mapping[str, str] | None = None,
    execution_mode: ExecutionMode = "subprocess",
    python_executable: str | Path | None = None,
    runner_script: str | Path | None = None,
    allowed_imports: set[str] | None = None,
    blocked_imports: set[str] | None = None,
    allowed_builtins: set[str] | None = None,
    blocked_builtins: set[str] | None = None,
) -> None:
    """Initialize a local execute-code tool.

    Args:
        tools: Host tools available to generated code.
        approval_mode: Base approval mode (propagates ``always_require`` from tools).
            ``None`` is treated as ``"never_require"``.
        workspace_root: Read-write workspace directory (auto-mounted at /input).
        file_mounts: Additional file mount configurations.
        execution_limits: Timeout and byte limits for execution. ``None`` uses a
            default ``ProcessExecutionLimits()``.
        env: Environment variables for subprocess mode (does not apply to unsafe mode).
            The mapping is copied.
        execution_mode: Either 'subprocess' (default) or 'unsafe_in_process'.
        python_executable: Python interpreter path (defaults to sys.executable).
        runner_script: Path to runner script (for hosts that bundle the runner).
        allowed_imports: Custom allowed imports (replaces defaults).
        blocked_imports: Custom blocked imports (replaces defaults).
        allowed_builtins: Custom allowed builtins (replaces defaults).
        blocked_builtins: Custom blocked builtins (replaces defaults).

    Raises:
        ValueError: If ``execution_mode`` is not one of the two supported values,
            or if ``python_executable`` / ``runner_script`` fail normalization.
    """
    # The base class is initialised with a placeholder approval mode; the
    # effective mode is computed by ``_refresh_approval_mode`` further down.
    super().__init__(
        name=EXECUTE_CODE_TOOL_NAME,
        description=EXECUTE_CODE_TOOL_DESCRIPTION,
        approval_mode="never_require",
        func=self._run_code,
        input_model=EXECUTE_CODE_INPUT_SCHEMA,
    )
    self._default_approval_mode: ApprovalMode = approval_mode or "never_require"
    # The ``description`` property checks for ``_managed_tools`` to tell whether
    # construction has reached this point yet.
    self._managed_tools: list[FunctionTool] = []
    self._workspace_root: Path | None = (
        resolve_existing_directory(workspace_root) if workspace_root is not None else None
    )
    # Keyed by normalized mount path, so re-adding a path replaces the entry.
    self._file_mounts: dict[str, FileMount] = {}
    self._execution_limits = execution_limits or ProcessExecutionLimits()
    self._env = dict(env or {})
    if execution_mode not in {"subprocess", "unsafe_in_process"}:
        raise ValueError("execution_mode must be 'subprocess' or 'unsafe_in_process'.")
    self._execution_mode: ExecutionMode = execution_mode
    self._python_executable = _normalize_python_executable(python_executable)
    self._runner_script = _normalize_runner_script(runner_script)
    # Validation overrides are stored as given (not copied) and handed to
    # ``_validate_code`` on every run.
    self._allowed_imports = allowed_imports
    self._blocked_imports = blocked_imports
    self._allowed_builtins = allowed_builtins
    self._blocked_builtins = blocked_builtins
    if tools is not None:
        self.add_tools(tools)
    if file_mounts is not None:
        self.add_file_mounts(file_mounts)

    # Recompute once more so the mode is correct even when no tools were given.
    self._refresh_approval_mode()
def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None:
    """Register one file mount or a sequence of file mounts.

    A string, a ``FileMount`` or a ``(host_path, mount_path)`` pair counts as
    a single mount; any other value is iterated as a sequence of mounts. All
    inputs are normalized before anything is stored, so a failing item leaves
    the current mounts untouched. A mount whose normalized ``mount_path``
    already exists replaces the earlier entry.
    """
    is_single = isinstance(file_mounts, (str, FileMount)) or is_file_mount_pair(file_mounts)
    if is_single:
        pending = [normalize_file_mount(cast("FileMountInput", file_mounts))]
    else:
        pending = [normalize_file_mount(item) for item in cast("Sequence[FileMountInput]", file_mounts)]

    self._file_mounts.update((resolved.mount_path, resolved) for resolved in pending)
def create_run_tool(self) -> LocalExecuteCodeTool:
    """Create a run-scoped snapshot of this execute-code surface.

    The snapshot is a new ``LocalExecuteCodeTool`` built from the current
    tools, mounts, limits, environment and interpreter settings. It also
    carries the custom import/builtin validation lists, so code run through
    the snapshot is validated with the same rules as code run through this
    instance.

    Returns:
        A new tool that can be mutated without affecting this one.
    """

    def _copy(names: set[str] | None) -> set[str] | None:
        # Copy so later edits to the caller's set do not leak into the snapshot.
        return set(names) if names is not None else None

    return LocalExecuteCodeTool(
        tools=self.get_tools(),
        approval_mode=self._default_approval_mode,
        workspace_root=self._workspace_root,
        file_mounts=list(self._file_mounts.values()) or None,
        execution_limits=self._execution_limits,
        env=self._env,
        execution_mode=self._execution_mode,
        python_executable=self._python_executable,
        runner_script=self._runner_script,
        # These four were previously omitted, which silently reset the
        # snapshot to the default validation lists.
        allowed_imports=_copy(self._allowed_imports),
        blocked_imports=_copy(self._blocked_imports),
        allowed_builtins=_copy(self._allowed_builtins),
        blocked_builtins=_copy(self._blocked_builtins),
    )
"python_executable": self._python_executable, + "runner_script": str(self._runner_script) if self._runner_script is not None else None, + "approval_mode": approval_mode, + "tool_names": [tool_obj.name for tool_obj in self._managed_tools], + "workspace_root": str(self._workspace_root) if self._workspace_root is not None else None, + "file_mounts": [ + { + "host_path": str(mount.host_path), + "mount_path": mount.mount_path, + "mode": mount.mode, + "write_bytes_limit": mount.write_bytes_limit, + } + for mount in mounts + ], + "execution_limits": { + "timeout_seconds": self._execution_limits.timeout_seconds, + "max_code_bytes": self._execution_limits.max_code_bytes, + "max_stdout_bytes": self._execution_limits.max_stdout_bytes, + "max_stderr_bytes": self._execution_limits.max_stderr_bytes, + "max_result_bytes": self._execution_limits.max_result_bytes, + "max_captured_file_bytes": self._execution_limits.max_captured_file_bytes, + "max_total_captured_file_bytes": self._execution_limits.max_total_captured_file_bytes, + }, + "env_keys": sorted(self._env), + } + + def to_dict(self, *, exclude: set[str] | None = None, exclude_none: bool = True) -> dict[str, Any]: + self.__dict__["description"] = self.description + return super().to_dict(exclude=exclude, exclude_none=exclude_none) + + def _refresh_approval_mode(self) -> None: + self.approval_mode = _resolve_execute_code_approval_mode( + base_approval_mode=self._default_approval_mode, + tools=self._managed_tools, + ) + + def _effective_mounts(self) -> list[FileMount]: + mounts: list[FileMount] = [] + if self._workspace_root is not None and WORKSPACE_MOUNT_PATH not in self._file_mounts: + mounts.append( + FileMount( + host_path=self._workspace_root, + mount_path=WORKSPACE_MOUNT_PATH, + mode="read-write", + write_bytes_limit=None, + ) + ) + mounts.extend(self._file_mounts.values()) + return mounts + + async def _run_code(self, *, code: str) -> list[Content]: + try: + _validate_code( + code, + limits=self._execution_limits, + 
allowed_imports=self._allowed_imports, + blocked_imports=self._blocked_imports, + allowed_builtins=self._allowed_builtins, + blocked_builtins=self._blocked_builtins, + ) + except (TypeError, ValueError, CodeValidationError) as exc: + return [Content.from_error(message="Invalid code", error_details=str(exc))] + + tools = list(self._managed_tools) + mounts = self._effective_mounts() + pre_state = snapshot_writable_mounts(mounts) + + cwd: str | None = None + temp_dir: tempfile.TemporaryDirectory[str] | None = None + if self._workspace_root is not None: + cwd = str(self._workspace_root) + elif self._execution_mode == "subprocess": + temp_dir = tempfile.TemporaryDirectory(prefix="local-codeact-") + cwd = temp_dir.name + + try: + bridge = ( + UnsafeInProcessCodeBridge(tools=tools, limits=self._execution_limits) + if self._execution_mode == "unsafe_in_process" + else SubprocessCodeBridge( + tools=tools, + limits=self._execution_limits, + env=self._env, + cwd=cwd, + python_executable=self._python_executable, + runner_script=str(self._runner_script) if self._runner_script is not None else None, + ) + ) + result = await bridge.run(code) + except Exception as exc: + return [ + Content.from_error( + message="Execution error", + error_details=f"{type(exc).__name__}: {exc}", + ), + ] + finally: + if temp_dir is not None: + temp_dir.cleanup() + + contents = _build_execution_contents(result=result) + contents.extend(capture_written_files(mounts, pre_state, limits=self._execution_limits)) + return contents diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_files.py b/python/packages/local_codeact/agent_framework_local_codeact/_files.py new file mode 100644 index 0000000000..061efc0107 --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_files.py @@ -0,0 +1,168 @@ +# Copyright (c) Microsoft. All rights reserved. 
def normalize_mount_path(mount_path: str) -> str:
    """Normalize a display/capture mount path to a clean POSIX absolute path.

    Backslashes are treated as separators, surrounding whitespace is removed,
    and repeated slashes and ``.`` segments are collapsed. A relative input is
    anchored at ``/``.

    Args:
        mount_path: Raw mount path as supplied by the caller.

    Returns:
        The path in the form ``/segment[/segment...]`` with no trailing slash.

    Raises:
        ValueError: If the path is blank, contains a ``..`` segment, or has no
            concrete segment (for example ``/`` or ``.``).
    """
    raw = mount_path.strip().replace("\\", "/")
    if not raw:
        raise ValueError("mount_path must not be empty.")
    # ``PurePosixPath`` keeps a leading "//" as its own root part, so drop every
    # slash-only part instead of comparing against "/" alone. Otherwise
    # "//input" (or a UNC-style "\\\\host\\share") would become "////input".
    parts = [part for part in PurePosixPath(raw).parts if part.strip("/") and part != "."]
    if ".." in parts:
        raise ValueError("mount_path must not contain '..' segments.")
    if not parts:
        raise ValueError("mount_path must point to a concrete absolute path.")
    return "/" + "/".join(parts)


def resolve_existing_directory(value: str | Path) -> Path:
    """Resolve a path and require it to point at an existing directory.

    Args:
        value: Directory path; ``~`` is expanded.

    Returns:
        The fully resolved directory path.

    Raises:
        OSError: If the path cannot be resolved, for example because it does
            not exist (``resolve(strict=True)``).
        ValueError: If the resolved path is not a directory.
    """
    resolved = Path(value).expanduser().resolve(strict=True)
    if not resolved.is_dir():
        raise ValueError(f"Path {value!r} must point to an existing directory.")
    return resolved


def is_file_mount_pair(value: Any) -> bool:
    """Return whether ``value`` is a ``(host_path, mount_path)`` file-mount pair.

    ``FileMount`` is itself a tuple subclass, so it is excluded explicitly.
    A pair is a plain 2-tuple whose first item is a ``str`` or ``Path`` and
    whose second item is a ``str``.
    """
    if not isinstance(value, tuple) or isinstance(value, FileMount):
        return False
    items = cast("tuple[object, ...]", value)
    if len(items) != 2:
        return False
    host_path, mount_path = items
    return isinstance(host_path, (str, Path)) and isinstance(mount_path, str)
def iter_real_files(root: Path) -> Iterator[Path]:
    """Yield every regular, non-symlink file found beneath ``root``.

    Directories are visited with an explicit stack. Symlinks are skipped
    whether they point at files or directories. Directories or entries that
    raise ``OSError`` while being inspected are skipped silently.
    """
    pending: list[Path] = [root]
    while pending:
        directory = pending.pop()
        try:
            children = list(directory.iterdir())
        except OSError:
            continue
        for child in children:
            try:
                if child.is_symlink():
                    continue
                if child.is_dir():
                    pending.append(child)
                    continue
                if child.is_file():
                    yield child
            except OSError:
                continue


def snapshot_writable_mounts(mounts: Sequence[FileMount]) -> dict[str, dict[str, tuple[int, int]]]:
    """Record ``(size, mtime_ns)`` for each real file under read-write mounts.

    Returns:
        A mapping from mount path to a mapping of POSIX-style relative file
        path to its ``(size, mtime_ns)`` pair. Mounts in any other mode are
        left out; files that cannot be stat'ed are skipped.
    """
    baseline: dict[str, dict[str, tuple[int, int]]] = {}
    for mount in (candidate for candidate in mounts if candidate.mode == "read-write"):
        root = Path(mount.host_path)
        seen: dict[str, tuple[int, int]] = {}
        for file_path in iter_real_files(root):
            try:
                info = file_path.lstat()
            except OSError:
                continue
            seen[file_path.relative_to(root).as_posix()] = (int(info.st_size), int(info.st_mtime_ns))
        baseline[mount.mount_path] = seen
    return baseline


def capture_written_files(
    mounts: Sequence[FileMount],
    pre_state: dict[str, dict[str, tuple[int, int]]],
    *,
    limits: ProcessExecutionLimits,
) -> list[Content]:
    """Return content items for files created or changed under read-write mounts.

    A file counts as written when its ``(size, mtime_ns)`` pair differs from
    the entry recorded in ``pre_state`` (or when it has no entry). Files are
    visited in sorted path order per mount. A file that would break the
    per-file limit, the mount's ``write_bytes_limit`` or the total limit is
    reported as a text notice instead of being attached. Files that cannot be
    stat'ed or read are skipped.
    """
    results: list[Content] = []
    grand_total = 0
    for mount in mounts:
        if mount.mode != "read-write":
            continue
        root = Path(mount.host_path)
        baseline = pre_state.get(mount.mount_path, {})
        mount_total = 0
        for file_path in sorted(iter_real_files(root)):
            try:
                info = file_path.lstat()
            except OSError:
                continue
            relative = file_path.relative_to(root).as_posix()
            if baseline.get(relative) == (int(info.st_size), int(info.st_mtime_ns)):
                # Unchanged since the snapshot.
                continue
            sandbox_path = f"{mount.mount_path.rstrip('/')}/{relative}"
            size = info.st_size

            # Limits are checked from the narrowest to the widest scope.
            notice: str | None = None
            if size > limits.max_captured_file_bytes:
                notice = f"[file {sandbox_path} omitted: file exceeds capture limit]"
            elif mount.write_bytes_limit is not None and mount_total + size > mount.write_bytes_limit:
                notice = f"[file {sandbox_path} omitted: mount capture limit exceeded]"
            elif grand_total + size > limits.max_total_captured_file_bytes:
                notice = f"[file {sandbox_path} omitted: total capture limit exceeded]"
            if notice is not None:
                results.append(Content.from_text(notice))
                continue

            try:
                payload = file_path.read_bytes()
            except OSError:
                continue
            results.append(
                Content.from_data(
                    data=payload,
                    media_type=mimetypes.guess_type(file_path.name)[0] or "application/octet-stream",
                    additional_properties={"path": sandbox_path},
                )
            )
            # Only files that were actually attached count towards the budgets.
            mount_total += size
            grand_total += size
    return results
+ +"""Dynamic CodeAct instructions and execute_code descriptions for local execution.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from agent_framework import FunctionTool + +from ._types import FileMount + + +def _format_tool_summaries(tools: Sequence[FunctionTool]) -> str: + if not tools: + return "- No tools are currently registered." + + lines: list[str] = [] + for tool_obj in tools: + parameters = tool_obj.parameters().get("properties", {}) + parameter_names = [name for name in parameters if isinstance(name, str)] + parameter_summary = ", ".join(parameter_names) if parameter_names else "none" + description = str(tool_obj.description or "").strip() or "No description provided." + lines.append(f"- `{tool_obj.name}`: {description} Parameters: {parameter_summary}.") + return "\n".join(lines) + + +def _format_filesystem_capabilities(mounts: Sequence[FileMount]) -> str: + if not mounts: + return ( + "No workspace or file mounts are configured. Use only temporary files created by this execution, " + "or ask the operator to configure a sandboxed workspace." + ) + + lines = [ + ( + "Configured directories are direct paths inside the surrounding sandbox. " + "They are not virtualized by this package:" + ) + ] + for mount in mounts: + cap = "" + if mount.write_bytes_limit is not None: + cap = f", capture cap {mount.write_bytes_limit} bytes" + lines.append(f"- `{mount.mount_path}` -> `{mount.host_path}` ({mount.mode}{cap})") + + writable = [mount for mount in mounts if mount.mode == "read-write"] + if writable: + writable_paths = ", ".join(f"`{m.host_path}`" for m in writable) + lines.append( + f"New or modified files under {writable_paths} are returned to the caller as attached files. " + "Use those paths for output artifacts." 
def build_codeact_instructions(
    *,
    tools: Sequence[FunctionTool],
    tools_visible_to_model: bool,
    mounts: Sequence[FileMount] = (),
) -> str:
    """Build dynamic CodeAct instructions for the effective local tool set.

    Args:
        tools: Host tools callable from generated code.
        tools_visible_to_model: Whether the same tools are also offered to the
            model directly; this selects the closing usage note.
        mounts: Effective file mounts to describe.

    Returns:
        The full instruction text, including the tool and filesystem sections.
    """
    tool_summaries = _format_tool_summaries(tools)
    filesystem_text = _format_filesystem_capabilities(mounts)

    if tools_visible_to_model:
        usage_note = (
            "Some tools may also appear directly, but prefer `execute_code` whenever you need to combine "
            "Python control flow with host tool calls."
        )
    else:
        usage_note = "Provider-owned host tools are not exposed separately; use `execute_code` when you need them."

    return f"""You have one primary tool: `execute_code`.

`execute_code` runs Python locally in the agent environment. This is not a
security sandbox; rely on the surrounding Foundry/container/VM sandbox for
isolation.

Inside `execute_code`, call registered tools directly as async functions:
`result = await tool_name(param=value)`. Always use `await` and keyword arguments.
`await call_tool('name', **kwargs)` is also supported as a fallback.

For fan-out, use `asyncio.gather`:
`results = await asyncio.gather(tool_a(...), tool_b(...))`.

Surface results to the caller via `print(...)` (captured and returned as text)
or by ending the code with an expression whose value is JSON-encodable.

Filesystem capabilities:
{filesystem_text}

Registered tools:
{tool_summaries}

Prefer a single `execute_code` call per request when possible, combining
multiple tool calls with Python control flow.

{usage_note}
"""
def __init__(
    self,
    source_id: str = DEFAULT_SOURCE_ID,
    *,
    tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None,
    approval_mode: ApprovalMode | None = None,
    workspace_root: str | Path | None = None,
    file_mounts: FileMountInput | Sequence[FileMountInput] | None = None,
    execution_limits: ProcessExecutionLimits | None = None,
    env: Mapping[str, str] | None = None,
    execution_mode: ExecutionMode = "subprocess",
    python_executable: str | Path | None = None,
    runner_script: str | Path | None = None,
    allowed_imports: set[str] | None = None,
    blocked_imports: set[str] | None = None,
    allowed_builtins: set[str] | None = None,
    blocked_builtins: set[str] | None = None,
) -> None:
    """Initialize a local CodeAct context provider.

    Every keyword option is forwarded unchanged to the ``LocalExecuteCodeTool``
    this provider owns; the provider itself keeps no separate copy.

    Args:
        source_id: Provider source identifier.
        tools: Host tools available to generated code.
        approval_mode: Base approval mode (propagates ``always_require`` from tools).
        workspace_root: Read-write workspace directory (auto-mounted at /input).
        file_mounts: Additional file mount configurations.
        execution_limits: Timeout and byte limits for execution.
        env: Environment variables for subprocess mode (does not apply to unsafe mode).
        execution_mode: Either 'subprocess' (default) or 'unsafe_in_process'.
        python_executable: Python interpreter path (defaults to sys.executable).
        runner_script: Path to runner script (for hosts that bundle the runner).
        allowed_imports: Custom allowed imports (replaces defaults).
        blocked_imports: Custom blocked imports (replaces defaults).
        allowed_builtins: Custom allowed builtins (replaces defaults).
        blocked_builtins: Custom blocked builtins (replaces defaults).
    """
    super().__init__(source_id)
    # Long-lived template tool; ``before_run`` hands out a fresh snapshot of it
    # for each run via ``create_run_tool``.
    self._execute_code_tool = LocalExecuteCodeTool(
        tools=tools,
        approval_mode=approval_mode,
        workspace_root=workspace_root,
        file_mounts=file_mounts,
        execution_limits=execution_limits,
        env=env,
        execution_mode=execution_mode,
        python_executable=python_executable,
        runner_script=runner_script,
        allowed_imports=allowed_imports,
        blocked_imports=blocked_imports,
        allowed_builtins=allowed_builtins,
        blocked_builtins=blocked_builtins,
    )
execute-code tool.""" + run_tool = self._execute_code_tool.create_run_tool() + state[self.source_id] = run_tool.build_serializable_state() + context.extend_instructions(self.source_id, run_tool.build_instructions(tools_visible_to_model=False)) + context.extend_tools(self.source_id, [run_tool]) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_runner.py b/python/packages/local_codeact/agent_framework_local_codeact/_runner.py new file mode 100644 index 0000000000..b1a644e584 --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_runner.py @@ -0,0 +1,210 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Child-process runner for local CodeAct subprocess mode.""" + +from __future__ import annotations + +import ast +import asyncio +import contextlib +import io +import json +import keyword +import sys +import traceback +from collections.abc import Mapping, Sequence +from typing import Any, TextIO, cast + + +class _CappedTextIO(io.TextIOBase): + def __init__(self, limit: int) -> None: + super().__init__() + self._limit = max(0, limit) + self._buffer = io.StringIO() + self.truncated = False + + def writable(self) -> bool: + return True + + def write(self, value: str) -> int: + text = str(value) + current = self._buffer.tell() + remaining = max(0, self._limit - current) + if remaining: + self._buffer.write(text[:remaining]) + if len(text) > remaining: + self.truncated = True + return len(text) + + def getvalue(self) -> str: + return self._buffer.getvalue() + + +def _json_safe_mapping(value: Mapping[Any, Any]) -> dict[str, object]: + return {str(key): _json_safe(item) for key, item in value.items()} + + +def _json_safe_sequence(value: Sequence[Any]) -> list[object]: + return [_json_safe(item) for item in value] + + +def _json_safe(value: object) -> object: + try: + json.dumps(value) + except (TypeError, ValueError): + if isinstance(value, Mapping): + return _json_safe_mapping(cast("Mapping[Any, Any]", value)) # type: 
ignore[redundant-cast] + if isinstance(value, (list, tuple)): + return _json_safe_sequence(cast("Sequence[Any]", value)) + return repr(value) + return value + + +def _compile_main(code: str) -> tuple[Any, bool]: + module = ast.parse(code, mode="exec") + body = list(module.body) + output_present = bool(body and isinstance(body[-1], ast.Expr)) + if output_present: + last_expr = body[-1] + if isinstance(last_expr, ast.Expr): + body[-1] = ast.Return(value=last_expr.value) + else: + body.append(ast.Return(value=ast.Constant(value=None))) + + async_function_def = cast(Any, ast.AsyncFunctionDef) + function = async_function_def( + name="__local_codeact_main__", + args=ast.arguments( + posonlyargs=[], + args=[], + kwonlyargs=[], + kw_defaults=[], + defaults=[], + ), + body=body, + decorator_list=[], + returns=None, + type_comment=None, + ) + wrapped = ast.Module(body=[function], type_ignores=[]) + ast.fix_missing_locations(wrapped) + return compile(wrapped, "", "exec"), output_present + + +def _send(control: TextIO, payload: Mapping[str, Any]) -> None: + control.write(json.dumps(payload, separators=(",", ":")) + "\n") + control.flush() + + +async def _read_response(call_id: int) -> dict[str, Any]: + line = await asyncio.to_thread(sys.stdin.readline) + if not line: + raise RuntimeError("Parent process closed the tool bridge.") + response_value: Any = json.loads(line) + if not isinstance(response_value, dict): + raise RuntimeError("Received an invalid tool bridge response.") + response = cast("dict[str, Any]", response_value) + if response.get("call_id") != call_id: + raise RuntimeError("Received an invalid tool bridge response.") + if not response.get("ok"): + exc_type = str(response.get("exc_type") or "RuntimeError") + message = str(response.get("message") or "Tool call failed.") + raise RuntimeError(f"{exc_type}: {message}") + return response + + +def _make_tool(name: str, *, control: TextIO, bridge_lock: asyncio.Lock) -> Any: + async def _tool(**kwargs: Any) -> Any: + 
async def _call_tool(
    name: str,
    *,
    control: TextIO,
    bridge_lock: asyncio.Lock,
    kwargs: Mapping[str, Any],
) -> Any:
    """Ask the parent process to run host tool ``name`` and return its result.

    The request is written to ``control`` and the reply is read while
    ``bridge_lock`` is held, so only one request/reply exchange is in flight
    at a time. The call id is ``id(kwargs)``, which stays distinct for as long
    as the mapping is alive.

    Returns:
        The ``result`` field of the reply, or ``None`` if it has none.
    """
    call_id = id(kwargs)
    async with bridge_lock:
        request = {
            "type": "tool_call",
            "call_id": call_id,
            "name": name,
            "kwargs": _json_safe(dict(kwargs)),
        }
        _send(control, request)
        reply = await _read_response(call_id)
        return reply.get("result")
+ output = await globals_dict["__local_codeact_main__"]() + + return { + "stdout": stdout.getvalue(), + "stderr": stderr.getvalue(), + "stdout_truncated": stdout.truncated, + "stderr_truncated": stderr.truncated, + "output_present": output_present, + "output": _json_safe(output), + } + + +async def _main() -> int: + control = sys.stdout + line = await asyncio.to_thread(sys.stdin.readline) + if not line: + return 1 + try: + request_value: Any = json.loads(line) + if not isinstance(request_value, dict): + raise ValueError("Expected a JSON object request.") + request = cast("dict[str, Any]", request_value) + result = await _execute(request, control) + _send(control, {"type": "complete", "result": result}) + return 0 + except BaseException as exc: + _send( + control, + { + "type": "error", + "exc_type": type(exc).__name__, + "message": str(exc), + "traceback": traceback.format_exc(limit=20), + }, + ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(asyncio.run(_main())) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_types.py b/python/packages/local_codeact/agent_framework_local_codeact/_types.py new file mode 100644 index 0000000000..b5494f8955 --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_types.py @@ -0,0 +1,48 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Public types for ``agent-framework-local-codeact``.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal, NamedTuple, TypeAlias + +ExecutionMode: TypeAlias = Literal["subprocess", "unsafe_in_process"] +MountMode: TypeAlias = Literal["overlay", "read-only", "read-write"] + + +class FileMount(NamedTuple): + """Describe a directory exposed to generated code by direct path. + + The local CodeAct executor does not provide a virtual filesystem. 
The + ``mount_path`` is a stable display/capture path used in instructions and + returned file metadata; generated code receives and uses ``host_path`` as a + direct path inside the surrounding sandbox. + """ + + host_path: str | Path + mount_path: str + mode: MountMode = "overlay" + write_bytes_limit: int | None = None + + +FileMountHostPath: TypeAlias = str | Path +FileMountInput: TypeAlias = str | tuple[FileMountHostPath, str] | FileMount + + +@dataclass(frozen=True) +class ProcessExecutionLimits: + """Defense-in-depth limits for local generated-code execution. + + These limits help keep accidental or buggy generated code bounded. They are + not a security boundary and should be paired with external sandboxing. + """ + + timeout_seconds: float = 10.0 + max_code_bytes: int = 64 * 1024 + max_stdout_bytes: int = 64 * 1024 + max_stderr_bytes: int = 64 * 1024 + max_result_bytes: int = 128 * 1024 + max_captured_file_bytes: int = 5 * 1024 * 1024 + max_total_captured_file_bytes: int = 25 * 1024 * 1024 diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py new file mode 100644 index 0000000000..6bd4fdd809 --- /dev/null +++ b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py @@ -0,0 +1,448 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""AST validation for generated Python code.""" + +from __future__ import annotations + +import ast +from typing import Any + +# Allowed imports that generated code may use. +ALLOWED_IMPORTS: set[str] = { + "asyncio", + "pathlib", + "json", + "math", + "datetime", + "time", + "itertools", + "functools", + "collections", + "typing", + "dataclasses", + "decimal", + "fractions", + "re", + "base64", + "hashlib", + "uuid", + "random", + "os", # Not limited to os.environ/os.path: attribute validation only rejects a deny-list of os.<name> accesses +} + +# Blocked imports that expose dangerous capabilities.
+BLOCKED_IMPORTS: set[str] = { + "sys", + "subprocess", + "socket", + "urllib", + "requests", + "http", + "ftplib", + "smtplib", + "telnetlib", + "multiprocessing", + "threading", + "ctypes", + "shutil", + "tempfile", + "importlib", + "builtins", + "__builtin__", +} + +# Allowed builtin function names that generated code may call. +ALLOWED_BUILTINS: set[str] = { + "print", + "len", + "str", + "int", + "float", + "bool", + "list", + "dict", + "tuple", + "set", + "frozenset", + "range", + "enumerate", + "zip", + "map", + "filter", + "sorted", + "reversed", + "sum", + "min", + "max", + "abs", + "round", + "pow", + "divmod", + "all", + "any", + "chr", + "ord", + "hex", + "oct", + "bin", + "format", + "repr", + "ascii", + "bytes", + "bytearray", + "memoryview", + "isinstance", + "issubclass", + "hasattr", + "getattr", + "setattr", + "callable", + "type", + "id", + "hash", + "next", + "iter", + "slice", +} + +# Blocked builtin function names that expose dangerous capabilities. +BLOCKED_BUILTINS: set[str] = { + "eval", + "exec", + "compile", + "__import__", + "globals", + "locals", + "vars", + "dir", + "open", + "input", + "help", + "breakpoint", + "exit", + "quit", + "copyright", + "credits", + "license", +} + +# Allowed AST node types for code structure and operations. 
+ALLOWED_AST_NODES: set[type[ast.AST]] = { + ast.Module, + ast.Expr, + ast.Assign, + ast.AugAssign, + ast.AnnAssign, + ast.For, + ast.AsyncFor, + ast.While, + ast.If, + ast.With, + ast.AsyncWith, + ast.Try, + ast.ExceptHandler, + ast.Pass, + ast.Break, + ast.Continue, + ast.Return, + ast.Await, + # Comparisons and boolean operations + ast.Compare, + ast.BoolOp, + ast.UnaryOp, + ast.And, + ast.Or, + ast.Not, + ast.Eq, + ast.NotEq, + ast.Lt, + ast.LtE, + ast.Gt, + ast.GtE, + ast.In, + ast.NotIn, + ast.Is, + ast.IsNot, + ast.UAdd, + ast.USub, + ast.Invert, + # Data access + ast.Name, + ast.Load, + ast.Store, + ast.Del, + ast.Attribute, + ast.Subscript, + ast.Slice, + # Literals + ast.Constant, + ast.List, + ast.Tuple, + ast.Set, + ast.Dict, + # Arithmetic and bitwise operations + ast.BinOp, + ast.Add, + ast.Sub, + ast.Mult, + ast.Div, + ast.Mod, + ast.FloorDiv, + ast.Pow, + ast.LShift, + ast.RShift, + ast.BitOr, + ast.BitXor, + ast.BitAnd, + # Function calls and comprehensions + ast.Call, + ast.keyword, + ast.ListComp, + ast.SetComp, + ast.DictComp, + ast.GeneratorExp, + ast.comprehension, + # Control flow helpers + ast.IfExp, + ast.JoinedStr, + ast.FormattedValue, + # Imports (validated separately) + ast.Import, + ast.ImportFrom, + ast.alias, + # Function definitions (for local helpers) + ast.FunctionDef, + ast.AsyncFunctionDef, + ast.arguments, + ast.arg, + # Lambda expressions + ast.Lambda, + # Match statements (Python 3.10+) + ast.Match, + ast.match_case, + ast.MatchValue, + ast.MatchSingleton, + ast.MatchSequence, + ast.MatchMapping, + ast.MatchClass, + ast.MatchStar, + ast.MatchAs, + ast.MatchOr, + # Starred expressions + ast.Starred, +} + + +class CodeValidationError(ValueError): + """Raised when generated code violates the allow-list policy.""" + + pass + + +class _CodeValidator(ast.NodeVisitor): + """AST visitor that validates generated code against allow-lists.""" + + def __init__( + self, + *, + allowed_imports: set[str] | None = None, + blocked_imports: 
set[str] | None = None, + allowed_builtins: set[str] | None = None, + blocked_builtins: set[str] | None = None, + ) -> None: + super().__init__() + self._errors: list[str] = [] + self._allowed_imports = allowed_imports if allowed_imports is not None else ALLOWED_IMPORTS + self._blocked_imports = blocked_imports if blocked_imports is not None else BLOCKED_IMPORTS + self._allowed_builtins = allowed_builtins if allowed_builtins is not None else ALLOWED_BUILTINS + self._blocked_builtins = blocked_builtins if blocked_builtins is not None else BLOCKED_BUILTINS + + def validate(self, code: str) -> None: + """Validate code and raise CodeValidationError if it violates policy.""" + try: + tree = ast.parse(code, mode="exec") + except SyntaxError as exc: + raise CodeValidationError(f"Syntax error in generated code: {exc}") from exc + + self._errors = [] + self.visit(tree) + + if self._errors: + raise CodeValidationError( + "Generated code violates allow-list policy:\n" + "\n".join(f"- {err}" for err in self._errors) + ) + + def visit(self, node: ast.AST) -> Any: + """Visit a node and check if its type is allowed.""" + node_type = type(node) + if node_type not in ALLOWED_AST_NODES: + self._errors.append(f"AST node type '{node_type.__name__}' is not allowed") + return None + return super().visit(node) + + def visit_Import(self, node: ast.Import) -> None: + """Validate import statements.""" + for alias_node in node.names: + module_name = alias_node.name.split(".")[0] + if module_name in self._blocked_imports: + self._errors.append(f"Import of '{alias_node.name}' is not allowed (blocked: {module_name})") + elif module_name not in self._allowed_imports: + self._errors.append(f"Import of '{alias_node.name}' is not allowed (not in allow-list)") + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + """Validate from-import statements; any relative form (level > 0, e.g. ``from .os import x``) is rejected.""" + if node.level or node.module is None: + self._errors.append("Relative imports are not allowed") + return + + module_name =
node.module.split(".")[0] + if module_name in self._blocked_imports: + self._errors.append(f"Import from '{node.module}' is not allowed (blocked: {module_name})") + elif module_name not in self._allowed_imports: + self._errors.append(f"Import from '{node.module}' is not allowed (not in allow-list)") + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + """Validate function calls.""" + # Check for blocked builtins + if isinstance(node.func, ast.Name): + func_name = node.func.id + if func_name in self._blocked_builtins: + self._errors.append(f"Call to builtin '{func_name}' is not allowed") + elif func_name not in self._allowed_builtins and func_name != "call_tool": + # Allow user-defined functions and registered tools (validated at runtime) + # We only block known dangerous builtins here + pass + + # Check for attribute access to dangerous methods + if isinstance(node.func, ast.Attribute): + attr_name = node.func.attr + # Block common dangerous attribute methods + if ( + attr_name.startswith("__") + and attr_name.endswith("__") + and attr_name not in {"__init__", "__str__", "__repr__", "__eq__", "__hash__"} + ): + self._errors.append(f"Call to dunder method '{attr_name}' is not allowed") + + self.generic_visit(node) + + def visit_Attribute(self, node: ast.Attribute) -> None: + """Validate attribute access.""" + # Check for dangerous os module operations + if isinstance(node.value, ast.Name) and node.value.id == "os": + # Block dangerous os operations + dangerous_os_attrs = { + "system", + "exec", + "execl", + "execle", + "execlp", + "execlpe", + "execv", + "execve", + "execvp", + "execvpe", + "spawn", + "spawnl", + "spawnle", + "spawnlp", + "spawnlpe", + "spawnv", + "spawnve", + "spawnvp", + "spawnvpe", + "popen", + "popen2", + "popen3", + "popen4", + "fork", + "forkpty", + "kill", + "killpg", + "abort", + "chdir", + "fchdir", + "chroot", + "chmod", + "chown", + "lchown", + "fchmod", + "fchown", + "remove", + "unlink", + "rmdir", + 
"removedirs", + "rename", + "renames", + "replace", + "link", + "symlink", + "mkdir", + "makedirs", + "access", + "putenv", + "unsetenv", + } + if node.attr in dangerous_os_attrs: + self._errors.append(f"Access to os.{node.attr} is not allowed") + + # Block access to certain dangerous attributes + if ( + node.attr.startswith("__") + and node.attr.endswith("__") + and node.attr + not in { + "__name__", + "__doc__", + "__dict__", + "__class__", + "__module__", + "__file__", + "__init__", + "__str__", + "__repr__", + "__eq__", + "__hash__", + "__len__", + "__iter__", + "__next__", + "__enter__", + "__exit__", + "__aenter__", + "__aexit__", + } + ): + self._errors.append(f"Access to attribute '{node.attr}' is not allowed") + + self.generic_visit(node) + + +def validate_code( + code: str, + *, + allowed_imports: set[str] | None = None, + blocked_imports: set[str] | None = None, + allowed_builtins: set[str] | None = None, + blocked_builtins: set[str] | None = None, +) -> None: + """Validate generated code against AST allow-lists. + + Args: + code: Python source code to validate. + allowed_imports: Custom set of allowed module names (replaces defaults). + blocked_imports: Custom set of blocked module names (replaces defaults). + allowed_builtins: Custom set of allowed builtin names (replaces defaults). + blocked_builtins: Custom set of blocked builtin names (replaces defaults). + + Raises: + CodeValidationError: If the code violates the allow-list policy. 
+ """ + validator = _CodeValidator( + allowed_imports=allowed_imports, + blocked_imports=blocked_imports, + allowed_builtins=allowed_builtins, + blocked_builtins=blocked_builtins, + ) + validator.validate(code) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/py.typed b/python/packages/local_codeact/agent_framework_local_codeact/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/packages/local_codeact/pyproject.toml b/python/packages/local_codeact/pyproject.toml new file mode 100644 index 0000000000..8247d581ba --- /dev/null +++ b/python/packages/local_codeact/pyproject.toml @@ -0,0 +1,103 @@ +[project] +name = "agent-framework-local-codeact" +description = "Local CodeAct integrations for Microsoft Agent Framework." +authors = [{ name = "Microsoft", email = "af-support@microsoft.com"}] +readme = "README.md" +requires-python = ">=3.10" +version = "1.0.0a260526" +license-files = ["LICENSE"] +urls.homepage = "https://aka.ms/agent-framework" +urls.source = "https://github.com/microsoft/agent-framework/tree/main/python" +urls.release_notes = "https://github.com/microsoft/agent-framework/releases?q=tag%3Apython-1&expanded=true" +urls.issues = "https://github.com/microsoft/agent-framework/issues" +classifiers = [ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Typing :: Typed", +] +dependencies = [ + "agent-framework-core>=1.6.0,<2", +] + +[tool.uv] +prerelease = "if-necessary-or-explicit" +environments = [ + "sys_platform == 'darwin'", + "sys_platform == 'linux'", + "sys_platform == 'win32'" +] + +[tool.uv-dynamic-versioning] +fallback-version = "0.0.0" + +[tool.pytest.ini_options] +testpaths 
= 'tests' +addopts = "-ra -q -r fEX" +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +filterwarnings = [] +timeout = 120 +markers = [ + "integration: marks tests as integration tests that require external services", +] + +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.per-file-ignores] +"samples/**" = ["CPY", "INP", "T201"] +"tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"] + +[tool.coverage.run] +omit = [ + "**/__init__.py" +] + +[tool.pyright] +extends = "../../pyproject.toml" +include = ["agent_framework_local_codeact"] +exclude = ['tests'] + +[tool.mypy] +plugins = ['pydantic.mypy'] +strict = true +python_version = "3.10" +ignore_missing_imports = true +disallow_untyped_defs = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true +show_error_codes = true +warn_unused_ignores = false +disallow_incomplete_defs = true +disallow_untyped_decorators = true + +[tool.bandit] +targets = ["agent_framework_local_codeact"] +exclude_dirs = ["tests"] + +[tool.poe] +executor.type = "uv" +include = "../../shared_tasks.toml" + +[tool.poe.tasks.mypy] +help = "Run MyPy for this package." +cmd = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_local_codeact" + +[tool.poe.tasks.test] +help = "Run the default unit test suite for this package." +cmd = 'pytest -m "not integration" --cov=agent_framework_local_codeact --cov-report=term-missing:skip-covered tests' + +[tool.flit.module] +name = "agent_framework_local_codeact" + +[build-system] +requires = ["flit-core >= 3.11,<4.0"] +build-backend = "flit_core.buildapi" diff --git a/python/packages/local_codeact/samples/README.md b/python/packages/local_codeact/samples/README.md new file mode 100644 index 0000000000..ff27acae93 --- /dev/null +++ b/python/packages/local_codeact/samples/README.md @@ -0,0 +1,19 @@ +# Local CodeAct samples + +These samples show how to wire `agent-framework-local-codeact` into an Agent +Framework application. 
+ +Local CodeAct runs LLM-generated Python in the agent environment. Use these +patterns only in an externally sandboxed environment such as a Foundry hosted +agent, container, or VM. + +| Sample | Description | +| --- | --- | +| `foundry_hosted_agent.py` | Adds `LocalCodeActProvider` to an agent before wrapping it with `ResponsesHostServer`. | +| `local_execute_code.py` | Invokes `LocalExecuteCodeTool` directly with host tools, explicit environment variables, file mounts, subprocess mode, the Python executable path, and execution limits. | + +Run the local sample from the `python/` directory: + +```bash +uv run --package agent-framework-local-codeact packages/local_codeact/samples/local_execute_code.py +``` diff --git a/python/packages/local_codeact/samples/foundry_hosted_agent.py b/python/packages/local_codeact/samples/foundry_hosted_agent.py new file mode 100644 index 0000000000..7b37846a4d --- /dev/null +++ b/python/packages/local_codeact/samples/foundry_hosted_agent.py @@ -0,0 +1,46 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""This sample demonstrates wiring Local CodeAct into a Foundry hosted agent. + +Local CodeAct executes LLM-generated Python in the local agent environment. Use +it only when the deployment environment supplies the real sandbox, such as a +Foundry hosted-agent container. +""" + +from typing import Any + +from agent_framework import Agent +from agent_framework_foundry_hosting import ResponsesHostServer + +from agent_framework_local_codeact import LocalCodeActProvider, ProcessExecutionLimits + + +def create_model_client() -> Any: + """Return the model client configured for your hosted agent.""" + raise RuntimeError("Configure and return your model client here.") + + +def create_server() -> ResponsesHostServer: + """Create a Foundry Responses host server with Local CodeAct enabled.""" + # 1. Create the local agent and add Local CodeAct as a context provider. 
+ agent = Agent( + client=create_model_client(), + instructions="Use execute_code for Python calculations and controlled host-tool fan-out.", + context_providers=[ + LocalCodeActProvider( + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + ) + ], + ) + + # 2. Wrap the local agent for Foundry Agent Server hosting. + return ResponsesHostServer(agent) + + +if __name__ == "__main__": + create_server() + +""" +Sample output: +Configure and return your model client here. +""" diff --git a/python/packages/local_codeact/samples/local_execute_code.py b/python/packages/local_codeact/samples/local_execute_code.py new file mode 100644 index 0000000000..142a8225d5 --- /dev/null +++ b/python/packages/local_codeact/samples/local_execute_code.py @@ -0,0 +1,115 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import sys +import tempfile +from pathlib import Path + +from agent_framework import Content + +from agent_framework_local_codeact import FileMount, LocalExecuteCodeTool, ProcessExecutionLimits + +"""This sample demonstrates configuring and invoking Local CodeAct without Foundry hosting. + +Local CodeAct executes LLM-generated Python in the local agent environment. This +sample is meant for a disposable sandbox, container, or VM. It shows the +configuration surface directly on `LocalExecuteCodeTool`: host tools, explicit +environment variables, workspace/file mounts, execution limits, and subprocess +execution mode. 
+""" + + +def convert_usd_to_eur(amount: float) -> dict[str, float]: + """Convert a USD amount with a fixed demonstration exchange rate.""" + return {"usd": amount, "eur": round(amount * 0.92, 2)} + + +def describe_content(content: Content) -> str: + """Return a short printable description for sample output.""" + if content.type == "text": + return f"Text: {content.text}" + if content.type == "data": + return f"Data: {content.additional_properties.get('path')}" + if content.type == "error": + return f"Error: {content.error_details}" + return f"{content.type}: {content}" + + +async def main() -> None: + """Run a local execute-code call with representative configuration.""" + with tempfile.TemporaryDirectory(prefix="local-codeact-sample-") as temp_dir_name: + temp_dir = Path(temp_dir_name) + workspace = temp_dir / "workspace" + output_dir = temp_dir / "output" + workspace.mkdir() + output_dir.mkdir() + (workspace / "amounts.txt").write_text("12.50\n7.50\n", encoding="utf-8") + + # 1. Configure the local execute-code tool. + execute_code = LocalExecuteCodeTool( + tools=[convert_usd_to_eur], + approval_mode="never_require", + workspace_root=workspace, + file_mounts=[ + FileMount( + host_path=output_dir, + mount_path="/output", + mode="read-write", + write_bytes_limit=1024 * 1024, + ) + ], + execution_limits=ProcessExecutionLimits( + timeout_seconds=5, + max_code_bytes=16 * 1024, + max_stdout_bytes=4 * 1024, + max_stderr_bytes=4 * 1024, + max_result_bytes=8 * 1024, + max_captured_file_bytes=1024 * 1024, + max_total_captured_file_bytes=2 * 1024 * 1024, + ), + env={"REPORT_TITLE": "Local CodeAct sample report"}, + execution_mode="subprocess", + python_executable=sys.executable, + ) + + # 2. Execute generated Python. In a real agent run, the model would + # produce this code and call the `execute_code` tool. 
+ code = f""" +import os +from pathlib import Path + +amounts = [ + float(line) + for line in Path({str(workspace / "amounts.txt")!r}).read_text(encoding="utf-8").splitlines() + if line +] +converted = await convert_usd_to_eur(amount=sum(amounts)) + +report_path = Path({str(output_dir / "report.txt")!r}) +report_path.write_text( + f"{{os.environ['REPORT_TITLE']}}\\nUSD: {{converted['usd']}}\\nEUR: {{converted['eur']}}\\n", + encoding="utf-8", +) + +print(os.environ["REPORT_TITLE"]) +converted +""" + + # 3. Print text results and any captured files returned as data content. + results = await execute_code.invoke(arguments={"code": code}) + for content in results: + print(describe_content(content)) + + +if __name__ == "__main__": + asyncio.run(main()) + +""" +Sample output: +Text: Local CodeAct sample report + +Text: {"usd": 20.0, "eur": 18.4} +Data: /output/report.txt +""" diff --git a/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py new file mode 100644 index 0000000000..1717a73e64 --- /dev/null +++ b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py @@ -0,0 +1,234 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +import base64 +import json +import sys +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest +from agent_framework import Content, FunctionTool, Message, SessionContext + +from agent_framework_local_codeact import ( + FileMount, + LocalCodeActProvider, + LocalExecuteCodeTool, + ProcessExecutionLimits, +) +from agent_framework_local_codeact import _runner as runner_module +from agent_framework_local_codeact._files import normalize_mount_path + +RUNNER_SCRIPT = Path(runner_module.__file__ or "").resolve() + + +def add_tool(a: int, b: int) -> int: + return a + b + + +dangerous_tool = FunctionTool( + name="dangerous_tool", + description="Requires approval.", + approval_mode="always_require", + func=lambda: "ok", +) + + +def _content_texts(contents: list[Content]) -> list[str]: + return [content.text or "" for content in contents if content.type == "text"] + + +def test_tool_construction_defaults() -> None: + local_tool = LocalExecuteCodeTool() + assert local_tool.name == "execute_code" + assert local_tool.approval_mode == "never_require" + assert local_tool.execution_mode == "subprocess" + assert local_tool.python_executable == sys.executable + assert local_tool.runner_script is None + assert local_tool.get_tools() == [] + + +def test_add_remove_clear_tools_round_trip() -> None: + local_tool = LocalExecuteCodeTool() + + local_tool.add_tools([add_tool, dangerous_tool]) + assert [tool.name for tool in local_tool.get_tools()] == ["add_tool", "dangerous_tool"] + assert local_tool.approval_mode == "always_require" + + local_tool.remove_tool("dangerous_tool") + assert [tool.name for tool in local_tool.get_tools()] == ["add_tool"] + assert local_tool.approval_mode == "never_require" + + with pytest.raises(KeyError): + local_tool.remove_tool("missing") + + local_tool.clear_tools() + assert local_tool.get_tools() == [] + + +def test_default_approval_mode_always_require_is_sticky() -> 
None: + local_tool = LocalExecuteCodeTool(tools=[add_tool], approval_mode="always_require") + assert local_tool.approval_mode == "always_require" + + local_tool.clear_tools() + assert local_tool.approval_mode == "always_require" + + +def test_file_mounts_normalized_and_round_tripped(tmp_path: Path) -> None: + host_a = tmp_path / "a" + host_a.mkdir() + host_b = tmp_path / "b" + host_b.mkdir() + + local_tool = LocalExecuteCodeTool( + file_mounts=[ + str(host_a), + (str(host_b), "/work"), + FileMount(host_path=host_a, mount_path="/data", mode="read-write"), + ], + ) + + mounts = local_tool.get_file_mounts() + by_mount = {mount.mount_path: mount for mount in mounts} + + assert set(by_mount) == {normalize_mount_path(str(host_a)), "/work", "/data"} + assert by_mount["/work"].host_path == host_b.resolve() + assert by_mount["/data"].mode == "read-write" + + +def test_workspace_root_auto_mounts_at_input(tmp_path: Path) -> None: + local_tool = LocalExecuteCodeTool(workspace_root=tmp_path) + state = local_tool.build_serializable_state() + assert any(mount["mount_path"] == "/input" and mount["mode"] == "read-write" for mount in state["file_mounts"]) + + +def test_build_serializable_state_matches_effective_config(tmp_path: Path) -> None: + local_tool = LocalExecuteCodeTool( + tools=[add_tool, dangerous_tool], + workspace_root=tmp_path, + env={"VISIBLE": "yes"}, + execution_limits=ProcessExecutionLimits(timeout_seconds=3), + python_executable=sys.executable, + ) + state = local_tool.build_serializable_state() + assert state["runtime"] == "local_codeact" + assert state["execution_mode"] == "subprocess" + assert state["python_executable"] == sys.executable + assert state["runner_script"] is None + assert state["approval_mode"] == "always_require" + assert set(state["tool_names"]) == {"add_tool", "dangerous_tool"} + assert state["workspace_root"] == str(tmp_path.resolve()) + assert state["execution_limits"]["timeout_seconds"] == 3 + assert state["env_keys"] == ["VISIBLE"] + + 
+async def test_provider_injects_execute_code_tool_and_instructions() -> None: + provider = LocalCodeActProvider(tools=[add_tool]) + context = SessionContext(input_messages=[Message(role="user", contents=[Content.from_text("hi")])]) + state: dict[str, Any] = {} + + await provider.before_run(agent=MagicMock(), session=None, context=context, state=state) + + assert state["local_codeact"]["tool_names"] == ["add_tool"] + assert any("add_tool" in instruction for instruction in context.instructions) + assert len(context.tools) == 1 + assert isinstance(context.tools[0], LocalExecuteCodeTool) + + +async def test_subprocess_run_code_surfaces_stdout_and_output() -> None: + local_tool = LocalExecuteCodeTool(execution_limits=ProcessExecutionLimits(timeout_seconds=5)) + result = await local_tool._run_code(code="print('hello')\n1 + 2") + + texts = _content_texts(result) + assert any("hello" in text for text in texts) + assert any(text.strip() == "3" for text in texts) + + +async def test_subprocess_run_code_invokes_registered_tool() -> None: + local_tool = LocalExecuteCodeTool(tools=[add_tool], execution_limits=ProcessExecutionLimits(timeout_seconds=5)) + result = await local_tool._run_code(code="await add_tool(a=2, b=3)") + + assert any(text.strip() == "5" for text in _content_texts(result)) + + +async def test_subprocess_call_tool_fallback_invokes_registered_tool() -> None: + local_tool = LocalExecuteCodeTool(tools=[add_tool], execution_limits=ProcessExecutionLimits(timeout_seconds=5)) + result = await local_tool._run_code(code="await call_tool('add_tool', a=4, b=8)") + + assert any(text.strip() == "12" for text in _content_texts(result)) + + +async def test_subprocess_fanout_tool_calls_are_serialized_over_bridge() -> None: + local_tool = LocalExecuteCodeTool(tools=[add_tool], execution_limits=ProcessExecutionLimits(timeout_seconds=5)) + result = await local_tool._run_code( + code="await asyncio.gather(add_tool(a=1, b=2), call_tool('add_tool', a=3, b=4))" + ) + + assert 
any(json.loads(text) == [3, 7] for text in _content_texts(result) if text.startswith("[")) + + +async def test_subprocess_environment_is_explicit(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("LOCAL_CODEACT_SECRET", "hidden") + local_tool = LocalExecuteCodeTool( + env={"VISIBLE": "yes"}, execution_limits=ProcessExecutionLimits(timeout_seconds=5) + ) + result = await local_tool._run_code( + code="import os\n{'visible': os.environ.get('VISIBLE'), 'secret': os.environ.get('LOCAL_CODEACT_SECRET')}" + ) + + payloads = [json.loads(text) for text in _content_texts(result) if text.startswith("{")] + assert payloads == [{"visible": "yes", "secret": None}] + + +async def test_subprocess_runner_script_executes_by_file_path() -> None: + local_tool = LocalExecuteCodeTool( + runner_script=RUNNER_SCRIPT, + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + ) + result = await local_tool._run_code(code="'script runner'") + + assert local_tool.runner_script == RUNNER_SCRIPT + assert any(text.strip() == '"script runner"' for text in _content_texts(result)) + + +async def test_subprocess_timeout_returns_error_content() -> None: + local_tool = LocalExecuteCodeTool(execution_limits=ProcessExecutionLimits(timeout_seconds=0.2)) + result = await local_tool._run_code(code="import time\ntime.sleep(5)") + + assert len(result) == 1 + assert result[0].type == "error" + assert "exceeded" in (result[0].error_details or "") + + +async def test_file_capture_skips_symlinks_and_returns_written_files(tmp_path: Path) -> None: + mounted = tmp_path / "mounted" + mounted.mkdir() + outside = tmp_path / "outside.txt" + outside.write_text("secret", encoding="utf-8") + (mounted / "link.txt").symlink_to(outside) + + local_tool = LocalExecuteCodeTool( + file_mounts=[FileMount(mounted, "/output", mode="read-write")], + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + ) + result = await local_tool._run_code( + code=f"from pathlib import Path\nPath({str(mounted)!r}, 
'out.txt').write_text('hello', encoding='utf-8')" + ) + + data_contents = [content for content in result if content.type == "data"] + assert len(data_contents) == 1 + assert data_contents[0].additional_properties["path"] == "/output/out.txt" + assert data_contents[0].uri is not None + encoded = data_contents[0].uri.split(",", 1)[1] + assert base64.b64decode(encoded) == b"hello" + + +async def test_unsafe_in_process_mode_runs_code() -> None: + local_tool = LocalExecuteCodeTool(execution_mode="unsafe_in_process") + result = await local_tool._run_code(code="print('unsafe')\n'ran'") + + texts = _content_texts(result) + assert any("unsafe" in text for text in texts) + assert any(text.strip() == '"ran"' for text in texts) diff --git a/python/packages/local_codeact/tests/local_codeact/test_validator.py b/python/packages/local_codeact/tests/local_codeact/test_validator.py new file mode 100644 index 0000000000..7ec9f1525a --- /dev/null +++ b/python/packages/local_codeact/tests/local_codeact/test_validator.py @@ -0,0 +1,263 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +import pytest + +from agent_framework_local_codeact import LocalExecuteCodeTool +from agent_framework_local_codeact._validator import CodeValidationError, validate_code + + +def test_validate_allows_basic_arithmetic() -> None: + """Basic arithmetic should be allowed.""" + code = "x = 1 + 2\ny = x * 3" + validate_code(code) # Should not raise + + +def test_validate_allows_tool_calls() -> None: + """Tool calls with await should be allowed.""" + code = "result = await my_tool(param='value')" + validate_code(code) # Should not raise + + +def test_validate_allows_call_tool_fallback() -> None: + """The call_tool fallback should be allowed.""" + code = "result = await call_tool('my_tool', param='value')" + validate_code(code) # Should not raise + + +def test_validate_allows_asyncio_gather() -> None: + """asyncio.gather for fan-out should be allowed.""" + code = """import asyncio +results = await asyncio.gather(tool_a(), tool_b())""" + validate_code(code) # Should not raise + + +def test_validate_allows_pathlib_file_operations() -> None: + """Pathlib for file operations should be allowed.""" + code = """from pathlib import Path +content = Path('/input/file.txt').read_text(encoding='utf-8')""" + validate_code(code) # Should not raise + + +def test_validate_allows_print() -> None: + """Print for output should be allowed.""" + code = "print('hello', 'world')" + validate_code(code) # Should not raise + + +def test_validate_allows_json_operations() -> None: + """JSON operations should be allowed.""" + code = """import json +data = json.dumps({'key': 'value'})""" + validate_code(code) # Should not raise + + +def test_validate_allows_comprehensions() -> None: + """List/dict/set comprehensions should be allowed.""" + code = """squares = [x**2 for x in range(10)] +evens = {x for x in range(10) if x % 2 == 0} +mapping = {x: x**2 for x in range(5)}""" + validate_code(code) # Should not raise + + +def test_validate_allows_control_flow() -> 
None: + """Control flow (if/for/while) should be allowed.""" + code = """for i in range(10): + if i % 2 == 0: + print(i) + else: + continue + +x = 0 +while x < 5: + x += 1""" + validate_code(code) # Should not raise + + +def test_validate_allows_os_environ() -> None: + """Safe os.environ operations should be allowed.""" + code = """import os +value = os.environ.get('KEY', 'default')""" + validate_code(code) # Should not raise + + +def test_validate_allows_os_path() -> None: + """os.path operations should be allowed.""" + code = """import os +joined = os.path.join('/base', 'file.txt')""" + validate_code(code) # Should not raise + + +def test_validate_blocks_eval() -> None: + """eval() should be blocked.""" + code = "result = eval('1 + 1')" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "eval" in str(exc_info.value) + + +def test_validate_blocks_exec() -> None: + """exec() should be blocked.""" + code = "exec('print(1)')" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "exec" in str(exc_info.value) + + +def test_validate_blocks_compile() -> None: + """compile() should be blocked.""" + code = "code_obj = compile('1 + 1', '', 'eval')" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "compile" in str(exc_info.value) + + +def test_validate_blocks_import_subprocess() -> None: + """Subprocess imports should be blocked.""" + code = "import subprocess" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "subprocess" in str(exc_info.value) + + +def test_validate_blocks_import_sys() -> None: + """sys imports should be blocked.""" + code = "import sys" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "sys" in str(exc_info.value) + + +def test_validate_blocks_import_socket() -> None: + """socket imports should be blocked.""" + code = "import socket" + with pytest.raises(CodeValidationError) as 
exc_info: + validate_code(code) + assert "socket" in str(exc_info.value) + + +def test_validate_blocks_import_requests() -> None: + """requests imports should be blocked.""" + code = "import requests" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "requests" in str(exc_info.value) + + +def test_validate_blocks_unknown_import() -> None: + """Unknown imports should be blocked.""" + code = "import unknown_module" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "unknown_module" in str(exc_info.value) + + +def test_validate_blocks_os_system() -> None: + """os.system() should be blocked.""" + code = """import os +os.system('ls')""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.system" in str(exc_info.value) + + +def test_validate_blocks_os_exec() -> None: + """os.exec* operations should be blocked.""" + code = """import os +os.execv('/bin/ls', ['ls'])""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.execv" in str(exc_info.value) + + +def test_validate_blocks_os_popen() -> None: + """os.popen() should be blocked.""" + code = """import os +pipe = os.popen('ls')""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.popen" in str(exc_info.value) + + +def test_validate_blocks_globals() -> None: + """globals() should be blocked.""" + code = "g = globals()" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "globals" in str(exc_info.value) + + +def test_validate_blocks_locals() -> None: + """locals() should be blocked.""" + code = "l = locals()" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "locals" in str(exc_info.value) + + +def test_validate_blocks_import_magic() -> None: + """__import__() should be blocked.""" + code = "mod = __import__('os')" + with pytest.raises(CodeValidationError) as exc_info: + 
validate_code(code) + assert "__import__" in str(exc_info.value) + + +def test_validate_custom_allowed_imports() -> None: + """Custom allowed_imports should replace defaults.""" + # csv is not in the default allow-list + code_with_csv = "import csv" + with pytest.raises(CodeValidationError): + validate_code(code_with_csv) + + # But it should work with custom allow-list + custom_allowed = {"csv", "json"} + validate_code(code_with_csv, allowed_imports=custom_allowed) + + +def test_validate_custom_blocked_imports() -> None: + """Custom blocked_imports should replace defaults.""" + # json is normally allowed + code_with_json = "import json" + validate_code(code_with_json) # Should not raise + + # But block it with custom block-list + custom_blocked = {"json"} + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code_with_json, blocked_imports=custom_blocked) + assert "json" in str(exc_info.value) + + +def test_validate_custom_blocked_builtins() -> None: + """Custom blocked_builtins should replace defaults.""" + # len is normally allowed (not in default blocked list) + code_with_len = "x = len([1, 2, 3])" + validate_code(code_with_len) # Should not raise + + # But block it with custom block-list + custom_blocked = {"len"} + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code_with_len, blocked_builtins=custom_blocked) + assert "len" in str(exc_info.value) + + +async def test_tool_with_custom_allowed_imports() -> None: + """LocalExecuteCodeTool should respect custom allowed_imports.""" + # csv is not in default allow-list + tool = LocalExecuteCodeTool(allowed_imports={"csv", "json"}) + result = await tool._run_code(code="import csv\n{'ok': True}") + # Should execute successfully + assert len(result) > 0 + assert not any(c.type == "error" for c in result) + + +async def test_tool_with_custom_blocked_imports() -> None: + """LocalExecuteCodeTool should respect custom blocked_imports.""" + # json is normally allowed + tool = 
LocalExecuteCodeTool(blocked_imports={"json"}) + result = await tool._run_code(code="import json") + # Should be blocked + assert len(result) == 1 + assert result[0].type == "error" + assert "json" in (result[0].error_details or "") diff --git a/python/pyproject.toml b/python/pyproject.toml index 26e81b49cd..89c4948743 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -87,6 +87,7 @@ agent-framework-gemini = { workspace = true } agent-framework-github-copilot = { workspace = true } agent-framework-hyperlight = { workspace = true } agent-framework-lab = { workspace = true } +agent-framework-local-codeact = { workspace = true } agent-framework-mem0 = { workspace = true } agent-framework-monty = { workspace = true } agent-framework-ollama = { workspace = true } diff --git a/python/uv.lock b/python/uv.lock index dee89c9f0a..03a4d6a087 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -49,6 +49,7 @@ members = [ "agent-framework-github-copilot", "agent-framework-hyperlight", "agent-framework-lab", + "agent-framework-local-codeact", "agent-framework-mem0", "agent-framework-monty", "agent-framework-ollama", @@ -707,6 +708,17 @@ dev = [ { name = "uv", specifier = "==0.11.6" }, ] +[[package]] +name = "agent-framework-local-codeact" +version = "1.0.0a260526" +source = { editable = "packages/local_codeact" } +dependencies = [ + { name = "agent-framework-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, +] + +[package.metadata] +requires-dist = [{ name = "agent-framework-core", editable = "packages/core" }] + [[package]] name = "agent-framework-mem0" version = "1.0.0b260521" From fcae801751169a028a09c56a1a3eb0defd44f637 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 09:14:47 +0200 Subject: [PATCH 02/12] fix: Address PR review comments - Remove 'open', 'getattr', 'setattr', 'hasattr' from ALLOWED_BUILTINS (bypass risk) - Add these to BLOCKED_BUILTINS with explanatory comments - Propagate 
AST validation settings to create_run_tool snapshot - Terminate subprocess before raising on error messages - Move module docstrings to file start in samples - Remove pointless string statements from samples - Document allowed_builtins behavior in visit_Call Fixes all 8 review comments in PR #6091 --- .../agent_framework_local_codeact/_bridge.py | 1 + .../_execute_code_tool.py | 4 +++ .../_validator.py | 26 ++++++++++++------- .../samples/foundry_hosted_agent.py | 5 ---- .../samples/local_execute_code.py | 26 +++++++------------ 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py index a1b184a4ab..32182edfde 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py @@ -167,6 +167,7 @@ async def _communicate(self, process: asyncio.subprocess.Process, code: str) -> return dict(result_dict) if message_type == "error": details = str(message.get("traceback") or message.get("message") or "Unknown execution error.") + await self._stop_process(process) raise RuntimeError(details) async def _handle_tool_call(self, process: asyncio.subprocess.Process, message: Mapping[str, Any]) -> None: diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py index e8550de51a..66fd96d572 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -348,6 +348,10 @@ def create_run_tool(self) -> LocalExecuteCodeTool: execution_mode=self._execution_mode, python_executable=self._python_executable, runner_script=self._runner_script, + allowed_imports=self._allowed_imports, + 
blocked_imports=self._blocked_imports, + allowed_builtins=self._allowed_builtins, + blocked_builtins=self._blocked_builtins, ) def build_serializable_state(self) -> dict[str, Any]: diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py index 6bd4fdd809..b3fa811753 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py @@ -52,6 +52,9 @@ } # Allowed builtin function names that generated code may call. +# Note: getattr/setattr/hasattr/delattr are NOT included because they can bypass +# AST attribute restrictions (e.g., getattr(os, 'system')('...') avoids os.system check). +# User-defined functions and registered tools are allowed at runtime. ALLOWED_BUILTINS: set[str] = { "print", "len", @@ -93,9 +96,6 @@ "memoryview", "isinstance", "issubclass", - "hasattr", - "getattr", - "setattr", "callable", "type", "id", @@ -115,7 +115,7 @@ "locals", "vars", "dir", - "open", + "open", # File I/O must go through pathlib with explicit mounts "input", "help", "breakpoint", @@ -124,6 +124,10 @@ "copyright", "credits", "license", + "delattr", + "getattr", # Can bypass AST attribute checks: getattr(os, 'system') + "setattr", # Can bypass AST attribute checks + "hasattr", # Can probe for dangerous attributes } # Allowed AST node types for code structure and operations. @@ -304,16 +308,20 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: self.generic_visit(node) def visit_Call(self, node: ast.Call) -> None: - """Validate function calls.""" + """Validate function calls. + + Note: We only validate calls to known builtins against the block-list. + Calls to user-defined functions and registered tools are allowed (validated at runtime). 
+ The allowed_builtins parameter exists for customization but does not enforce + an allow-list by default to permit user code and tools. + """ # Check for blocked builtins if isinstance(node.func, ast.Name): func_name = node.func.id if func_name in self._blocked_builtins: self._errors.append(f"Call to builtin '{func_name}' is not allowed") - elif func_name not in self._allowed_builtins and func_name != "call_tool": - # Allow user-defined functions and registered tools (validated at runtime) - # We only block known dangerous builtins here - pass + # Note: We don't enforce allowed_builtins for Names to allow user-defined + # functions and registered tools. Custom blocked_builtins can restrict specific names. # Check for attribute access to dangerous methods if isinstance(node.func, ast.Attribute): diff --git a/python/packages/local_codeact/samples/foundry_hosted_agent.py b/python/packages/local_codeact/samples/foundry_hosted_agent.py index 7b37846a4d..ea93e07389 100644 --- a/python/packages/local_codeact/samples/foundry_hosted_agent.py +++ b/python/packages/local_codeact/samples/foundry_hosted_agent.py @@ -39,8 +39,3 @@ def create_server() -> ResponsesHostServer: if __name__ == "__main__": create_server() - -""" -Sample output: -Configure and return your model client here. -""" diff --git a/python/packages/local_codeact/samples/local_execute_code.py b/python/packages/local_codeact/samples/local_execute_code.py index 142a8225d5..cfaf993185 100644 --- a/python/packages/local_codeact/samples/local_execute_code.py +++ b/python/packages/local_codeact/samples/local_execute_code.py @@ -1,5 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. +"""This sample demonstrates configuring and invoking Local CodeAct without Foundry hosting. + +Local CodeAct executes LLM-generated Python in the local agent environment. This +sample is meant for a disposable sandbox, container, or VM. 
It shows the +configuration surface directly on `LocalExecuteCodeTool`: host tools, explicit +environment variables, workspace/file mounts, execution limits, and subprocess +execution mode. +""" + from __future__ import annotations import asyncio @@ -11,15 +20,6 @@ from agent_framework_local_codeact import FileMount, LocalExecuteCodeTool, ProcessExecutionLimits -"""This sample demonstrates configuring and invoking Local CodeAct without Foundry hosting. - -Local CodeAct executes LLM-generated Python in the local agent environment. This -sample is meant for a disposable sandbox, container, or VM. It shows the -configuration surface directly on `LocalExecuteCodeTool`: host tools, explicit -environment variables, workspace/file mounts, execution limits, and subprocess -execution mode. -""" - def convert_usd_to_eur(amount: float) -> dict[str, float]: """Convert a USD amount with a fixed demonstration exchange rate.""" @@ -105,11 +105,3 @@ async def main() -> None: if __name__ == "__main__": asyncio.run(main()) - -""" -Sample output: -Text: Local CodeAct sample report - -Text: {"usd": 20.0, "eur": 18.4} -Data: /output/report.txt -""" From 3f0946f0e8947b1faef6a9a938df2179f9408c9a Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 09:16:53 +0200 Subject: [PATCH 03/12] fix: Python 3.10 compatibility for timeout test Python 3.10's TimeoutError has a different string representation than 3.11+. Update test to check for 'TimeoutError' instead of specific message content. Verified on Python 3.10.15 and 3.12.7. 
--- .../local_codeact/tests/local_codeact/test_local_codeact.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py index 1717a73e64..a208e3e4f4 100644 --- a/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py +++ b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py @@ -199,7 +199,9 @@ async def test_subprocess_timeout_returns_error_content() -> None: assert len(result) == 1 assert result[0].type == "error" - assert "exceeded" in (result[0].error_details or "") + # Python 3.10 returns "TimeoutError: ", 3.11+ returns more detail + assert result[0].error_details is not None + assert "TimeoutError" in result[0].error_details async def test_file_capture_skips_symlinks_and_returns_written_files(tmp_path: Path) -> None: From df83f649333d331d3db2a8a022e58e8d05a76592 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 14:09:20 +0200 Subject: [PATCH 04/12] Enforce builtin allow-list and silence bandit nosec false-positives - _validator.py: visit_Call now enforces ALLOWED_BUILTINS for names that match real Python builtins, while still treating unknown names as user-defined functions/registered tools. This makes the allowed_builtins parameter behave as a real allow-list. - _bridge.py / _runner.py: add explicit '# nosec' markers next to the existing '# noqa: S102/S404' so bandit accepts the intentional subprocess import and exec() calls (this package's whole purpose). - test_validator.py: add tests for unknown-builtin rejection, user-defined function acceptance, and custom allow-list expansion. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework_local_codeact/_bridge.py | 4 +-- .../agent_framework_local_codeact/_runner.py | 2 +- .../_validator.py | 16 +++++---- .../tests/local_codeact/test_validator.py | 34 +++++++++++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py index 32182edfde..080de4b6c0 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_bridge.py @@ -11,7 +11,7 @@ import json import keyword import os -import subprocess # noqa: S404 - subprocess mode is the default execution strategy for this package. +import subprocess # noqa: S404 # nosec B404 - subprocess mode is the default execution strategy for this package. import traceback from collections.abc import Mapping, Sequence from copy import copy @@ -256,7 +256,7 @@ async def call_tool(name: str, **kwargs: Any) -> Any: compiled, output_present = self._compile_main(code) try: with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr): - exec(compiled, globals_dict, globals_dict) # noqa: S102 - explicit unsafe in-process mode. + exec(compiled, globals_dict, globals_dict) # noqa: S102 # nosec B102 - explicit unsafe in-process mode. 
output = await globals_dict["__local_codeact_main__"]() except Exception: raise RuntimeError(traceback.format_exc(limit=20)) from None diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_runner.py b/python/packages/local_codeact/agent_framework_local_codeact/_runner.py index b1a644e584..4a660cc75d 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_runner.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_runner.py @@ -167,7 +167,7 @@ async def call_tool(name: str, **kwargs: Any) -> Any: compiled, output_present = _compile_main(code) with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr): - exec(compiled, globals_dict, globals_dict) # noqa: S102 - this runner exists to execute generated code. + exec(compiled, globals_dict, globals_dict) # noqa: S102 # nosec B102 - this runner exists to execute generated code. output = await globals_dict["__local_codeact_main__"]() return { diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py index b3fa811753..5a8d70f8b5 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py @@ -5,8 +5,11 @@ from __future__ import annotations import ast +import builtins as _builtins from typing import Any +_PYTHON_BUILTIN_NAMES: frozenset[str] = frozenset(dir(_builtins)) + # Allowed imports that generated code may use. ALLOWED_IMPORTS: set[str] = { "asyncio", @@ -310,18 +313,17 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: def visit_Call(self, node: ast.Call) -> None: """Validate function calls. - Note: We only validate calls to known builtins against the block-list. - Calls to user-defined functions and registered tools are allowed (validated at runtime). 
- The allowed_builtins parameter exists for customization but does not enforce - an allow-list by default to permit user code and tools. + For names that match a real Python builtin we enforce both the block-list + and the allow-list. Names that are not builtins are treated as user-defined + functions or registered tools and are allowed (validated at runtime). """ - # Check for blocked builtins if isinstance(node.func, ast.Name): func_name = node.func.id if func_name in self._blocked_builtins: self._errors.append(f"Call to builtin '{func_name}' is not allowed") - # Note: We don't enforce allowed_builtins for Names to allow user-defined - # functions and registered tools. Custom blocked_builtins can restrict specific names. + elif func_name in _PYTHON_BUILTIN_NAMES and func_name not in self._allowed_builtins: + # Real builtin that wasn't explicitly allowed — reject so the allow-list is meaningful. + self._errors.append(f"Call to builtin '{func_name}' is not in the allowed builtins list") # Check for attribute access to dangerous methods if isinstance(node.func, ast.Attribute): diff --git a/python/packages/local_codeact/tests/local_codeact/test_validator.py b/python/packages/local_codeact/tests/local_codeact/test_validator.py index 7ec9f1525a..c2fe732b96 100644 --- a/python/packages/local_codeact/tests/local_codeact/test_validator.py +++ b/python/packages/local_codeact/tests/local_codeact/test_validator.py @@ -89,6 +89,40 @@ def test_validate_allows_os_path() -> None: validate_code(code) # Should not raise +def test_validate_blocks_unknown_python_builtin() -> None: + """A real Python builtin not in the allow-list should be rejected.""" + # `aiter` is a real builtin that is not in ALLOWED_BUILTINS (unlike `vars`, which is in BLOCKED_BUILTINS). + # Even without explicit blocking, a real builtin missing from the allow-list must fail. 
+ code = "result = aiter([])" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "aiter" in str(exc_info.value) + + +def test_validate_allows_user_defined_function_call() -> None: + """Names that are not Python builtins are treated as user code/tools and allowed.""" + code = """def my_helper(x): + return x + 1 +result = my_helper(5)""" + validate_code(code) # Should not raise + + +def test_validate_custom_allowed_builtins_permits_extra() -> None: + """Custom allow-list can permit extra builtins like `vars`.""" + code = "result = vars()" + # Default: blocked + with pytest.raises(CodeValidationError): + validate_code(code) + # With custom allow-list including `vars` and removed from blocked list: allowed + from agent_framework_local_codeact._validator import ALLOWED_BUILTINS, BLOCKED_BUILTINS + + validate_code( + code, + allowed_builtins=ALLOWED_BUILTINS | {"vars"}, + blocked_builtins=BLOCKED_BUILTINS - {"vars"}, + ) + + def test_validate_blocks_eval() -> None: """eval() should be blocked.""" code = "result = eval('1 + 1')" From e6a309beedd223d6de044829ad71c1ef5007674e Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 14:33:40 +0200 Subject: [PATCH 05/12] Tolerate Windows file locks during workspace cleanup On Windows a freshly-killed subprocess can briefly hold the temporary workspace directory open. Swallow OSError from temp_dir.cleanup() so the caller still receives the proper error Content from the run and so the timeout test passes on Windows. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework_local_codeact/_execute_code_tool.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py index 66fd96d572..5d97d821f6 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -462,7 +462,13 @@ async def _run_code(self, *, code: str) -> list[Content]: ] finally: if temp_dir is not None: - temp_dir.cleanup() + # Windows: a freshly-killed subprocess can briefly hold the workspace + # directory open. Swallow rmtree failures so callers still get a clean + # error Content for the run. + try: + temp_dir.cleanup() + except OSError: + pass contents = _build_execution_contents(result=result) contents.extend(capture_written_files(mounts, pre_state, limits=self._execution_limits)) From a331ab8c5e152fc2c8b96d28174a4c8548f85dca Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 14:41:59 +0200 Subject: [PATCH 06/12] Use contextlib.suppress for Windows cleanup race Ruff SIM105 prefers contextlib.suppress over try/except/pass. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework_local_codeact/_execute_code_tool.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py index 5d97d821f6..08600339e5 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -4,6 +4,7 @@ from __future__ import annotations +import contextlib import json import sys import tempfile @@ -465,10 +466,8 @@ async def _run_code(self, *, code: str) -> list[Content]: # Windows: a freshly-killed subprocess can briefly hold the workspace # directory open. Swallow rmtree failures so callers still get a clean # error Content for the run. - try: + with contextlib.suppress(OSError): temp_dir.cleanup() - except OSError: - pass contents = _build_execution_contents(result=result) contents.extend(capture_written_files(mounts, pre_state, limits=self._execution_limits)) From a660df246527e3b335b2bb393513d4ff79f52bf8 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 27 May 2026 15:13:10 +0200 Subject: [PATCH 07/12] Use ignore_cleanup_errors on Windows workspace tempdir The previous OSError-only suppression missed the RecursionError that Python's TemporaryDirectory cleanup can raise on Windows when a freshly killed subprocess still holds a handle to the workspace. Pass ignore_cleanup_errors=True (Python 3.10+) so the platform stops retrying rmtree, and broaden the outer suppression so unexpected cleanup errors do not mask the actual run result. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_execute_code_tool.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py index 08600339e5..4c0caeec75 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -437,7 +437,10 @@ async def _run_code(self, *, code: str) -> list[Content]: if self._workspace_root is not None: cwd = str(self._workspace_root) elif self._execution_mode == "subprocess": - temp_dir = tempfile.TemporaryDirectory(prefix="local-codeact-") + # ignore_cleanup_errors handles the Windows race where a freshly-killed + # subprocess may still hold a file handle in the workspace; without it + # tempfile's recursive retry can hit RecursionError on Windows. + temp_dir = tempfile.TemporaryDirectory(prefix="local-codeact-", ignore_cleanup_errors=True) cwd = temp_dir.name try: @@ -463,10 +466,11 @@ async def _run_code(self, *, code: str) -> list[Content]: ] finally: if temp_dir is not None: - # Windows: a freshly-killed subprocess can briefly hold the workspace - # directory open. Swallow rmtree failures so callers still get a clean - # error Content for the run. - with contextlib.suppress(OSError): + # Best-effort cleanup; TemporaryDirectory(ignore_cleanup_errors=True) + # absorbs Windows file-lock errors. Swallow anything else (e.g. the + # RecursionError some Python versions raise inside their cleanup retry) + # so the caller still receives the proper error Content. 
+ with contextlib.suppress(Exception): temp_dir.cleanup() contents = _build_execution_contents(result=result) From bf55826326572b7aa9f6d1fe5137642b0cc73205 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 28 May 2026 09:31:20 +0200 Subject: [PATCH 08/12] Make foundry_hosted_agent sample a runnable CodeAct example Model the sample after the hyperlight_codeact container sample: register compute and fetch_data as sandbox-only host tools on LocalCodeActProvider, wire a FoundryChatClient-backed agent, and serve via ResponsesHostServer. Update samples README with the new run/request instructions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/local_codeact/samples/README.md | 17 ++- .../samples/foundry_hosted_agent.py | 113 ++++++++++++++---- 2 files changed, 107 insertions(+), 23 deletions(-) diff --git a/python/packages/local_codeact/samples/README.md b/python/packages/local_codeact/samples/README.md index ff27acae93..ab1d3da8ef 100644 --- a/python/packages/local_codeact/samples/README.md +++ b/python/packages/local_codeact/samples/README.md @@ -9,7 +9,7 @@ agent, container, or VM. | Sample | Description | | --- | --- | -| `foundry_hosted_agent.py` | Adds `LocalCodeActProvider` to an agent before wrapping it with `ResponsesHostServer`. | +| `foundry_hosted_agent.py` | Hosts a `FoundryChatClient`-backed agent with `LocalCodeActProvider` behind `ResponsesHostServer`. Registers `compute` and `fetch_data` as sandbox-only host tools the model reaches via `call_tool(...)` from inside `execute_code`. | | `local_execute_code.py` | Invokes `LocalExecuteCodeTool` directly with host tools, explicit environment variables, file mounts, subprocess mode, the Python executable path, and execution limits. 
| Run the local sample from the `python/` directory: @@ -17,3 +17,18 @@ Run the local sample from the `python/` directory: ```bash uv run --package agent-framework-local-codeact packages/local_codeact/samples/local_execute_code.py ``` + +Run the Foundry hosted-agent sample (requires `FOUNDRY_PROJECT_ENDPOINT` and +`AZURE_AI_MODEL_DEPLOYMENT_NAME`, plus `az login` for `DefaultAzureCredential`): + +```bash +uv run --package agent-framework-local-codeact packages/local_codeact/samples/foundry_hosted_agent.py +``` + +Then send a request: + +```bash +curl -X POST http://localhost:8088/responses \ + -H "Content-Type: application/json" \ + -d '{"input": "Fetch all users, find the admins, multiply 7 by 6, and print the users, admins and multiplication result. Use execute_code with call_tool(...)."}' +``` diff --git a/python/packages/local_codeact/samples/foundry_hosted_agent.py b/python/packages/local_codeact/samples/foundry_hosted_agent.py index ea93e07389..2a4bf83823 100644 --- a/python/packages/local_codeact/samples/foundry_hosted_agent.py +++ b/python/packages/local_codeact/samples/foundry_hosted_agent.py @@ -1,41 +1,110 @@ # Copyright (c) Microsoft. All rights reserved. -"""This sample demonstrates wiring Local CodeAct into a Foundry hosted agent. +"""Hosts an Agent Framework agent with Local CodeAct behind the Foundry Responses protocol. -Local CodeAct executes LLM-generated Python in the local agent environment. Use -it only when the deployment environment supplies the real sandbox, such as a -Foundry hosted-agent container. +Local CodeAct runs LLM-generated Python in the agent process. Run this only +inside an externally sandboxed environment (Foundry hosted-agent container, +disposable VM, or similar). Two host tools (``compute``, ``fetch_data``) are +registered on ``LocalCodeActProvider`` and are reachable from inside the +generated code via ``call_tool(...)``; the model itself only sees a single +``execute_code`` tool. 
+ +Required environment variables: + +- ``FOUNDRY_PROJECT_ENDPOINT`` -- Azure AI Foundry project endpoint. +- ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` -- Model deployment name. + +See ``samples/README.md`` for run and request instructions. """ -from typing import Any +from __future__ import annotations + +import asyncio +import os +from typing import Annotated, Any, Literal -from agent_framework import Agent +from agent_framework import Agent, tool +from agent_framework.foundry import FoundryChatClient from agent_framework_foundry_hosting import ResponsesHostServer +from azure.identity import DefaultAzureCredential +from dotenv import load_dotenv from agent_framework_local_codeact import LocalCodeActProvider, ProcessExecutionLimits +load_dotenv() -def create_model_client() -> Any: - """Return the model client configured for your hosted agent.""" - raise RuntimeError("Configure and return your model client here.") +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + "Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation for sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] -def create_server() -> ResponsesHostServer: - """Create a Foundry Responses host server with Local CodeAct enabled.""" - # 1. Create the local agent and add Local CodeAct as a context provider. 
- agent = Agent( - client=create_model_client(), - instructions="Use execute_code for Python calculations and controlled host-tool fan-out.", - context_providers=[ - LocalCodeActProvider( - execution_limits=ProcessExecutionLimits(timeout_seconds=5), - ) + +@tool(approval_mode="never_require") +async def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch records from a named table.""" + await asyncio.sleep(0.5) + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, ], + } + return data.get(table, []) + + +def main() -> None: + """Run the Local CodeAct agent behind a Foundry Responses host server.""" + # 1. Create the Foundry chat client. + client = FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"], + credential=DefaultAzureCredential(), + function_invocation_configuration={"include_detailed_errors": True}, + ) + + # 2. Register sandbox tools on a Local CodeAct provider. The model only sees + # `execute_code`; `compute` and `fetch_data` are reachable from inside + # the generated Python via `call_tool(...)`. + codeact = LocalCodeActProvider( + tools=[compute, fetch_data], + approval_mode="never_require", + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + ) + + # 3. Build the agent. History is managed by the hosting infrastructure, so + # request the model not to persist server-side conversation state. + agent = Agent( + client=client, + instructions="You are a helpful assistant. Keep your answers brief.", + context_providers=[codeact], + default_options={"store": False}, ) - # 2. Wrap the local agent for Foundry Agent Server hosting. 
- return ResponsesHostServer(agent) + # 4. Serve the agent over the Foundry Responses protocol. + server = ResponsesHostServer(agent) + server.run() if __name__ == "__main__": - create_server() + main() From e6627366af90dc415ca517a7195c9204c1121e11 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 28 May 2026 09:33:22 +0200 Subject: [PATCH 09/12] Point Local CodeAct foundry sample at shared hosted-agent setup Add a note to the sample docstring and the samples README directing readers to python/samples/04-hosting/foundry-hosted-agents/responses for the surrounding Foundry hosted-agent environment, Dockerfile, manifest, and deployment workflow. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/local_codeact/samples/README.md | 8 ++++++-- .../local_codeact/samples/foundry_hosted_agent.py | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/packages/local_codeact/samples/README.md b/python/packages/local_codeact/samples/README.md index ab1d3da8ef..c2cb3d41a1 100644 --- a/python/packages/local_codeact/samples/README.md +++ b/python/packages/local_codeact/samples/README.md @@ -9,7 +9,7 @@ agent, container, or VM. | Sample | Description | | --- | --- | -| `foundry_hosted_agent.py` | Hosts a `FoundryChatClient`-backed agent with `LocalCodeActProvider` behind `ResponsesHostServer`. Registers `compute` and `fetch_data` as sandbox-only host tools the model reaches via `call_tool(...)` from inside `execute_code`. | +| `foundry_hosted_agent.py` | Hosts a `FoundryChatClient`-backed agent with `LocalCodeActProvider` behind `ResponsesHostServer`. Registers `compute` and `fetch_data` as sandbox-only host tools the model reaches via `call_tool(...)` from inside `execute_code`. 
Use it together with the shared Foundry hosted-agent setup in [`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../../samples/04-hosting/foundry-hosted-agents/responses) for the Dockerfile, manifest, and deployment workflow used by the other Responses-based hosted agents. | | `local_execute_code.py` | Invokes `LocalExecuteCodeTool` directly with host tools, explicit environment variables, file mounts, subprocess mode, the Python executable path, and execution limits. | Run the local sample from the `python/` directory: @@ -19,7 +19,11 @@ uv run --package agent-framework-local-codeact packages/local_codeact/samples/lo ``` Run the Foundry hosted-agent sample (requires `FOUNDRY_PROJECT_ENDPOINT` and -`AZURE_AI_MODEL_DEPLOYMENT_NAME`, plus `az login` for `DefaultAzureCredential`): +`AZURE_AI_MODEL_DEPLOYMENT_NAME`, plus `az login` for `DefaultAzureCredential`). +Use it together with the shared Foundry hosted-agent setup in +[`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../../samples/04-hosting/foundry-hosted-agents/responses) +for the Dockerfile, manifest, and deployment workflow used by the other +Responses-based hosted agents: ```bash uv run --package agent-framework-local-codeact packages/local_codeact/samples/foundry_hosted_agent.py diff --git a/python/packages/local_codeact/samples/foundry_hosted_agent.py b/python/packages/local_codeact/samples/foundry_hosted_agent.py index 2a4bf83823..5b26528325 100644 --- a/python/packages/local_codeact/samples/foundry_hosted_agent.py +++ b/python/packages/local_codeact/samples/foundry_hosted_agent.py @@ -2,6 +2,10 @@ """Hosts an Agent Framework agent with Local CodeAct behind the Foundry Responses protocol. 
+Use this sample alongside the shared Foundry hosted-agent setup documented in +[`python/samples/04-hosting/foundry-hosted-agents/responses`](https://github.com/microsoft/agent-framework/tree/main/python/samples/04-hosting/foundry-hosted-agents/responses) +for the surrounding environment, Dockerfile, manifest, and deployment steps. + Local CodeAct runs LLM-generated Python in the agent process. Run this only inside an externally sandboxed environment (Foundry hosted-agent container, disposable VM, or similar). Two host tools (``compute``, ``fetch_data``) are From 9f40659b594428a367a6cb531747c97f55a4b0fe Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 28 May 2026 09:35:46 +0200 Subject: [PATCH 10/12] Fix relative link to foundry-hosted-agents responses folder Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/local_codeact/samples/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/packages/local_codeact/samples/README.md b/python/packages/local_codeact/samples/README.md index c2cb3d41a1..b8938be667 100644 --- a/python/packages/local_codeact/samples/README.md +++ b/python/packages/local_codeact/samples/README.md @@ -9,7 +9,7 @@ agent, container, or VM. | Sample | Description | | --- | --- | -| `foundry_hosted_agent.py` | Hosts a `FoundryChatClient`-backed agent with `LocalCodeActProvider` behind `ResponsesHostServer`. Registers `compute` and `fetch_data` as sandbox-only host tools the model reaches via `call_tool(...)` from inside `execute_code`. Use it together with the shared Foundry hosted-agent setup in [`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../../samples/04-hosting/foundry-hosted-agents/responses) for the Dockerfile, manifest, and deployment workflow used by the other Responses-based hosted agents. | +| `foundry_hosted_agent.py` | Hosts a `FoundryChatClient`-backed agent with `LocalCodeActProvider` behind `ResponsesHostServer`. 
Registers `compute` and `fetch_data` as sandbox-only host tools the model reaches via `call_tool(...)` from inside `execute_code`. Use it together with the shared Foundry hosted-agent setup in [`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../samples/04-hosting/foundry-hosted-agents/responses) for the Dockerfile, manifest, and deployment workflow used by the other Responses-based hosted agents. | | `local_execute_code.py` | Invokes `LocalExecuteCodeTool` directly with host tools, explicit environment variables, file mounts, subprocess mode, the Python executable path, and execution limits. | Run the local sample from the `python/` directory: @@ -21,7 +21,7 @@ uv run --package agent-framework-local-codeact packages/local_codeact/samples/lo Run the Foundry hosted-agent sample (requires `FOUNDRY_PROJECT_ENDPOINT` and `AZURE_AI_MODEL_DEPLOYMENT_NAME`, plus `az login` for `DefaultAzureCredential`). Use it together with the shared Foundry hosted-agent setup in -[`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../../samples/04-hosting/foundry-hosted-agents/responses) +[`python/samples/04-hosting/foundry-hosted-agents/responses`](../../../samples/04-hosting/foundry-hosted-agents/responses) for the Dockerfile, manifest, and deployment workflow used by the other Responses-based hosted agents: From 18e27744a756a52966f82fffced01d4973c4b244 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 28 May 2026 10:08:28 +0200 Subject: [PATCH 11/12] fix(local-codeact): enforce os.* allow-list and harden mount capture Address Copilot review: the validator's deny-list for os.* attributes was broader than the documented contract ('os.environ and os.path only') so attributes like os.listdir, os.open, and os.getcwd slipped through. Replace the deny-list with an allow-list of {environ, path} threaded through validator -> tool -> provider via a new allowed_os_attrs parameter. 
Harden virtual mount-path handling so a mount cannot be tricked into surfacing protected host data: - resolve_existing_directory rejects symlinked mount roots so a mount whose host_path is itself a symlink cannot expose another directory. - iter_real_files skips hardlinks (st_nlink > 1) and requires every entry's resolved path to stay under the mount root, defeating ln-based hardlink-into-mount and junction-style escapes. Update README to document the virtual-mount-paths-are-labels contract, the os.* allow-list, and the capture-time defenses. Add tests covering os.listdir/os.open/os.getcwd rejection, allowed_os_attrs override, hardlink skipping, and symlinked-mount-root rejection. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/local_codeact/README.md | 31 +++++++- .../_execute_code_tool.py | 8 ++ .../agent_framework_local_codeact/_files.py | 41 +++++++++- .../_provider.py | 4 + .../_validator.py | 74 +++++-------------- .../tests/local_codeact/test_local_codeact.py | 36 +++++++++ .../tests/local_codeact/test_validator.py | 43 +++++++++++ 7 files changed, 174 insertions(+), 63 deletions(-) diff --git a/python/packages/local_codeact/README.md b/python/packages/local_codeact/README.md index dabda508ac..f5c79549fa 100644 --- a/python/packages/local_codeact/README.md +++ b/python/packages/local_codeact/README.md @@ -109,6 +109,31 @@ and output capture; they are not an OS-level filesystem policy. Only files under `read-write` mounts are captured after execution. +### Virtual mount paths are display labels + +Each mount has a `mount_path` (for example `/input`, `/output`) that is used +**only** for instructions to the model and for tagging captured files in the +response. The subprocess executes against the real host filesystem and does +not see a chrooted virtual path. 
Filesystem isolation comes from the outer +sandbox (for example a Foundry hosted-agent container); within that sandbox +the validator and capture rules below provide defense-in-depth so that +generated code cannot redirect a virtual mount label to data outside the +configured host directory: + +- Mount roots may not themselves be symbolic links — a symlinked `host_path` + is rejected so it cannot silently point at another directory. +- Symbolic links inside a mount (file or directory) are skipped during + capture, so a symlink created at runtime cannot leak content from outside + the mount root. +- Hard links inside a mount are skipped during capture — a hardlink whose + inode lives outside the mount (for example `ln /etc/passwd + /input/loot.txt`) cannot surface protected host data through the mount. +- During capture every entry's resolved path is required to stay under the + mount root, so OS features that bypass `is_symlink()` (junctions, bind + mounts) still cannot escape. +- `mount_path` is normalized to reject `..` segments so the virtual path + cannot be crafted to traverse out of the mount in captured-file metadata. + ## Python interpreter and runner Subprocess mode launches Python as: @@ -146,8 +171,10 @@ Generated code is validated against AST allow-lists before execution: `threading`, `multiprocessing`, and others. - **Blocked builtins**: `eval`, `exec`, `compile`, `__import__`, `globals`, `locals`, `open`, and others. -- **Blocked os operations**: `os.system`, `os.exec*`, `os.popen`, `os.fork`, - file system modifications outside configured mounts, and others. +- **`os` attribute allow-list**: only `os.environ` and `os.path` are permitted. + Every other `os.*` access (`os.system`, `os.exec*`, `os.popen`, + `os.fork`, `os.listdir`, `os.open`, `os.getcwd`, file-system mutations, etc.) + is rejected. Override via `allowed_os_attrs` to opt in to a different set.
Validation errors are returned as `Content.from_error` with details about which operations are not allowed. This is defense-in-depth only and does not make diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py index 4c0caeec75..7450fbb704 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_execute_code_tool.py @@ -79,6 +79,7 @@ def _validate_code( blocked_imports: set[str] | None = None, allowed_builtins: set[str] | None = None, blocked_builtins: set[str] | None = None, + allowed_os_attrs: set[str] | None = None, ) -> None: if not isinstance(code, str): raise TypeError("code must be a string.") @@ -94,6 +95,7 @@ def _validate_code( blocked_imports=blocked_imports, allowed_builtins=allowed_builtins, blocked_builtins=blocked_builtins, + allowed_os_attrs=allowed_os_attrs, ) @@ -192,6 +194,7 @@ def __init__( blocked_imports: set[str] | None = None, allowed_builtins: set[str] | None = None, blocked_builtins: set[str] | None = None, + allowed_os_attrs: set[str] | None = None, ) -> None: """Initialize a local execute-code tool. @@ -209,6 +212,8 @@ def __init__( blocked_imports: Custom blocked imports (replaces defaults). allowed_builtins: Custom allowed builtins (replaces defaults). blocked_builtins: Custom blocked builtins (replaces defaults). + allowed_os_attrs: Custom allowed ``os`` attribute names (replaces the + default ``{"environ", "path"}`` allow-list). 
""" super().__init__( name=EXECUTE_CODE_TOOL_NAME, @@ -234,6 +239,7 @@ def __init__( self._blocked_imports = blocked_imports self._allowed_builtins = allowed_builtins self._blocked_builtins = blocked_builtins + self._allowed_os_attrs = allowed_os_attrs if tools is not None: self.add_tools(tools) if file_mounts is not None: @@ -353,6 +359,7 @@ def create_run_tool(self) -> LocalExecuteCodeTool: blocked_imports=self._blocked_imports, allowed_builtins=self._allowed_builtins, blocked_builtins=self._blocked_builtins, + allowed_os_attrs=self._allowed_os_attrs, ) def build_serializable_state(self) -> dict[str, Any]: @@ -424,6 +431,7 @@ async def _run_code(self, *, code: str) -> list[Content]: blocked_imports=self._blocked_imports, allowed_builtins=self._allowed_builtins, blocked_builtins=self._blocked_builtins, + allowed_os_attrs=self._allowed_os_attrs, ) except (TypeError, ValueError, CodeValidationError) as exc: return [Content.from_error(message="Invalid code", error_details=str(exc))] diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_files.py b/python/packages/local_codeact/agent_framework_local_codeact/_files.py index 061efc0107..130556a71d 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_files.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_files.py @@ -31,8 +31,18 @@ def normalize_mount_path(mount_path: str) -> str: def resolve_existing_directory(value: str | Path) -> Path: - """Resolve a path and require it to point at an existing directory.""" - resolved = Path(value).expanduser().resolve(strict=True) + """Resolve a path and require it to point at an existing real directory. + + Symlinks at the mount root are rejected: a mount whose host_path is itself + a symlink could silently expose a directory outside the intended location + (for example a ``/tmp/foo`` symlink pointing at ``/etc``). 
Callers must + supply concrete directories so the surface visible to generated code is + the surface the host actually approved. + """ + raw = Path(value).expanduser() + if raw.is_symlink(): + raise ValueError(f"Path {value!r} must not be a symbolic link.") + resolved = raw.resolve(strict=True) if not resolved.is_dir(): raise ValueError(f"Path {value!r} must point to an existing directory.") return resolved @@ -78,8 +88,25 @@ def normalize_file_mount(file_mount: FileMountInput) -> FileMount: def iter_real_files(root: Path) -> Iterator[Path]: - """Walk ``root`` recursively, yielding only real non-symlink files.""" - stack: list[Path] = [root] + """Walk ``root`` recursively, yielding only real non-symlink files. + + Defenses against generated code trying to surface protected host data via + a virtual mount path: + + * Symlinks (file or directory) are skipped so they cannot redirect the + walk to content outside the mount. + * Hardlinks (``st_nlink > 1``) are skipped because a hardlink inside the + mount can point at an inode whose canonical path is outside the mount + (for example ``ln /etc/passwd /input/loot.txt``). + * Every entry's resolved path is required to stay under ``root`` so that + junctions, bind mounts, or any other filesystem feature that ``is_symlink`` + does not flag cannot escape the mount boundary. 
+ """ + try: + root_resolved = root.resolve(strict=True) + except OSError: + return + stack: list[Path] = [root_resolved] while stack: current = stack.pop() try: @@ -90,9 +117,15 @@ def iter_real_files(root: Path) -> Iterator[Path]: try: if entry.is_symlink(): continue + resolved = entry.resolve(strict=False) + if not resolved.is_relative_to(root_resolved): + continue if entry.is_dir(): stack.append(entry) elif entry.is_file(): + stat = entry.lstat() + if stat.st_nlink > 1: + continue yield entry except OSError: continue diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_provider.py b/python/packages/local_codeact/agent_framework_local_codeact/_provider.py index 05a53c8fa8..4d41745aae 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_provider.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_provider.py @@ -37,6 +37,7 @@ def __init__( blocked_imports: set[str] | None = None, allowed_builtins: set[str] | None = None, blocked_builtins: set[str] | None = None, + allowed_os_attrs: set[str] | None = None, ) -> None: """Initialize a local CodeAct context provider. @@ -55,6 +56,8 @@ def __init__( blocked_imports: Custom blocked imports (replaces defaults). allowed_builtins: Custom allowed builtins (replaces defaults). blocked_builtins: Custom blocked builtins (replaces defaults). + allowed_os_attrs: Custom allowed ``os`` attribute names (replaces the + default ``{"environ", "path"}`` allow-list). 
""" super().__init__(source_id) self._execute_code_tool = LocalExecuteCodeTool( @@ -71,6 +74,7 @@ def __init__( blocked_imports=blocked_imports, allowed_builtins=allowed_builtins, blocked_builtins=blocked_builtins, + allowed_os_attrs=allowed_os_attrs, ) def add_tools( diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py index 5a8d70f8b5..2972934fad 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py @@ -54,6 +54,12 @@ "__builtin__", } +# Allowed `os` attribute names. Generated code may only touch `os.environ` and +# `os.path`; everything else (file I/O, process control, mutating helpers, etc.) +# is rejected by default. Users may pass a custom allow-list via +# ``allowed_os_attrs`` on the validator entry points. +ALLOWED_OS_ATTRS: set[str] = {"environ", "path"} + # Allowed builtin function names that generated code may call. # Note: getattr/setattr/hasattr/delattr are NOT included because they can bypass # AST attribute restrictions (e.g., getattr(os, 'system')('...') avoids os.system check). 
@@ -256,6 +262,7 @@ def __init__( blocked_imports: set[str] | None = None, allowed_builtins: set[str] | None = None, blocked_builtins: set[str] | None = None, + allowed_os_attrs: set[str] | None = None, ) -> None: super().__init__() self._errors: list[str] = [] @@ -263,6 +270,7 @@ def __init__( self._blocked_imports = blocked_imports if blocked_imports is not None else BLOCKED_IMPORTS self._allowed_builtins = allowed_builtins if allowed_builtins is not None else ALLOWED_BUILTINS self._blocked_builtins = blocked_builtins if blocked_builtins is not None else BLOCKED_BUILTINS + self._allowed_os_attrs = allowed_os_attrs if allowed_os_attrs is not None else ALLOWED_OS_ATTRS def validate(self, code: str) -> None: """Validate code and raise CodeValidationError if it violates policy.""" @@ -340,63 +348,11 @@ def visit_Call(self, node: ast.Call) -> None: def visit_Attribute(self, node: ast.Attribute) -> None: """Validate attribute access.""" - # Check for dangerous os module operations - if isinstance(node.value, ast.Name) and node.value.id == "os": - # Block dangerous os operations - dangerous_os_attrs = { - "system", - "exec", - "execl", - "execle", - "execlp", - "execlpe", - "execv", - "execve", - "execvp", - "execvpe", - "spawn", - "spawnl", - "spawnle", - "spawnlp", - "spawnlpe", - "spawnv", - "spawnve", - "spawnvp", - "spawnvpe", - "popen", - "popen2", - "popen3", - "popen4", - "fork", - "forkpty", - "kill", - "killpg", - "abort", - "chdir", - "fchdir", - "chroot", - "chmod", - "chown", - "lchown", - "fchmod", - "fchown", - "remove", - "unlink", - "rmdir", - "removedirs", - "rename", - "renames", - "replace", - "link", - "symlink", - "mkdir", - "makedirs", - "access", - "putenv", - "unsetenv", - } - if node.attr in dangerous_os_attrs: - self._errors.append(f"Access to os.{node.attr} is not allowed") + # Enforce the `os` attribute allow-list. Anything outside `ALLOWED_OS_ATTRS` + # (file I/O, process control, mutating helpers, etc.) 
is rejected so the + # validator matches the documented `os.environ` / `os.path`-only contract. + if isinstance(node.value, ast.Name) and node.value.id == "os" and node.attr not in self._allowed_os_attrs: + self._errors.append(f"Access to os.{node.attr} is not allowed") # Block access to certain dangerous attributes if ( @@ -436,6 +392,7 @@ def validate_code( blocked_imports: set[str] | None = None, allowed_builtins: set[str] | None = None, blocked_builtins: set[str] | None = None, + allowed_os_attrs: set[str] | None = None, ) -> None: """Validate generated code against AST allow-lists. @@ -445,6 +402,8 @@ def validate_code( blocked_imports: Custom set of blocked module names (replaces defaults). allowed_builtins: Custom set of allowed builtin names (replaces defaults). blocked_builtins: Custom set of blocked builtin names (replaces defaults). + allowed_os_attrs: Custom set of allowed ``os`` attribute names + (replaces the default ``{"environ", "path"}`` allow-list). Raises: CodeValidationError: If the code violates the allow-list policy. 
@@ -454,5 +413,6 @@ def validate_code( blocked_imports=blocked_imports, allowed_builtins=allowed_builtins, blocked_builtins=blocked_builtins, + allowed_os_attrs=allowed_os_attrs, ) validator.validate(code) diff --git a/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py index a208e3e4f4..6204e677c1 100644 --- a/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py +++ b/python/packages/local_codeact/tests/local_codeact/test_local_codeact.py @@ -227,6 +227,42 @@ async def test_file_capture_skips_symlinks_and_returns_written_files(tmp_path: P assert base64.b64decode(encoded) == b"hello" +@pytest.mark.skipif(sys.platform.startswith("win"), reason="POSIX hardlink semantics required.") +async def test_file_capture_skips_hardlinks_to_outside_files(tmp_path: Path) -> None: + """A hardlink inside the mount must not surface a file whose canonical path is outside it.""" + import os + + mounted = tmp_path / "mounted" + mounted.mkdir() + secret = tmp_path / "secret.txt" + secret.write_text("classified", encoding="utf-8") + os.link(secret, mounted / "loot.txt") + + local_tool = LocalExecuteCodeTool( + file_mounts=[FileMount(mounted, "/output", mode="read-write")], + execution_limits=ProcessExecutionLimits(timeout_seconds=5), + ) + result = await local_tool._run_code( + code=f"from pathlib import Path\nPath({str(mounted)!r}, 'visible.txt').write_text('ok', encoding='utf-8')" + ) + + data_contents = [content for content in result if content.type == "data"] + paths = [content.additional_properties["path"] for content in data_contents] + assert "/output/visible.txt" in paths + assert all("loot.txt" not in path for path in paths) + + +def test_symlinked_mount_root_is_rejected(tmp_path: Path) -> None: + """A symlink at the mount root must be rejected so it cannot expose another directory.""" + real_dir = tmp_path / "real" + real_dir.mkdir() + link = tmp_path / "link" + 
link.symlink_to(real_dir) + + with pytest.raises(ValueError, match="symbolic link"): + LocalExecuteCodeTool(workspace_root=link) + + async def test_unsafe_in_process_mode_runs_code() -> None: local_tool = LocalExecuteCodeTool(execution_mode="unsafe_in_process") result = await local_tool._run_code(code="print('unsafe')\n'ran'") diff --git a/python/packages/local_codeact/tests/local_codeact/test_validator.py b/python/packages/local_codeact/tests/local_codeact/test_validator.py index c2fe732b96..7d1793ca5f 100644 --- a/python/packages/local_codeact/tests/local_codeact/test_validator.py +++ b/python/packages/local_codeact/tests/local_codeact/test_validator.py @@ -214,6 +214,49 @@ def test_validate_blocks_os_popen() -> None: assert "os.popen" in str(exc_info.value) +def test_validate_blocks_os_listdir() -> None: + """os.listdir is not in the default allow-list ({environ, path}) and must be rejected.""" + code = """import os +entries = os.listdir('/etc')""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.listdir" in str(exc_info.value) + + +def test_validate_blocks_os_open() -> None: + """os.open bypasses pathlib mounts and must be rejected by the allow-list.""" + code = """import os +fd = os.open('/etc/passwd', 0)""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.open" in str(exc_info.value) + + +def test_validate_blocks_os_getcwd() -> None: + """Any os.* attribute outside {environ, path} must be rejected by the allow-list.""" + code = """import os +cwd = os.getcwd()""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code) + assert "os.getcwd" in str(exc_info.value) + + +def test_validate_custom_allowed_os_attrs() -> None: + """Custom allowed_os_attrs replaces the default {environ, path} allow-list.""" + code = """import os +entries = os.listdir('/tmp')""" + # Default policy rejects. 
+ with pytest.raises(CodeValidationError): + validate_code(code) + # Caller can opt in to a broader allow-list. + validate_code(code, allowed_os_attrs={"environ", "path", "listdir"}) + # And opting in to a narrower allow-list still rejects environ. + code_env = "import os\nv = os.environ.get('K')" + with pytest.raises(CodeValidationError) as exc_info: + validate_code(code_env, allowed_os_attrs={"path"}) + assert "os.environ" in str(exc_info.value) + + def test_validate_blocks_globals() -> None: """globals() should be blocked.""" code = "g = globals()" From 861945c84b6b1e9aad01f57c9432efe1c33f595b Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 28 May 2026 17:07:16 +0200 Subject: [PATCH 12/12] fix(local-codeact): enforce os.* allow-list on from-imports Mirror the .NET PR fix: visit_ImportFrom now applies the os attribute allow-list to `from os import X` statements so names like `system`, `getcwd`, `open` cannot bypass the visit_Attribute restriction. The behavior continues to honor a caller-supplied allowed_os_attrs override. Adds tests covering the rejection path, mixed allowed/disallowed names, the happy path for default-allowed names, and the custom allow-list extension applying to from-imports too. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_validator.py | 6 +++++ .../tests/local_codeact/test_validator.py | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py index 2972934fad..9b9a3d7101 100644 --- a/python/packages/local_codeact/agent_framework_local_codeact/_validator.py +++ b/python/packages/local_codeact/agent_framework_local_codeact/_validator.py @@ -316,6 +316,12 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: self._errors.append(f"Import from '{node.module}' is not allowed (blocked: {module_name})") elif module_name not in self._allowed_imports: self._errors.append(f"Import from '{node.module}' is not allowed (not in allow-list)") + elif module_name == "os": + # Mirror the os.* attribute allow-list for ``from os import X``, + # otherwise ``from os import system`` would bypass visit_Attribute. 
+ for alias_node in node.names: + if alias_node.name not in self._allowed_os_attrs: + self._errors.append(f"Import from 'os' of '{alias_node.name}' is not allowed") self.generic_visit(node) def visit_Call(self, node: ast.Call) -> None: diff --git a/python/packages/local_codeact/tests/local_codeact/test_validator.py b/python/packages/local_codeact/tests/local_codeact/test_validator.py index 7d1793ca5f..ccb1e2ace4 100644 --- a/python/packages/local_codeact/tests/local_codeact/test_validator.py +++ b/python/packages/local_codeact/tests/local_codeact/test_validator.py @@ -257,6 +257,32 @@ def test_validate_custom_allowed_os_attrs() -> None: assert "os.environ" in str(exc_info.value) +def test_validate_blocks_from_os_import_system() -> None: + """`from os import system` must be rejected — the os.* allow-list applies to from-imports too.""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code("from os import system") + assert "Import from 'os' of 'system'" in str(exc_info.value) + + +def test_validate_blocks_from_os_import_mixed() -> None: + """When `from os import` lists multiple names, only disallowed names are rejected.""" + with pytest.raises(CodeValidationError) as exc_info: + validate_code("from os import environ, system") + msg = str(exc_info.value) + assert "Import from 'os' of 'system'" in msg + assert "of 'environ'" not in msg + + +def test_validate_allows_from_os_import_allowed_names() -> None: + """Allowed names (environ, path) can still be from-imported.""" + validate_code("from os import environ, path\nx = environ.get('HOME')") + + +def test_validate_custom_allowed_os_attrs_applies_to_from_import() -> None: + """An expanded allowed_os_attrs lets a name be imported via `from os import ...`.""" + validate_code("from os import listdir", allowed_os_attrs={"environ", "path", "listdir"}) + + def test_validate_blocks_globals() -> None: """globals() should be blocked.""" code = "g = globals()"