From 992628d322cc888c3c0e9d8a8a602dfa64045221 Mon Sep 17 00:00:00 2001 From: Amit Patil Date: Tue, 19 May 2026 18:53:51 -0700 Subject: [PATCH 1/5] feat: add Superserve sandbox backend --- examples/sandbox/extensions/README.md | 45 + .../sandbox/extensions/superserve_runner.py | 246 ++++++ pyproject.toml | 1 + src/agents/extensions/sandbox/__init__.py | 26 + .../extensions/sandbox/superserve/__init__.py | 31 + .../extensions/sandbox/superserve/sandbox.py | 806 ++++++++++++++++++ tests/extensions/sandbox/test_superserve.py | 760 +++++++++++++++++ 7 files changed, 1915 insertions(+) create mode 100644 examples/sandbox/extensions/superserve_runner.py create mode 100644 src/agents/extensions/sandbox/superserve/__init__.py create mode 100644 src/agents/extensions/sandbox/superserve/sandbox.py create mode 100644 tests/extensions/sandbox/test_superserve.py diff --git a/examples/sandbox/extensions/README.md b/examples/sandbox/extensions/README.md index 837d9dfa28..b9a27b2de4 100644 --- a/examples/sandbox/extensions/README.md +++ b/examples/sandbox/extensions/README.md @@ -243,6 +243,51 @@ export DAYTONA_API_KEY=... uv run python examples/sandbox/extensions/daytona/daytona_runner.py --stream ``` +## Superserve + +### Setup + +Install the repo extra: + +```bash +uv sync --extra superserve +``` + +Create a Superserve account at https://superserve.ai, generate an API key, and export +the required environment variables: + +```bash +export OPENAI_API_KEY=... +export SUPERSERVE_API_KEY=... +``` + +To target staging instead of production, also set: + +```bash +export SUPERSERVE_BASE_URL=https://api-staging.superserve.ai +``` + +### Run + +```bash +uv run python examples/sandbox/extensions/superserve_runner.py --stream +``` + +Useful flags: + +- `--template superserve/python-3.11` — use a different curated template (others: + `superserve/base`, `superserve/node-22`, `superserve/code-interpreter`, + `superserve/python-ml`, `superserve/claude-code`). Team-owned template UUIDs also work. 
+- `--pause-on-exit` — pause the sandbox on shutdown instead of killing it. Superserve sandboxes + never die on their own by default, so this lets you reconnect with `SuperserveSandboxClient.resume` + later without recreating workspace state. +- `--timeout-seconds 300` — opt into an inactivity timeout (off by default). +- `--skip-snapshot-check` — skip the pause/resume snapshot round-trip verification. + +Pause/resume is a first-class part of the Superserve API surface, so the example exercises both +the standard create→exec→shutdown flow and the explicit +`pause → serialize state → resume → read` round-trip. + ## Runloop ### Setup diff --git a/examples/sandbox/extensions/superserve_runner.py b/examples/sandbox/extensions/superserve_runner.py new file mode 100644 index 0000000000..f462f4ef16 --- /dev/null +++ b/examples/sandbox/extensions/superserve_runner.py @@ -0,0 +1,246 @@ +""" +Minimal Superserve-backed sandbox example for manual validation. + +This example mirrors the other cloud extension runners: it creates a tiny workspace, asks a +sandboxed agent to inspect it through one shell tool, prints a short answer, and verifies that +pause/resume preserves workspace state. 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import io +import os +import sys +import tempfile +from pathlib import Path +from typing import cast + +from openai.types.responses import ResponseTextDeltaEvent + +from agents import ModelSettings, Runner +from agents.run import RunConfig +from agents.sandbox import LocalSnapshotSpec, Manifest, SandboxAgent, SandboxRunConfig +from agents.sandbox.session import BaseSandboxSession + +if __package__ is None or __package__ == "": + sys.path.insert(0, str(Path(__file__).resolve().parents[3])) + +from examples.sandbox.misc.example_support import text_manifest +from examples.sandbox.misc.workspace_shell import WorkspaceShellCapability + +try: + from agents.extensions.sandbox import ( + DEFAULT_SUPERSERVE_WORKSPACE_ROOT, + SuperserveSandboxClient, + SuperserveSandboxClientOptions, + ) +except Exception as exc: # pragma: no cover - import path depends on optional extras + raise SystemExit( + "Superserve sandbox examples require the optional repo extra.\n" + "Install it with: uv sync --extra superserve" + ) from exc + + +DEFAULT_QUESTION = "Summarize this cloud sandbox workspace in 2 sentences." +DEFAULT_TEMPLATE = "superserve/base" +SNAPSHOT_CHECK_PATH = Path("snapshot-check.txt") +SNAPSHOT_CHECK_CONTENT = "superserve snapshot round-trip ok\n" + + +def _build_manifest() -> Manifest: + manifest = text_manifest( + { + "README.md": ( + "# Superserve Demo Workspace\n\n" + "This workspace exists to validate the Superserve sandbox backend manually.\n" + ), + "renewal.md": ( + "# Renewal Notes\n\n" + "- Customer: Northwind Health.\n" + "- Renewal date: 2026-04-15.\n" + "- Risk: unresolved SSO setup.\n" + ), + "next_steps.md": ( + "# Next steps\n\n" + "1. Finish the SSO fix.\n" + "2. 
Confirm legal language before procurement review.\n" + ), + } + ) + return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT, entries=manifest.entries) + + +def _require_env(name: str) -> None: + if os.environ.get(name): + return + raise SystemExit(f"{name} must be set before running this example.") + + +async def _read_text(session: BaseSandboxSession, path: Path) -> str: + data = await session.read(path) + text = cast(str | bytes, data.read()) + if isinstance(text, bytes): + return text.decode("utf-8") + return text + + +async def _verify_stop_resume( + *, + template: str, + pause_on_exit: bool, + timeout_seconds: int | None, +) -> None: + client = SuperserveSandboxClient() + manifest = _build_manifest() + with tempfile.TemporaryDirectory(prefix="superserve-snapshot-example-") as snapshot_dir: + sandbox = await client.create( + manifest=manifest, + snapshot=LocalSnapshotSpec(base_path=Path(snapshot_dir)), + options=SuperserveSandboxClientOptions( + template=template, + pause_on_exit=pause_on_exit, + timeout_seconds=timeout_seconds, + ), + ) + + try: + await sandbox.start() + await sandbox.write( + SNAPSHOT_CHECK_PATH, + io.BytesIO(SNAPSHOT_CHECK_CONTENT.encode("utf-8")), + ) + await sandbox.stop() + finally: + await sandbox.shutdown() + + resumed = await client.resume(sandbox.state) + try: + await resumed.start() + restored = await _read_text(resumed, SNAPSHOT_CHECK_PATH) + if restored != SNAPSHOT_CHECK_CONTENT: + raise RuntimeError( + "Snapshot resume verification failed: " + f"expected {SNAPSHOT_CHECK_CONTENT!r}, got {restored!r}" + ) + finally: + await resumed.aclose() + + print("snapshot round-trip ok") + + +async def main( + *, + model: str, + question: str, + template: str, + pause_on_exit: bool, + timeout_seconds: int | None, + stream: bool, + skip_snapshot_check: bool, +) -> None: + _require_env("OPENAI_API_KEY") + _require_env("SUPERSERVE_API_KEY") + + if not skip_snapshot_check: + await _verify_stop_resume( + template=template, + 
pause_on_exit=pause_on_exit, + timeout_seconds=timeout_seconds, + ) + + manifest = _build_manifest() + agent = SandboxAgent( + name="Superserve Sandbox Assistant", + model=model, + instructions=( + "Answer questions about the sandbox workspace. Inspect the files before answering " + "and keep the response concise. " + "Do not invent files or statuses that are not present in the workspace. Cite the " + "file names you inspected." + ), + default_manifest=manifest, + capabilities=[WorkspaceShellCapability()], + model_settings=ModelSettings(tool_choice="required"), + ) + + client = SuperserveSandboxClient() + run_config = RunConfig( + sandbox=SandboxRunConfig( + client=client, + options=SuperserveSandboxClientOptions( + template=template, + pause_on_exit=pause_on_exit, + timeout_seconds=timeout_seconds, + ), + ), + workflow_name="Superserve sandbox example", + ) + + if not stream: + result = await Runner.run(agent, question, run_config=run_config) + print(result.final_output) + return + + stream_result = Runner.run_streamed(agent, question, run_config=run_config) + saw_text_delta = False + async for event in stream_result.stream_events(): + if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): + if not saw_text_delta: + print("assistant> ", end="", flush=True) + saw_text_delta = True + print(event.data.delta, end="", flush=True) + + if saw_text_delta: + print() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") + parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") + parser.add_argument( + "--template", + default=DEFAULT_TEMPLATE, + help=( + "Superserve template name or UUID. Defaults to `superserve/base`. " + "Other curated templates: superserve/python-3.11, superserve/node-22, " + "superserve/code-interpreter, superserve/python-ml, superserve/claude-code." 
+ ), + ) + parser.add_argument( + "--pause-on-exit", + action="store_true", + default=False, + help="Pause the Superserve sandbox on shutdown instead of killing it.", + ) + parser.add_argument( + "--timeout-seconds", + type=int, + default=None, + help=( + "Optional inactivity timeout in seconds. Superserve sandboxes do not die on their own " + "by default; set this to opt into automatic shutdown." + ), + ) + parser.add_argument("--stream", action="store_true", default=False, help="Stream the response.") + parser.add_argument( + "--skip-snapshot-check", + action="store_true", + default=False, + help="Skip the pause/resume snapshot round-trip verification.", + ) + args = parser.parse_args() + + asyncio.run( + main( + model=args.model, + question=args.question, + template=args.template, + pause_on_exit=args.pause_on_exit, + timeout_seconds=args.timeout_seconds, + stream=args.stream, + skip_snapshot_check=args.skip_snapshot_check, + ) + ) diff --git a/pyproject.toml b/pyproject.toml index 4d0122049f..7563a3baf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ cloudflare = ["aiohttp>=3.12,<4"] e2b = ["e2b==2.20.0", "e2b-code-interpreter==2.4.1"] modal = ["modal==1.3.5"] runloop = ["runloop_api_client>=1.16.0,<2.0.0"] +superserve = ["superserve>=0.7.0,<1"] vercel = ["vercel>=0.5.6,<0.6"] s3 = ["boto3>=1.34"] temporal = [ diff --git a/src/agents/extensions/sandbox/__init__.py b/src/agents/extensions/sandbox/__init__.py index d7b082ba1f..e560f5cf20 100644 --- a/src/agents/extensions/sandbox/__init__.py +++ b/src/agents/extensions/sandbox/__init__.py @@ -97,6 +97,20 @@ except Exception: # pragma: no cover _HAS_RUNLOOP = False +try: + from .superserve import ( + DEFAULT_SUPERSERVE_WORKSPACE_ROOT as DEFAULT_SUPERSERVE_WORKSPACE_ROOT, + SuperserveSandboxClient as SuperserveSandboxClient, + SuperserveSandboxClientOptions as SuperserveSandboxClientOptions, + SuperserveSandboxSession as SuperserveSandboxSession, + SuperserveSandboxSessionState as 
SuperserveSandboxSessionState, + SuperserveSandboxTimeouts as SuperserveSandboxTimeouts, + ) + + _HAS_SUPERSERVE = True +except Exception: # pragma: no cover + _HAS_SUPERSERVE = False + try: from .vercel import ( VercelSandboxClient as VercelSandboxClient, @@ -177,6 +191,18 @@ ] ) +if _HAS_SUPERSERVE: + __all__.extend( + [ + "DEFAULT_SUPERSERVE_WORKSPACE_ROOT", + "SuperserveSandboxClient", + "SuperserveSandboxClientOptions", + "SuperserveSandboxSession", + "SuperserveSandboxSessionState", + "SuperserveSandboxTimeouts", + ] + ) + if _HAS_VERCEL: __all__.extend( [ diff --git a/src/agents/extensions/sandbox/superserve/__init__.py b/src/agents/extensions/sandbox/superserve/__init__.py new file mode 100644 index 0000000000..380c278fe4 --- /dev/null +++ b/src/agents/extensions/sandbox/superserve/__init__.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from ....sandbox.errors import ( + ExecTimeoutError, + ExecTransportError, + WorkspaceArchiveReadError, + WorkspaceArchiveWriteError, + WorkspaceReadNotFoundError, +) +from .sandbox import ( + DEFAULT_SUPERSERVE_WORKSPACE_ROOT, + SuperserveSandboxClient, + SuperserveSandboxClientOptions, + SuperserveSandboxSession, + SuperserveSandboxSessionState, + SuperserveSandboxTimeouts, +) + +__all__ = [ + "DEFAULT_SUPERSERVE_WORKSPACE_ROOT", + "ExecTimeoutError", + "ExecTransportError", + "SuperserveSandboxClient", + "SuperserveSandboxClientOptions", + "SuperserveSandboxSession", + "SuperserveSandboxSessionState", + "SuperserveSandboxTimeouts", + "WorkspaceArchiveReadError", + "WorkspaceArchiveWriteError", + "WorkspaceReadNotFoundError", +] diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py new file mode 100644 index 0000000000..c5973bb349 --- /dev/null +++ b/src/agents/extensions/sandbox/superserve/sandbox.py @@ -0,0 +1,806 @@ +""" +Superserve sandbox (https://superserve.ai) implementation. 
+ +This module provides a Superserve-backed sandbox client/session implementation backed by +`superserve.AsyncSandbox`. + +The `superserve` dependency is optional, so package-level exports should guard imports of this +module. Within this module, Superserve SDK imports happen lazily so users without the extra can +still import the package. +""" + +from __future__ import annotations + +import asyncio +import io +import logging +import uuid +from pathlib import Path +from typing import Any, Literal, cast + +from pydantic import BaseModel, Field + +from ....sandbox.errors import ( + ConfigurationError, + ErrorCode, + ExecNonZeroError, + ExecTimeoutError, + ExecTransportError, + ExposedPortUnavailableError, + WorkspaceArchiveReadError, + WorkspaceArchiveWriteError, + WorkspaceReadNotFoundError, + WorkspaceStartError, + WorkspaceWriteTypeError, +) +from ....sandbox.manifest import Manifest +from ....sandbox.session import SandboxSession, SandboxSessionState +from ....sandbox.session.base_sandbox_session import BaseSandboxSession +from ....sandbox.session.dependencies import Dependencies +from ....sandbox.session.manager import Instrumentation +from ....sandbox.session.mount_lifecycle import with_ephemeral_mounts_removed +from ....sandbox.session.runtime_helpers import RESOLVE_WORKSPACE_PATH_HELPER, RuntimeHelperScript +from ....sandbox.session.sandbox_client import BaseSandboxClient, BaseSandboxClientOptions +from ....sandbox.snapshot import SnapshotBase, SnapshotSpec, resolve_snapshot +from ....sandbox.types import ExecResult, ExposedPortEndpoint, User +from ....sandbox.util.retry import ( + exception_chain_contains_type, + exception_chain_has_status_code, + retry_async, +) +from ....sandbox.util.tar_utils import UnsafeTarMemberError, validate_tar_bytes +from ....sandbox.workspace_paths import ( + coerce_posix_path, + posix_path_as_path, + sandbox_path_str, +) + +DEFAULT_SUPERSERVE_WORKSPACE_ROOT = "/workspace" +DEFAULT_SUPERSERVE_TEMPLATE = "superserve/base" 
+_DEFAULT_MANIFEST_ROOT = cast(str, Manifest.model_fields["root"].default) +_SUPERSERVE_TRANSIENT_STATUS_CODES: frozenset[int] = frozenset({408, 425, 429, 500, 502, 503, 504}) + +logger = logging.getLogger(__name__) + + +def _import_superserve_sdk() -> tuple[Any, Any]: + """Lazily import Superserve SDK classes, raising a clear error if missing.""" + try: + from superserve import AsyncSandbox, NetworkConfig + + return AsyncSandbox, NetworkConfig + except ImportError as exc: + raise ImportError( + "SuperserveSandboxClient requires the optional `superserve` dependency.\n" + "Install the Superserve extra before using this sandbox backend." + ) from exc + + +def _import_superserve_errors() -> dict[str, type[BaseException]]: + """Best-effort import of Superserve exception classes for fine-grained mapping.""" + try: + from superserve import ( + AuthenticationError, + ConflictError, + NotFoundError, + SandboxError, + SandboxTimeoutError, + ServerError, + ValidationError, + ) + except Exception: + return {} + return { + "base": SandboxError, + "authentication": AuthenticationError, + "validation": ValidationError, + "not_found": NotFoundError, + "conflict": ConflictError, + "timeout": SandboxTimeoutError, + "server": ServerError, + } + + +def _provider_error_detail(error: BaseException) -> str | None: + message = str(error) + status = getattr(error, "status_code", None) + code = getattr(error, "code", None) + parts: list[str] = [] + if isinstance(status, int): + parts.append(f"HTTP {status}") + if isinstance(code, str) and code: + parts.append(code) + if message: + parts.append(message) + if not parts: + return type(error).__name__ + return ": ".join(parts) + + +def _is_transient_error(exc: BaseException) -> bool: + return exception_chain_has_status_code( + exc, _SUPERSERVE_TRANSIENT_STATUS_CODES + ) or exception_chain_contains_type(exc, (asyncio.TimeoutError,)) + + +def _resolve_manifest_root(manifest: Manifest | None) -> Manifest: + if manifest is None: + return 
Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT) + if manifest.root == _DEFAULT_MANIFEST_ROOT: + return manifest.model_copy(update={"root": DEFAULT_SUPERSERVE_WORKSPACE_ROOT}) + return manifest + + +def _resolve_template(value: str | None) -> str: + return value or DEFAULT_SUPERSERVE_TEMPLATE + + +class SuperserveSandboxTimeouts(BaseModel): + """Timeout configuration for Superserve sandbox operations (seconds).""" + + model_config = {"frozen": True} + + exec_timeout_unbounded_s: int = Field(default=24 * 60 * 60, ge=1) + keepalive_s: int = Field(default=10, ge=1) + cleanup_s: int = Field(default=30, ge=1) + fast_op_s: int = Field(default=30, ge=1) + file_upload_s: int = Field(default=300, ge=1) + file_download_s: int = Field(default=300, ge=1) + workspace_tar_s: int = Field(default=300, ge=1) + + +class SuperserveSandboxClientOptions(BaseSandboxClientOptions): + """Client options for the Superserve sandbox backend.""" + + type: Literal["superserve"] = "superserve" + template: str | None = None + name: str | None = None + env_vars: dict[str, str] | None = None + metadata: dict[str, str] | None = None + network: dict[str, object] | None = None + timeout_seconds: int | None = None + pause_on_exit: bool = False + api_key: str | None = None + base_url: str | None = None + exposed_ports: tuple[int, ...] = () + timeouts: SuperserveSandboxTimeouts | dict[str, object] | None = None + + def __init__( + self, + template: str | None = None, + name: str | None = None, + env_vars: dict[str, str] | None = None, + metadata: dict[str, str] | None = None, + network: dict[str, object] | None = None, + timeout_seconds: int | None = None, + pause_on_exit: bool = False, + api_key: str | None = None, + base_url: str | None = None, + exposed_ports: tuple[int, ...] 
= (), + timeouts: SuperserveSandboxTimeouts | dict[str, object] | None = None, + *, + type: Literal["superserve"] = "superserve", + ) -> None: + super().__init__( + type=type, + template=template, + name=name, + env_vars=env_vars, + metadata=metadata, + network=network, + timeout_seconds=timeout_seconds, + pause_on_exit=pause_on_exit, + api_key=api_key, + base_url=base_url, + exposed_ports=exposed_ports, + timeouts=timeouts, + ) + + +class SuperserveSandboxSessionState(SandboxSessionState): + """Serializable state for a Superserve-backed session.""" + + type: Literal["superserve"] = "superserve" + sandbox_id: str + template: str = DEFAULT_SUPERSERVE_TEMPLATE + name: str | None = None + base_env_vars: dict[str, str] = Field(default_factory=dict) + base_metadata: dict[str, str] = Field(default_factory=dict) + base_network: dict[str, object] | None = None + timeout_seconds: int | None = None + pause_on_exit: bool = False + base_url: str | None = None + api_key: str | None = None + timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts) + + +class SuperserveSandboxSession(BaseSandboxSession): + """SandboxSession implementation backed by a Superserve sandbox.""" + + state: SuperserveSandboxSessionState + _sandbox: Any | None + + def __init__( + self, + *, + state: SuperserveSandboxSessionState, + sandbox: Any | None = None, + ) -> None: + self.state = state + self._sandbox = sandbox + + @classmethod + def from_state( + cls, + state: SuperserveSandboxSessionState, + *, + sandbox: Any | None = None, + ) -> SuperserveSandboxSession: + return cls(state=state, sandbox=sandbox) + + @property + def sandbox_id(self) -> str: + return self.state.sandbox_id + + def supports_pty(self) -> bool: + return False + + def _reject_user_arg( + self, *, op: Literal["exec", "read", "write"], user: str | User + ) -> None: + user_name = user.name if isinstance(user, User) else user + raise ConfigurationError( + message=( + "SuperserveSandboxSession does not 
support sandbox-local users; " + f"`{op}` must be called without `user`" + ), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op=op, + context={"backend": "superserve", "user": user_name}, + ) + + def _prepare_exec_command( + self, + *command: str | Path, + shell: bool | list[str], + user: str | User | None, + ) -> list[str]: + if user is not None: + self._reject_user_arg(op="exec", user=user) + return super()._prepare_exec_command(*command, shell=shell, user=user) + + async def _validate_path_access(self, path: Path | str, *, for_write: bool = False) -> Path: + return await self._validate_remote_path_access(path, for_write=for_write) + + def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]: + return (RESOLVE_WORKSPACE_PATH_HELPER,) + + async def _resolved_envs(self) -> dict[str, str]: + manifest_envs = await self.state.manifest.environment.resolve() + resolved: dict[str, str] = {} + for key, value in {**self.state.base_env_vars, **manifest_envs}.items(): + if value is None: + continue + resolved[key] = value + return resolved + + async def _ensure_sandbox(self) -> Any: + sandbox = self._sandbox + if sandbox is not None: + return sandbox + + AsyncSandbox, NetworkConfig = _import_superserve_sdk() + env_vars = await self._resolved_envs() + network_payload = self.state.base_network + network = ( + NetworkConfig.model_validate(network_payload) if network_payload is not None else None + ) + try: + sandbox = await AsyncSandbox.create( + name=self.state.name or self.state.session_id.hex, + from_template=self.state.template, + timeout_seconds=self.state.timeout_seconds, + metadata=dict(self.state.base_metadata) or None, + env_vars=env_vars or None, + network=network, + api_key=self.state.api_key, + base_url=self.state.base_url, + ) + except Exception as exc: + raise WorkspaceStartError( + path=self._workspace_root_path(), + context={"backend": "superserve", "reason": "create_failed"}, + cause=exc, + message=f"failed to start Superserve sandbox: 
{_provider_error_detail(exc)}", + ) from exc + + self._sandbox = sandbox + self.state.sandbox_id = sandbox.id + return sandbox + + async def _prepare_backend_workspace(self) -> None: + root = self._workspace_root_path() + sandbox = await self._ensure_sandbox() + try: + result = await sandbox.commands.run( + f"mkdir -p -- {_shell_quote(root.as_posix())}", + timeout_seconds=self.state.timeouts.fast_op_s, + ) + except Exception as exc: + raise WorkspaceStartError( + path=root, + context={"backend": "superserve", "reason": "workspace_root_setup_failed"}, + cause=exc, + message=( + "failed to start session: Superserve workspace root setup failed: " + f"{_provider_error_detail(exc)}" + ), + ) from exc + + exit_code = int(getattr(result, "exit_code", 0) or 0) + if exit_code != 0: + stdout = getattr(result, "stdout", "") or "" + stderr = getattr(result, "stderr", "") or "" + raise WorkspaceStartError( + path=root, + context={ + "backend": "superserve", + "reason": "workspace_root_nonzero_exit", + "exit_code": exit_code, + "stdout": stdout, + "stderr": stderr, + }, + message=( + f"failed to start session: Superserve workspace root setup exited with " + f"{exit_code}" + ), + ) + + async def running(self) -> bool: + sandbox = self._sandbox + if sandbox is None: + return False + try: + info = await asyncio.wait_for( + sandbox.get_info(), + timeout=self.state.timeouts.keepalive_s, + ) + except Exception: + return False + status = getattr(info, "status", None) + status_value = getattr(status, "value", status) + return status_value == "active" + + async def shutdown(self) -> None: + await self._shutdown_backend() + + async def _shutdown_backend(self) -> None: + sandbox = self._sandbox + if sandbox is None: + return + try: + if self.state.pause_on_exit: + await sandbox.pause() + else: + await sandbox.kill() + except Exception: + pass + finally: + self._sandbox = None + + async def _exec_internal( + self, + *command: str | Path, + timeout: float | None = None, + ) -> ExecResult: + 
sandbox = await self._ensure_sandbox() + sup_errors = _import_superserve_errors() + timeout_exc = sup_errors.get("timeout") + normalized = [str(part) for part in command] + if not normalized: + return ExecResult(stdout=b"", stderr=b"", exit_code=0) + + command_str = " ".join(_shell_quote(part) for part in normalized) + envs = await self._resolved_envs() + cwd = sandbox_path_str(self.state.manifest.root) + timeout_seconds = ( + None + if timeout is None + else max(1, int(timeout + 0.999)) # round up; Superserve only accepts ints + ) + + try: + result = await sandbox.commands.run( + command_str, + cwd=cwd, + env=envs or None, + timeout_seconds=timeout_seconds, + ) + except asyncio.TimeoutError as exc: + raise ExecTimeoutError( + command=tuple(normalized), timeout_s=timeout, cause=exc + ) from exc + except Exception as exc: + if timeout_exc is not None and isinstance(exc, timeout_exc): + raise ExecTimeoutError( + command=tuple(normalized), timeout_s=timeout, cause=exc + ) from exc + raise ExecTransportError( + command=tuple(normalized), + context={ + "backend": "superserve", + "sandbox_id": self.state.sandbox_id, + "provider_error": _provider_error_detail(exc), + }, + cause=exc, + ) from exc + + stdout = (getattr(result, "stdout", "") or "").encode("utf-8", errors="replace") + stderr = (getattr(result, "stderr", "") or "").encode("utf-8", errors="replace") + exit_code = int(getattr(result, "exit_code", 0) or 0) + return ExecResult(stdout=stdout, stderr=stderr, exit_code=exit_code) + + async def _resolve_exposed_port(self, port: int) -> ExposedPortEndpoint: + raise ExposedPortUnavailableError( + port=port, + exposed_ports=self.state.exposed_ports, + reason="backend_unavailable", + context={ + "backend": "superserve", + "detail": "exposed_ports_not_supported", + }, + ) + + async def read(self, path: Path, *, user: str | User | None = None) -> io.IOBase: + if user is not None: + self._reject_user_arg(op="read", user=user) + sup_errors = _import_superserve_errors() + 
not_found_exc = sup_errors.get("not_found") + + normalized_path = await self._validate_path_access(path) + sandbox = await self._ensure_sandbox() + try: + payload = await sandbox.files.read( + sandbox_path_str(normalized_path), + timeout=self.state.timeouts.file_download_s, + ) + except Exception as exc: + if not_found_exc is not None and isinstance(exc, not_found_exc): + raise WorkspaceReadNotFoundError(path=normalized_path, cause=exc) from exc + raise WorkspaceArchiveReadError(path=normalized_path, cause=exc) from exc + return io.BytesIO(payload) + + async def write( + self, + path: Path, + data: io.IOBase, + *, + user: str | User | None = None, + ) -> None: + if user is not None: + self._reject_user_arg(op="write", user=user) + + normalized_path = await self._validate_path_access(path, for_write=True) + payload = data.read() + if isinstance(payload, str): + payload = payload.encode("utf-8") + if not isinstance(payload, bytes | bytearray): + raise WorkspaceWriteTypeError( + path=normalized_path, + actual_type=type(payload).__name__, + ) + try: + await self._write_bytes_with_retry( + sandbox_path_str(normalized_path), bytes(payload) + ) + except Exception as exc: + raise WorkspaceArchiveWriteError(path=normalized_path, cause=exc) from exc + + @retry_async( + retry_if=lambda exc, self, _path, _data: _is_transient_error(exc), + ) + async def _write_bytes_with_retry(self, path: str, data: bytes) -> None: + sandbox = await self._ensure_sandbox() + await sandbox.files.write(path, data, timeout=self.state.timeouts.file_upload_s) + + async def persist_workspace(self) -> io.IOBase: + return await with_ephemeral_mounts_removed( + self, + self._persist_workspace_internal, + error_path=self._workspace_root_path(), + error_cls=WorkspaceArchiveReadError, + operation_error_context_key="snapshot_error_before_remount_corruption", + ) + + async def _persist_workspace_internal(self) -> io.IOBase: + root = self._workspace_root_path() + archive_path = posix_path_as_path( + 
coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar") + ) + excludes = [ + f"--exclude=./{rel_path.as_posix()}" + for rel_path in sorted( + self._persist_workspace_skip_relpaths(), + key=lambda item: item.as_posix(), + ) + ] + tar_command = ("tar", "cf", archive_path.as_posix(), *excludes, ".") + + sandbox = await self._ensure_sandbox() + sup_errors = _import_superserve_errors() + not_found_exc = sup_errors.get("not_found") + + try: + result = await self.exec(*tar_command, shell=False) + if not result.ok(): + raise WorkspaceArchiveReadError( + path=root, + cause=ExecNonZeroError( + result, + command=tar_command, + context={ + "backend": "superserve", + "sandbox_id": self.state.sandbox_id, + }, + ), + ) + + try: + archive = await sandbox.files.read( + archive_path.as_posix(), + timeout=self.state.timeouts.file_download_s, + ) + except Exception as exc: + if not_found_exc is not None and isinstance(exc, not_found_exc): + raise WorkspaceReadNotFoundError(path=archive_path, cause=exc) from exc + raise + + return io.BytesIO(archive) + except (WorkspaceArchiveReadError, WorkspaceReadNotFoundError): + raise + except Exception as exc: + raise WorkspaceArchiveReadError(path=root, cause=exc) from exc + finally: + try: + await self.exec( + "rm", + "-f", + "--", + archive_path.as_posix(), + shell=False, + ) + except Exception: + pass + + async def hydrate_workspace(self, data: io.IOBase) -> None: + raw = data.read() + if isinstance(raw, str): + raw = raw.encode("utf-8") + if not isinstance(raw, bytes | bytearray): + raise WorkspaceWriteTypeError( + path=self._workspace_root_path(), + actual_type=type(raw).__name__, + ) + + await with_ephemeral_mounts_removed( + self, + lambda: self._hydrate_workspace_internal(bytes(raw)), + error_path=self._workspace_root_path(), + error_cls=WorkspaceArchiveWriteError, + operation_error_context_key="hydrate_error_before_remount_corruption", + ) + + async def _hydrate_workspace_internal(self, raw: bytes) -> None: + root = 
class SuperserveSandboxClient(BaseSandboxClient[SuperserveSandboxClientOptions]):
    """Superserve-backed sandbox client managing sandbox lifecycle via AsyncSandbox.

    The client-level ``api_key`` and ``base_url`` are fallbacks: values carried by
    the per-call options (``create``) or by the persisted state (``resume``) win
    whenever they are set.
    """

    backend_id = "superserve"
    _instrumentation: Instrumentation
    _api_key: str | None
    _base_url: str | None

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        instrumentation: Instrumentation | None = None,
        dependencies: Dependencies | None = None,
    ) -> None:
        """Store client-wide defaults.

        Args:
            api_key: Fallback API key used when options/state do not carry one.
            base_url: Fallback API base URL used when options/state do not carry one.
            instrumentation: Instrumentation passed to every wrapped session; a
                default ``Instrumentation()`` is created when omitted.
            dependencies: Optional dependencies object stored on the client.
        """
        super().__init__()
        self._api_key = api_key
        self._base_url = base_url
        self._instrumentation = instrumentation or Instrumentation()
        self._dependencies = dependencies

    def _resolve_timeouts(
        self,
        value: SuperserveSandboxTimeouts | dict[str, object] | None,
    ) -> SuperserveSandboxTimeouts:
        """Coerce ``value`` into a ``SuperserveSandboxTimeouts`` instance.

        An existing instance is returned unchanged, ``None`` yields the defaults,
        and any other value is validated through ``model_validate``.
        """
        if isinstance(value, SuperserveSandboxTimeouts):
            return value
        if value is None:
            return SuperserveSandboxTimeouts()
        return SuperserveSandboxTimeouts.model_validate(value)

    async def create(
        self,
        *,
        snapshot: SnapshotSpec | SnapshotBase | None = None,
        manifest: Manifest | None = None,
        options: SuperserveSandboxClientOptions,
    ) -> SandboxSession:
        """Build session state from ``options`` and ensure a backing sandbox exists.

        Args:
            snapshot: Snapshot spec/instance resolved against the new session id.
            manifest: Workspace manifest; its root is resolved by
                ``_resolve_manifest_root``.
            options: Provider options (template, env vars, metadata, network,
                timeouts, credentials, ...).

        Returns:
            The instrumented session wrapping the new ``SuperserveSandboxSession``.
        """
        resolved_manifest = _resolve_manifest_root(manifest)
        timeouts = self._resolve_timeouts(options.timeouts)
        # Per-call credentials take precedence over the client-wide defaults.
        api_key = options.api_key or self._api_key
        base_url = options.base_url or self._base_url
        template = _resolve_template(options.template)

        session_id = uuid.uuid4()
        sandbox_name = options.name or f"openai-agents-{session_id.hex[:12]}"
        snapshot_instance = resolve_snapshot(snapshot, str(session_id))

        state = SuperserveSandboxSessionState(
            session_id=session_id,
            manifest=resolved_manifest,
            snapshot=snapshot_instance,
            # Left empty here; `_ensure_sandbox()` below is expected to fill it in.
            sandbox_id="",
            template=template,
            name=sandbox_name,
            base_env_vars=dict(options.env_vars or {}),
            base_metadata=dict(options.metadata or {}),
            base_network=dict(options.network) if options.network is not None else None,
            timeout_seconds=options.timeout_seconds,
            pause_on_exit=options.pause_on_exit,
            base_url=base_url,
            api_key=api_key,
            timeouts=timeouts,
            exposed_ports=options.exposed_ports,
        )
        inner = SuperserveSandboxSession.from_state(state)
        await inner._ensure_sandbox()
        return self._wrap_session(inner, instrumentation=self._instrumentation)

    async def delete(self, session: SandboxSession) -> SandboxSession:
        """Shut down the sandbox behind ``session`` (best effort) and return the session.

        Shutdown errors never propagate, but they are logged so that a sandbox
        left behind by a failed shutdown does not go unnoticed.

        Raises:
            TypeError: If ``session`` does not wrap a ``SuperserveSandboxSession``.
        """
        inner = session._inner
        if not isinstance(inner, SuperserveSandboxSession):
            raise TypeError(
                "SuperserveSandboxClient.delete expects a SuperserveSandboxSession"
            )
        try:
            await inner.shutdown()
        except Exception as exc:
            # Deliberately best-effort: report the failure instead of raising.
            logger.warning(
                "superserve sandbox %s shutdown failed during delete: %s",
                inner.state.sandbox_id,
                exc,
            )
        return session

    async def resume(self, state: SandboxSessionState) -> SandboxSession:
        """Reattach to the sandbox recorded in ``state``, or fall back to a new one.

        When ``state.sandbox_id`` is set the client connects to it and calls
        ``resume()`` if the reported status is ``paused`` or ``resuming``. A
        ``failed`` status, or any error while connecting/resuming, discards the
        old sandbox: the id and ``workspace_root_ready`` flag are reset and
        ``_ensure_sandbox()`` is invoked on the new session.

        Raises:
            TypeError: If ``state`` is not a ``SuperserveSandboxSessionState``.
        """
        if not isinstance(state, SuperserveSandboxSessionState):
            raise TypeError(
                "SuperserveSandboxClient.resume expects a SuperserveSandboxSessionState"
            )

        AsyncSandbox, _ = _import_superserve_sdk()
        sup_errors = _import_superserve_errors()
        not_found_exc = sup_errors.get("not_found")

        api_key = state.api_key or self._api_key
        base_url = state.base_url or self._base_url
        # Backfill missing credentials on the state so later calls reuse them.
        if state.api_key is None and api_key is not None:
            state.api_key = api_key
        if state.base_url is None and base_url is not None:
            state.base_url = base_url

        sandbox: Any | None = None
        reconnected = False

        if state.sandbox_id:
            try:
                sandbox = await AsyncSandbox.connect(
                    state.sandbox_id,
                    api_key=api_key,
                    base_url=base_url,
                )
                status = getattr(sandbox, "status", None)
                # `status` may be an enum-like object or a plain string.
                status_value = getattr(status, "value", status)
                if status_value in ("paused", "resuming"):
                    await sandbox.resume()
                elif status_value == "failed":
                    sandbox = None
                # Any other status (e.g. "active") is treated as already running.
                if sandbox is not None:
                    reconnected = True
            except Exception as exc:
                if not_found_exc is not None and isinstance(exc, not_found_exc):
                    logger.debug(
                        "superserve sandbox %s not found, will recreate", state.sandbox_id
                    )
                else:
                    logger.debug(
                        "superserve connect/resume failed (will recreate): %s", exc
                    )
                sandbox = None

        if sandbox is None:
            # Forget the old sandbox so the session provisions from scratch.
            state.sandbox_id = ""
            state.workspace_root_ready = False

        inner = SuperserveSandboxSession.from_state(state, sandbox=sandbox)
        if sandbox is None:
            await inner._ensure_sandbox()
        inner._set_start_state_preserved(reconnected, system=reconnected)
        return self._wrap_session(inner, instrumentation=self._instrumentation)

    def deserialize_session_state(self, payload: dict[str, object]) -> SandboxSessionState:
        """Validate ``payload`` into a ``SuperserveSandboxSessionState``."""
        return SuperserveSandboxSessionState.model_validate(payload)
"DEFAULT_SUPERSERVE_WORKSPACE_ROOT", + "SuperserveSandboxClient", + "SuperserveSandboxClientOptions", + "SuperserveSandboxSession", + "SuperserveSandboxSessionState", + "SuperserveSandboxTimeouts", +] diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py new file mode 100644 index 0000000000..01540cd054 --- /dev/null +++ b/tests/extensions/sandbox/test_superserve.py @@ -0,0 +1,760 @@ +from __future__ import annotations + +import importlib +import io +import sys +import tarfile +import types +from pathlib import Path +from typing import Any, cast + +import pytest +from pydantic import BaseModel + +from agents.sandbox import Manifest +from agents.sandbox.entries import File +from agents.sandbox.errors import ( + ConfigurationError, + ExposedPortUnavailableError, + InvalidManifestPathError, +) +from agents.sandbox.snapshot import NoopSnapshot +from agents.sandbox.types import User +from tests._fake_workspace_paths import resolve_fake_workspace_path + + +class _FakeCommandResult: + def __init__(self, *, stdout: str = "", stderr: str = "", exit_code: int = 0) -> None: + self.stdout = stdout + self.stderr = stderr + self.exit_code = exit_code + + +class _FakeSandboxInfo(BaseModel): + status: str = "active" + + +class _FakeNetworkConfig(BaseModel): + allow_out: list[str] | None = None + deny_out: list[str] | None = None + + +class _SuperserveNotFoundError(Exception): + status_code = 404 + + +class _SuperserveAuthenticationError(Exception): + status_code = 401 + + +class _SuperserveValidationError(Exception): + status_code = 400 + + +class _SuperserveConflictError(Exception): + status_code = 409 + + +class _SuperserveServerError(Exception): + status_code = 500 + + +class _SuperserveSandboxTimeoutError(Exception): + pass + + +class _SuperserveSandboxError(Exception): + pass + + +class _FakeCommands: + def __init__(self, sandbox: _FakeAsyncSandbox) -> None: + self._sandbox = sandbox + self.calls: list[dict[str, object]] = [] + 
async def run(
    self,
    command: str,
    *,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
    timeout_seconds: int | None = None,
    on_stdout: object | None = None,
    on_stderr: object | None = None,
) -> _FakeCommandResult:
    """Record the call, then emulate ``command`` against the in-memory file store.

    Resolution order: queued failures, queued results, the workspace-path
    resolution helper, then built-in handlers for ``mkdir -p``, ``tar cf``,
    ``tar xf`` and ``rm -f --``. Any other command succeeds with exit code 0.
    """
    del on_stdout, on_stderr  # Accepted for signature parity; never invoked.
    sandbox = self._sandbox
    self.calls.append(
        {
            "command": command,
            "cwd": cwd,
            "env": None if env is None else dict(env),
            "timeout_seconds": timeout_seconds,
        }
    )

    # Test hooks: a queued failure takes priority over a queued result.
    if sandbox.command_failures:
        raise sandbox.command_failures.pop(0)
    if sandbox.command_results:
        queued = sandbox.command_results.pop(0)
        if queued is not None:
            return queued

    # Workspace-path resolution helper used by the base session for
    # `_validate_remote_path_access`.
    resolution = resolve_fake_workspace_path(
        command,
        symlinks=sandbox.symlinks,
        home_dir="/workspace",
    )
    if resolution is not None:
        return _FakeCommandResult(
            exit_code=resolution.exit_code,
            stdout=resolution.stdout,
            stderr=resolution.stderr,
        )

    store = sandbox._file_store

    # Built-in handlers for the shell shapes the session is known to emit.
    if command.startswith("mkdir -p"):
        return _FakeCommandResult(exit_code=0)

    if command.startswith("tar cf"):
        # tar cf <archive> [--exclude=./X ...] .
        words = command.split()
        target = words[2]
        use_relative_names = words[-1] == "."
        excluded = {
            word.removeprefix("--exclude=./")
            for word in words
            if word.startswith("--exclude=./")
        }
        prefix = (cwd or "/").rstrip("/") + "/"
        packed = io.BytesIO()
        with tarfile.open(fileobj=packed, mode="w") as tar:
            for path, content in sorted(store.items()):
                if not path.startswith(prefix):
                    continue
                relative = path[len(prefix) :]
                skipped = any(
                    relative == item or relative.startswith(f"{item}/") for item in excluded
                )
                if skipped:
                    continue
                entry = tarfile.TarInfo(name=relative if use_relative_names else path)
                entry.size = len(content)
                tar.addfile(entry, io.BytesIO(content))
        store[target] = packed.getvalue()
        return _FakeCommandResult(exit_code=0)

    if command.startswith("tar xf"):
        words = command.split()
        source = words[2]
        destination = words[-1]
        payload = store.get(source)
        if payload is None:
            return _FakeCommandResult(exit_code=1, stderr="archive missing")
        with tarfile.open(fileobj=io.BytesIO(payload), mode="r") as tar:
            # Only regular files are materialised; directories and links are skipped.
            for member in (m for m in tar.getmembers() if m.isfile()):
                extracted = tar.extractfile(member)
                assert extracted is not None
                store[f"{destination.rstrip('/')}/{member.name}"] = extracted.read()
        return _FakeCommandResult(exit_code=0)

    if command.startswith("rm -f --"):
        # Everything after `rm -f --` is treated as a path to forget.
        for doomed in command.split()[3:]:
            store.pop(doomed, None)
        return _FakeCommandResult(exit_code=0)

    return _FakeCommandResult(exit_code=0)
isinstance(content, str) else bytes(content) + self.write_calls.append((path, payload)) + self._sandbox._file_store[path] = payload + + async def read(self, path: str, *, timeout: float | None = None) -> bytes: + _ = timeout + self.read_calls.append(path) + if path not in self._sandbox._file_store: + raise _SuperserveNotFoundError(f"missing {path}") + return self._sandbox._file_store[path] + + +class _FakeAsyncSandbox: + create_calls: list[dict[str, object]] = [] + connect_calls: list[dict[str, object]] = [] + sandboxes: dict[str, _FakeAsyncSandbox] = {} + fail_connect_ids: set[str] = set() + create_failures: list[BaseException] = [] + + def __init__(self, *, sandbox_id: str, status: str = "active") -> None: + self.id = sandbox_id + self.name = sandbox_id + self.status = status + self.metadata: dict[str, str] = {} + self._file_store: dict[str, bytes] = {} + self.symlinks: dict[str, str] = {} + self.command_results: list[_FakeCommandResult] = [] + self.command_failures: list[BaseException] = [] + self.write_failures: list[BaseException] = [] + self.pause_calls = 0 + self.resume_calls = 0 + self.kill_calls = 0 + self.commands = _FakeCommands(self) + self.files = _FakeFiles(self) + + @classmethod + def reset(cls) -> None: + cls.create_calls = [] + cls.connect_calls = [] + cls.sandboxes = {} + cls.fail_connect_ids = set() + cls.create_failures = [] + + @classmethod + async def create(cls, **kwargs: object) -> _FakeAsyncSandbox: + cls.create_calls.append(dict(kwargs)) + if cls.create_failures: + raise cls.create_failures.pop(0) + sandbox_id = f"sup-{len(cls.create_calls)}" + sandbox = cls(sandbox_id=sandbox_id) + sandbox.metadata = dict(cast(dict[str, str], kwargs.get("metadata") or {})) + cls.sandboxes[sandbox_id] = sandbox + return sandbox + + @classmethod + async def connect(cls, sandbox_id: str, **kwargs: object) -> _FakeAsyncSandbox: + cls.connect_calls.append({"sandbox_id": sandbox_id, **kwargs}) + if sandbox_id in cls.fail_connect_ids: + raise 
_SuperserveNotFoundError(f"sandbox {sandbox_id} not found") + sandbox = cls.sandboxes.get(sandbox_id) + if sandbox is None: + raise _SuperserveNotFoundError(f"sandbox {sandbox_id} not found") + return sandbox + + async def get_info(self) -> _FakeSandboxInfo: + return _FakeSandboxInfo(status=self.status) + + async def pause(self) -> None: + self.pause_calls += 1 + self.status = "paused" + + async def resume(self) -> None: + self.resume_calls += 1 + self.status = "active" + + async def kill(self) -> None: + self.kill_calls += 1 + self.status = "deleted" + +def _load_superserve_module(monkeypatch: pytest.MonkeyPatch) -> Any: + _FakeAsyncSandbox.reset() + + fake_module = types.ModuleType("superserve") + fake_module.AsyncSandbox = _FakeAsyncSandbox # type: ignore[attr-defined] + fake_module.NetworkConfig = _FakeNetworkConfig # type: ignore[attr-defined] + fake_module.NotFoundError = _SuperserveNotFoundError # type: ignore[attr-defined] + fake_module.AuthenticationError = _SuperserveAuthenticationError # type: ignore[attr-defined] + fake_module.ValidationError = _SuperserveValidationError # type: ignore[attr-defined] + fake_module.ConflictError = _SuperserveConflictError # type: ignore[attr-defined] + fake_module.ServerError = _SuperserveServerError # type: ignore[attr-defined] + fake_module.SandboxTimeoutError = _SuperserveSandboxTimeoutError # type: ignore[attr-defined] + fake_module.SandboxError = _SuperserveSandboxError # type: ignore[attr-defined] + + monkeypatch.setitem(sys.modules, "superserve", fake_module) + sys.modules.pop("agents.extensions.sandbox.superserve.sandbox", None) + sys.modules.pop("agents.extensions.sandbox.superserve", None) + + return importlib.import_module("agents.extensions.sandbox.superserve.sandbox") + + +# --------------------------------------------------------------------------- +# Package re-exports & basic shape +# --------------------------------------------------------------------------- + + +def 
def test_superserve_options_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
    """Client options survive a JSON dump followed by ``model_validate``."""
    options_cls = _load_superserve_module(monkeypatch).SuperserveSandboxClientOptions
    original = options_cls(
        template="superserve/python-3.11",
        env_vars={"HELLO": "world"},
        metadata={"team": "agents"},
        pause_on_exit=True,
        timeout_seconds=300,
    )

    restored = options_cls.model_validate(original.model_dump(mode="json"))

    assert restored.template == "superserve/python-3.11"
    assert restored.env_vars == {"HELLO": "world"}
    assert restored.metadata == {"team": "agents"}
    assert restored.pause_on_exit is True
    assert restored.timeout_seconds == 300
@pytest.mark.asyncio
async def test_superserve_create_passes_provider_options(monkeypatch: pytest.MonkeyPatch) -> None:
    """``create`` forwards template, env vars, metadata and timeout to the SDK."""
    module = _load_superserve_module(monkeypatch)
    sandbox_client = module.SuperserveSandboxClient()
    provider_options = module.SuperserveSandboxClientOptions(
        template="superserve/python-3.11",
        env_vars={"HELLO": "world"},
        metadata={"team": "agents"},
        timeout_seconds=600,
    )

    session = await sandbox_client.create(manifest=Manifest(), options=provider_options)

    # Exactly one provider-side create, carrying the options verbatim.
    assert len(_FakeAsyncSandbox.create_calls) == 1
    recorded = _FakeAsyncSandbox.create_calls[0]
    assert recorded["from_template"] == "superserve/python-3.11"
    assert recorded["env_vars"] == {"HELLO": "world"}
    assert recorded["metadata"] == {"team": "agents"}
    assert recorded["timeout_seconds"] == 600

    # The session state reflects the sandbox that was just created.
    assert session._inner.state.sandbox_id == "sup-1"
    expected_root = module.DEFAULT_SUPERSERVE_WORKSPACE_ROOT
    assert session._inner.state.manifest.root == expected_root
    assert session._inner.state.template == "superserve/python-3.11"
@pytest.mark.asyncio
async def test_superserve_exec_propagates_command_result(monkeypatch: pytest.MonkeyPatch) -> None:
    """``exec`` surfaces the provider's stdout/stderr as bytes and runs in the workspace root."""
    module = _load_superserve_module(monkeypatch)
    session_state = module.SuperserveSandboxSessionState(
        session_id="00000000-0000-0000-0000-000000000002",
        manifest=Manifest(),
        snapshot=NoopSnapshot(id="snapshot"),
        sandbox_id="sup-existing",
    )
    fake = _FakeAsyncSandbox(sandbox_id="sup-existing")
    scripted = _FakeCommandResult(stdout="hello\n", stderr="warn\n", exit_code=0)
    fake.command_results.append(scripted)
    session = module.SuperserveSandboxSession.from_state(session_state, sandbox=fake)

    outcome = await session.exec("echo", "hello", shell=False)

    assert outcome.ok()
    assert outcome.stdout == b"hello\n"
    assert outcome.stderr == b"warn\n"
    first_call = fake.commands.calls[0]
    assert first_call["cwd"] == "/workspace"
@pytest.mark.asyncio
async def test_superserve_read_and_write_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
    """A relative write lands under the manifest root and reads back unchanged."""
    module = _load_superserve_module(monkeypatch)
    fake = _FakeAsyncSandbox(sandbox_id="sup-existing")
    session = module.SuperserveSandboxSession.from_state(
        module.SuperserveSandboxSessionState(
            session_id="00000000-0000-0000-0000-000000000004",
            manifest=Manifest(root="/workspace"),
            snapshot=NoopSnapshot(id="snapshot"),
            sandbox_id="sup-existing",
        ),
        sandbox=fake,
    )
    target = Path("notes.txt")

    await session.write(target, io.BytesIO(b"payload"))
    stream = await session.read(target)

    assert fake.files.write_calls == [("/workspace/notes.txt", b"payload")]
    assert stream.read() == b"payload"
@pytest.mark.asyncio
async def test_superserve_rejects_sandbox_local_user_arguments(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """exec/read/write all refuse a ``user`` argument with a ConfigurationError."""
    module = _load_superserve_module(monkeypatch)
    session = await module.SuperserveSandboxClient().create(
        manifest=Manifest(root="/workspace/project"),
        options=module.SuperserveSandboxClientOptions(),
    )

    def rejects_user() -> Any:
        # A fresh context manager per call; `pytest.raises` objects are single-use.
        return pytest.raises(ConfigurationError, match="does not support sandbox-local users")

    with rejects_user():
        await session.exec("pwd", user="sandbox-user")
    with rejects_user():
        await session.read("notes.txt", user=User(name="sandbox-user"))
    with rejects_user():
        await session.write("notes.txt", io.BytesIO(b"payload"), user="sandbox-user")
@pytest.mark.asyncio
async def test_superserve_start_creates_workspace_and_materializes_manifest(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``start`` creates the workspace root first, then writes the manifest entries."""
    module = _load_superserve_module(monkeypatch)
    workspace_manifest = Manifest(
        root="/workspace",
        entries={"notes.txt": File(content=b"payload")},
    )
    fake = _FakeAsyncSandbox(sandbox_id="sup-existing")
    session = module.SuperserveSandboxSession.from_state(
        module.SuperserveSandboxSessionState(
            session_id="00000000-0000-0000-0000-000000000010",
            manifest=workspace_manifest,
            snapshot=NoopSnapshot(id="snapshot"),
            sandbox_id="sup-existing",
        ),
        sandbox=fake,
    )

    await session.start()
    stored = await session.read(Path("notes.txt"))

    # First exec is the workspace-root mkdir.
    first_command = fake.commands.calls[0]["command"]
    assert first_command.startswith("mkdir -p")
    assert ("/workspace/notes.txt", b"payload") in fake.files.write_calls
    assert session.state.workspace_root_ready is True
    assert stored.read() == b"payload"
+ + +# --------------------------------------------------------------------------- +# Shutdown semantics: pause-on-exit vs kill +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_superserve_shutdown_pauses_when_pause_on_exit_true( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000030", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sup-existing", + pause_on_exit=True, + ) + sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing") + session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox) + + await session.shutdown() + + assert sandbox.pause_calls == 1 + assert sandbox.kill_calls == 0 + + +@pytest.mark.asyncio +async def test_superserve_shutdown_kills_when_pause_on_exit_false( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000031", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sup-existing", + pause_on_exit=False, + ) + sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing") + session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox) + + await session.shutdown() + + assert sandbox.kill_calls == 1 + assert sandbox.pause_calls == 0 + + +# --------------------------------------------------------------------------- +# Resume contract +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_superserve_resume_reconnects_active_sandbox( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-existing", 
@pytest.mark.asyncio
async def test_superserve_resume_calls_resume_for_paused_sandbox(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Resuming a paused sandbox calls ``resume()`` once and creates nothing new."""
    module = _load_superserve_module(monkeypatch)
    paused = _FakeAsyncSandbox(sandbox_id="sup-paused", status="paused")
    # Register the sandbox so the fake `connect` can find it.
    _FakeAsyncSandbox.sandboxes[paused.id] = paused
    persisted_state = module.SuperserveSandboxSessionState(
        session_id="00000000-0000-0000-0000-000000000041",
        manifest=Manifest(),
        snapshot=NoopSnapshot(id="snapshot"),
        sandbox_id=paused.id,
    )

    resumed = await module.SuperserveSandboxClient().resume(persisted_state)

    assert paused.resume_calls == 1
    assert resumed._inner.state.sandbox_id == paused.id
    assert _FakeAsyncSandbox.create_calls == []
@pytest.mark.asyncio
async def test_superserve_persist_and_hydrate_workspace_round_trip(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A workspace tar persisted from one sandbox hydrates correctly into another."""
    module = _load_superserve_module(monkeypatch)

    def open_session(session_id: str, backing: _FakeAsyncSandbox) -> Any:
        # Session rooted at /workspace and bound to the given fake sandbox.
        session_state = module.SuperserveSandboxSessionState(
            session_id=session_id,
            manifest=Manifest(root="/workspace"),
            snapshot=NoopSnapshot(id="snapshot"),
            sandbox_id=backing.id,
        )
        return module.SuperserveSandboxSession.from_state(session_state, sandbox=backing)

    source_sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
    source_sandbox._file_store["/workspace/notes.txt"] = b"payload"
    source = open_session("00000000-0000-0000-0000-000000000050", source_sandbox)

    archive = (await source.persist_workspace()).read()
    assert isinstance(archive, bytes)
    assert archive  # non-empty tar

    # Hydrate into a *new* sandbox; verify the file lands at the expected path.
    target_sandbox = _FakeAsyncSandbox(sandbox_id="sup-other")
    target = open_session("00000000-0000-0000-0000-000000000051", target_sandbox)
    await target.hydrate_workspace(io.BytesIO(archive))
    assert target_sandbox._file_store["/workspace/notes.txt"] == b"payload"
BaseException) -> str | None: return ": ".join(parts) +def _superserve_error_context(error: BaseException) -> dict[str, object]: + """Structured error context — split status/code/message so consumers don't parse strings.""" + context: dict[str, object] = { + "backend": "superserve", + "cause_type": type(error).__name__, + } + message = str(error) + if message: + context["provider_message"] = message + status = getattr(error, "status_code", None) + if isinstance(status, int): + context["http_status"] = status + code = getattr(error, "code", None) + if isinstance(code, str) and code: + context["provider_code"] = code + return context + + +def _superserve_exec_transport_error( + *, + command: tuple[str | Path, ...], + cause: BaseException, + sandbox_id: str | None = None, +) -> ExecTransportError: + context = _superserve_error_context(cause) + if sandbox_id: + context["sandbox_id"] = sandbox_id + detail = _provider_error_detail(cause) + message = "Superserve exec failed" + if detail: + message = f"{message}: {detail}" + return ExecTransportError(command=command, context=context, cause=cause, message=message) + + +def _is_superserve_conflict(error: BaseException, conflict_exc: type[BaseException] | None) -> bool: + if conflict_exc is not None and isinstance(error, conflict_exc): + return True + return exception_chain_has_status_code(error, frozenset({409})) + + def _is_transient_error(exc: BaseException) -> bool: return exception_chain_has_status_code( exc, _SUPERSERVE_TRANSIENT_STATUS_CODES @@ -124,6 +172,15 @@ def _is_transient_error(exc: BaseException) -> bool: def _resolve_manifest_root(manifest: Manifest | None) -> Manifest: + """Resolve the manifest root for a Superserve sandbox. + + - No manifest → fresh manifest rooted at `/workspace`. + - Manifest whose root is the SDK's default placeholder (`Manifest.model_fields["root"].default`) + → rewrite the root to the Superserve default `/workspace` for ergonomics. 
+ - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. We do not + reject arbitrary roots; this mirrors Vercel's behaviour and lets callers stage work outside + `/workspace` deliberately. If you need confinement, set extra path grants on the manifest. + """ if manifest is None: return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT) if manifest.root == _DEFAULT_MANIFEST_ROOT: @@ -147,6 +204,8 @@ class SuperserveSandboxTimeouts(BaseModel): file_upload_s: int = Field(default=300, ge=1) file_download_s: int = Field(default=300, ge=1) workspace_tar_s: int = Field(default=300, ge=1) + resume_ready_timeout_s: int = Field(default=60, ge=1) + resume_ready_poll_interval_s: float = Field(default=1.0, gt=0) class SuperserveSandboxClientOptions(BaseSandboxClientOptions): @@ -275,6 +334,10 @@ async def _validate_path_access(self, path: Path | str, *, for_write: bool = Fal def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]: return (RESOLVE_WORKSPACE_PATH_HELPER,) + def _current_runtime_helper_cache_key(self) -> object | None: + """Invalidate helper-script cache when the backing sandbox is swapped on resume.""" + return self.state.sandbox_id or None + async def _resolved_envs(self) -> dict[str, str]: manifest_envs = await self.state.manifest.environment.resolve() resolved: dict[str, str] = {} @@ -290,6 +353,8 @@ async def _ensure_sandbox(self) -> Any: return sandbox AsyncSandbox, NetworkConfig = _import_superserve_sdk() + sup_errors = _import_superserve_errors() + conflict_exc = sup_errors.get("conflict") env_vars = await self._resolved_envs() network_payload = self.state.base_network network = ( @@ -307,9 +372,14 @@ async def _ensure_sandbox(self) -> Any: base_url=self.state.base_url, ) except Exception as exc: + reason = ( + "name_collision" if _is_superserve_conflict(exc, conflict_exc) else "create_failed" + ) + context = _superserve_error_context(exc) + context["reason"] = reason raise WorkspaceStartError( 
path=self._workspace_root_path(), - context={"backend": "superserve", "reason": "create_failed"}, + context=context, cause=exc, message=f"failed to start Superserve sandbox: {_provider_error_detail(exc)}", ) from exc @@ -318,18 +388,84 @@ async def _ensure_sandbox(self) -> Any: self.state.sandbox_id = sandbox.id return sandbox + async def _wait_until_active( + self, + *, + timeout_s: float | None = None, + poll_interval_s: float | None = None, + ) -> None: + """Poll get_info() until status is `active`, or raise. + + Used after `await sandbox.resume()` to guarantee the sandbox is ready before the caller + runs the first exec. Superserve's resume() returns once the API has accepted the request; + the sandbox may still be in `resuming` for a short window. + """ + sandbox = self._sandbox + if sandbox is None: + return + deadline = time.monotonic() + (timeout_s or self.state.timeouts.resume_ready_timeout_s) + interval = poll_interval_s or self.state.timeouts.resume_ready_poll_interval_s + last_status: str | None = None + while True: + try: + info = await asyncio.wait_for( + sandbox.get_info(), + timeout=self.state.timeouts.keepalive_s, + ) + except Exception as exc: + raise WorkspaceStartError( + path=self._workspace_root_path(), + context=_superserve_error_context(exc) | {"reason": "wait_until_active_failed"}, + cause=exc, + message=f"failed to confirm sandbox active: {_provider_error_detail(exc)}", + ) from exc + status = getattr(info, "status", None) + last_status = getattr(status, "value", status) + if last_status in _SUPERSERVE_ACTIVE_STATUSES: + return + if last_status in _SUPERSERVE_TERMINAL_STATUSES: + raise WorkspaceStartError( + path=self._workspace_root_path(), + context={ + "backend": "superserve", + "reason": "sandbox_failed_during_resume", + "sandbox_status": last_status, + }, + message=( + f"sandbox reached terminal status {last_status!r} during resume" + ), + ) + if time.monotonic() >= deadline: + raise WorkspaceStartError( + 
path=self._workspace_root_path(), + context={ + "backend": "superserve", + "reason": "wait_until_active_timeout", + "sandbox_status": last_status, + "timeout_s": timeout_s or self.state.timeouts.resume_ready_timeout_s, + }, + message=( + f"sandbox did not become active within " + f"{timeout_s or self.state.timeouts.resume_ready_timeout_s}s " + f"(last status: {last_status!r})" + ), + ) + await asyncio.sleep(interval) + async def _prepare_backend_workspace(self) -> None: root = self._workspace_root_path() sandbox = await self._ensure_sandbox() try: result = await sandbox.commands.run( - f"mkdir -p -- {_shell_quote(root.as_posix())}", + f"mkdir -p -- {shlex.quote(root.as_posix())}", timeout_seconds=self.state.timeouts.fast_op_s, ) except Exception as exc: + context = _superserve_error_context(exc) + context["reason"] = "workspace_root_setup_failed" raise WorkspaceStartError( path=root, - context={"backend": "superserve", "reason": "workspace_root_setup_failed"}, + context=context, cause=exc, message=( "failed to start session: Superserve workspace root setup failed: " @@ -400,14 +536,11 @@ async def _exec_internal( if not normalized: return ExecResult(stdout=b"", stderr=b"", exit_code=0) - command_str = " ".join(_shell_quote(part) for part in normalized) + command_str = shlex.join(normalized) envs = await self._resolved_envs() cwd = sandbox_path_str(self.state.manifest.root) - timeout_seconds = ( - None - if timeout is None - else max(1, int(timeout + 0.999)) # round up; Superserve only accepts ints - ) + # Superserve accepts only int seconds; round up so we never undershoot the caller. 
+ timeout_seconds = None if timeout is None else max(1, math.ceil(timeout)) try: result = await sandbox.commands.run( @@ -425,14 +558,10 @@ async def _exec_internal( raise ExecTimeoutError( command=tuple(normalized), timeout_s=timeout, cause=exc ) from exc - raise ExecTransportError( + raise _superserve_exec_transport_error( command=tuple(normalized), - context={ - "backend": "superserve", - "sandbox_id": self.state.sandbox_id, - "provider_error": _provider_error_detail(exc), - }, cause=exc, + sandbox_id=self.state.sandbox_id, ) from exc stdout = (getattr(result, "stdout", "") or "").encode("utf-8", errors="replace") @@ -506,16 +635,22 @@ async def _write_bytes_with_retry(self, path: str, data: bytes) -> None: async def persist_workspace(self) -> io.IOBase: return await with_ephemeral_mounts_removed( self, - self._persist_workspace_internal, + self._persist_workspace_with_retry, error_path=self._workspace_root_path(), error_cls=WorkspaceArchiveReadError, operation_error_context_key="snapshot_error_before_remount_corruption", ) + @retry_async(retry_if=lambda exc, self: _is_transient_error(exc)) + async def _persist_workspace_with_retry(self) -> io.IOBase: + return await self._persist_workspace_internal() + async def _persist_workspace_internal(self) -> io.IOBase: root = self._workspace_root_path() archive_path = posix_path_as_path( - coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar") + coerce_posix_path( + f"/tmp/openai-agents-persist-{self.state.session_id.hex}.tar" + ) ) excludes = [ f"--exclude=./{rel_path.as_posix()}" @@ -584,16 +719,22 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: await with_ephemeral_mounts_removed( self, - lambda: self._hydrate_workspace_internal(bytes(raw)), + lambda: self._hydrate_workspace_with_retry(bytes(raw)), error_path=self._workspace_root_path(), error_cls=WorkspaceArchiveWriteError, operation_error_context_key="hydrate_error_before_remount_corruption", ) + @retry_async(retry_if=lambda exc, 
self, _raw: _is_transient_error(exc)) + async def _hydrate_workspace_with_retry(self, raw: bytes) -> None: + await self._hydrate_workspace_internal(raw) + async def _hydrate_workspace_internal(self, raw: bytes) -> None: root = self._workspace_root_path() archive_path = posix_path_as_path( - coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar") + coerce_posix_path( + f"/tmp/openai-agents-hydrate-{self.state.session_id.hex}.tar" + ) ) tar_command = ("tar", "xf", archive_path.as_posix(), "-C", root.as_posix()) @@ -643,13 +784,6 @@ async def _hydrate_workspace_internal(self, raw: bytes) -> None: pass -def _shell_quote(value: str) -> str: - """Minimal shlex.quote without importing shlex twice — keeps Vercel/Daytona-style quoting.""" - import shlex - - return shlex.quote(value) - - class SuperserveSandboxClient(BaseSandboxClient[SuperserveSandboxClientOptions]): """Superserve-backed sandbox client managing sandbox lifecycle via AsyncSandbox.""" @@ -753,33 +887,13 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: reconnected = False if state.sandbox_id: - try: - sandbox = await AsyncSandbox.connect( - state.sandbox_id, - api_key=api_key, - base_url=base_url, - ) - status = getattr(sandbox, "status", None) - status_value = getattr(status, "value", status) - if status_value == "paused": - await sandbox.resume() - elif status_value == "resuming": - await sandbox.resume() - elif status_value == "failed": - sandbox = None - # else status_value == "active" → already running - if sandbox is not None: - reconnected = True - except Exception as exc: - if not_found_exc is not None and isinstance(exc, not_found_exc): - logger.debug( - "superserve sandbox %s not found, will recreate", state.sandbox_id - ) - else: - logger.debug( - "superserve connect/resume failed (will recreate): %s", exc - ) - sandbox = None + sandbox, reconnected = await self._reattach_sandbox( + AsyncSandbox=AsyncSandbox, + state=state, + api_key=api_key, + 
base_url=base_url, + not_found_exc=not_found_exc, + ) if sandbox is None: state.sandbox_id = "" @@ -791,6 +905,87 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: inner._set_start_state_preserved(reconnected, system=reconnected) return self._wrap_session(inner, instrumentation=self._instrumentation) + async def _reattach_sandbox( + self, + *, + AsyncSandbox: Any, + state: SuperserveSandboxSessionState, + api_key: str | None, + base_url: str | None, + not_found_exc: type[BaseException] | None, + ) -> tuple[Any | None, bool]: + """Try to reattach to an existing Superserve sandbox by id. + + Returns (sandbox, reconnected). On any failure path, returns (None, False) so the caller + falls back to recreating from scratch. + """ + try: + sandbox = await AsyncSandbox.connect( + state.sandbox_id, + api_key=api_key, + base_url=base_url, + ) + except Exception as exc: + if not_found_exc is not None and isinstance(exc, not_found_exc): + logger.debug( + "superserve sandbox %s not found, will recreate", state.sandbox_id + ) + else: + logger.debug( + "superserve connect failed for %s (will recreate): %s", + state.sandbox_id, + exc, + ) + return None, False + + status = getattr(sandbox, "status", None) + status_value = getattr(status, "value", status) + + if status_value in _SUPERSERVE_TERMINAL_STATUSES: + logger.debug( + "superserve sandbox %s is in terminal status %r; recreating", + state.sandbox_id, + status_value, + ) + return None, False + + if status_value in _SUPERSERVE_RESUMING_STATUSES: + # Only call resume() if the sandbox is paused; for `resuming` just wait. Calling + # resume() while resume is in flight typically 409s on the API. 
+ if status_value == "paused": + try: + await sandbox.resume() + except Exception as exc: + logger.debug( + "superserve resume() failed for %s, will recreate: %s", + state.sandbox_id, + exc, + ) + return None, False + + probe = SuperserveSandboxSession.from_state(state, sandbox=sandbox) + try: + await probe._wait_until_active() + except WorkspaceStartError as exc: + logger.debug( + "superserve sandbox %s did not become active after resume: %s", + state.sandbox_id, + exc, + ) + return None, False + return sandbox, True + + if status_value in _SUPERSERVE_ACTIVE_STATUSES: + return sandbox, True + + # Unknown or transitional status (e.g. "stopping", future enum values) — don't trust it. + logger.debug( + "superserve sandbox %s has unrecognized status %r; recreating", + state.sandbox_id, + status_value, + ) + return None, False + def deserialize_session_state(self, payload: dict[str, object]) -> SandboxSessionState: return SuperserveSandboxSessionState.model_validate(payload) diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py index 01540cd054..d74126fb61 100644 --- a/tests/extensions/sandbox/test_superserve.py +++ b/tests/extensions/sandbox/test_superserve.py @@ -257,6 +257,10 @@ async def kill(self) -> None: self.kill_calls += 1 self.status = "deleted" +async def _noop_sleep(*_args: object, **_kwargs: object) -> None: + return None + + def _load_superserve_module(monkeypatch: pytest.MonkeyPatch) -> Any: _FakeAsyncSandbox.reset() @@ -693,6 +697,182 @@ async def test_superserve_resume_calls_resume_for_paused_sandbox( assert _FakeAsyncSandbox.create_calls == [] +@pytest.mark.asyncio +async def test_superserve_resume_polls_until_active_after_resume_call( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-paused-poll", status="paused") + _FakeAsyncSandbox.sandboxes[existing.id] = existing + + # Make sandbox.resume() leave 
the status at "resuming" so _wait_until_active has to poll. + original_resume = _FakeAsyncSandbox.resume + + async def _slow_resume(self: _FakeAsyncSandbox) -> None: + self.resume_calls += 1 + self.status = "resuming" + + monkeypatch.setattr(_FakeAsyncSandbox, "resume", _slow_resume) + + # On the second get_info call, flip status to "active" so polling succeeds. + get_info_count = {"n": 0} + + async def _get_info_then_active(self: _FakeAsyncSandbox) -> _FakeSandboxInfo: + get_info_count["n"] += 1 + if get_info_count["n"] >= 2: + self.status = "active" + return _FakeSandboxInfo(status=self.status) + + monkeypatch.setattr(_FakeAsyncSandbox, "get_info", _get_info_then_active) + + # Tighten the poll cadence so the test doesn't actually sleep. + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000043", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.id, + timeouts=superserve_module.SuperserveSandboxTimeouts( + resume_ready_poll_interval_s=0.001, + resume_ready_timeout_s=5, + ), + ) + + client = superserve_module.SuperserveSandboxClient() + resumed = await client.resume(state) + + # Restore original method to avoid leaking into other tests. + monkeypatch.setattr(_FakeAsyncSandbox, "resume", original_resume) + + assert existing.resume_calls == 1 + assert get_info_count["n"] >= 2 # polled at least twice + assert resumed._inner.state.sandbox_id == existing.id + assert existing.status == "active" + + +@pytest.mark.asyncio +async def test_superserve_resume_skips_resume_call_when_already_resuming( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-already-resuming", status="resuming") + _FakeAsyncSandbox.sandboxes[existing.id] = existing + + # Flip to active on first get_info so the poll exits immediately. 
+ async def _get_info_active(self: _FakeAsyncSandbox) -> _FakeSandboxInfo: + self.status = "active" + return _FakeSandboxInfo(status="active") + + monkeypatch.setattr(_FakeAsyncSandbox, "get_info", _get_info_active) + + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000044", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.id, + ) + + client = superserve_module.SuperserveSandboxClient() + resumed = await client.resume(state) + + # Critical: do NOT call resume() when status is already "resuming". + assert existing.resume_calls == 0 + assert resumed._inner.state.sandbox_id == existing.id + + +@pytest.mark.asyncio +async def test_superserve_resume_recreates_on_unknown_status( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-stopping", status="stopping") + _FakeAsyncSandbox.sandboxes[existing.id] = existing + + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000045", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.id, + template="superserve/python-3.11", + ) + + client = superserve_module.SuperserveSandboxClient() + resumed = await client.resume(state) + + # Unknown/stopping → recreate. 
+ assert len(_FakeAsyncSandbox.create_calls) == 1 + assert resumed._inner.state.sandbox_id != existing.id + assert resumed._inner._workspace_state_preserved_on_start() is False + + +@pytest.mark.asyncio +async def test_superserve_resume_recreates_on_failed_status( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-failed", status="failed") + _FakeAsyncSandbox.sandboxes[existing.id] = existing + + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000046", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.id, + ) + + client = superserve_module.SuperserveSandboxClient() + resumed = await client.resume(state) + + assert len(_FakeAsyncSandbox.create_calls) == 1 + assert resumed._inner.state.sandbox_id != existing.id + # Original sandbox never had resume() called on it. + assert existing.resume_calls == 0 + + +# --------------------------------------------------------------------------- +# Error classification +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_superserve_create_classifies_conflict_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + _FakeAsyncSandbox.create_failures = [_SuperserveConflictError("name already exists")] + + client = superserve_module.SuperserveSandboxClient() + with pytest.raises(Exception) as exc_info: + await client.create( + manifest=Manifest(), + options=superserve_module.SuperserveSandboxClientOptions(name="duplicate-name"), + ) + assert exc_info.value.context.get("reason") == "name_collision" + assert exc_info.value.context.get("http_status") == 409 + + +def test_superserve_runtime_helper_cache_key_is_sandbox_id( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + 
state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000060", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sup-cache-key", + ) + session = superserve_module.SuperserveSandboxSession.from_state(state) + assert session._current_runtime_helper_cache_key() == "sup-cache-key" + + empty_state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000061", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="", + ) + empty_session = superserve_module.SuperserveSandboxSession.from_state(empty_state) + assert empty_session._current_runtime_helper_cache_key() is None + + @pytest.mark.asyncio async def test_superserve_resume_falls_back_to_create_on_not_found( monkeypatch: pytest.MonkeyPatch, From bc707cec64d5100ecf7fa77d658097995b2d5f04 Mon Sep 17 00:00:00 2001 From: Amit Patil Date: Mon, 25 May 2026 16:49:58 -0700 Subject: [PATCH 3/5] address review and align with upstream provider PR pattern --- .../extensions/sandbox/superserve/sandbox.md | 3 + docs/sandbox/clients.md | 3 + examples/sandbox/extensions/README.md | 29 +++----- .../sandbox/extensions/superserve_runner.py | 4 +- .../extensions/sandbox/superserve/sandbox.py | 8 +-- tests/sandbox/test_client_options.py | 2 + tests/sandbox/test_compatibility_guards.py | 69 +++++++++++++++++++ 7 files changed, 92 insertions(+), 26 deletions(-) create mode 100644 docs/ref/extensions/sandbox/superserve/sandbox.md diff --git a/docs/ref/extensions/sandbox/superserve/sandbox.md b/docs/ref/extensions/sandbox/superserve/sandbox.md new file mode 100644 index 0000000000..0a3d9d1f37 --- /dev/null +++ b/docs/ref/extensions/sandbox/superserve/sandbox.md @@ -0,0 +1,3 @@ +# `Sandbox` + +::: agents.extensions.sandbox.superserve.sandbox diff --git a/docs/sandbox/clients.md b/docs/sandbox/clients.md index bd21da63d3..20105e693a 100644 --- a/docs/sandbox/clients.md +++ b/docs/sandbox/clients.md @@ -96,6 
+96,7 @@ For provider-specific setup notes and links for the checked-in extension example | `E2BSandboxClient` | `openai-agents[e2b]` | [E2B runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/e2b_runner.py) | | `ModalSandboxClient` | `openai-agents[modal]` | [Modal runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/modal_runner.py) | | `RunloopSandboxClient` | `openai-agents[runloop]` | [Runloop runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/runloop/runner.py) | +| `SuperserveSandboxClient` | `openai-agents[superserve]` | [Superserve runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/superserve_runner.py) | | `VercelSandboxClient` | `openai-agents[vercel]` | [Vercel runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/vercel_runner.py) | @@ -113,6 +114,7 @@ Hosted sandbox clients expose provider-specific mount strategies. Choose the bac | `DaytonaSandboxClient` | Supports rclone-backed cloud storage mounts with `DaytonaCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | | `E2BSandboxClient` | Supports rclone-backed cloud storage mounts with `E2BCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | | `RunloopSandboxClient` | Supports rclone-backed cloud storage mounts with `RunloopCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | +| `SuperserveSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. | | `VercelSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. 
| @@ -130,6 +132,7 @@ The table below summarizes which remote storage entries each backend can mount d | `DaytonaSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | | `E2BSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | | `RunloopSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | +| `SuperserveSandboxClient` | - | - | - | - | - | - | | `VercelSandboxClient` | - | - | - | - | - | - | diff --git a/examples/sandbox/extensions/README.md b/examples/sandbox/extensions/README.md index b9a27b2de4..9c98364462 100644 --- a/examples/sandbox/extensions/README.md +++ b/examples/sandbox/extensions/README.md @@ -7,7 +7,7 @@ They intentionally keep the flow simple: 1. Build a tiny manifest in memory. 2. Create a `SandboxAgent` that inspects that workspace through one shell tool. -3. Run the agent against E2B, Modal, Daytona, Cloudflare, Runloop, Blaxel, or Vercel. +3. Run the agent against E2B, Modal, Daytona, Cloudflare, Runloop, Blaxel, Superserve, or Vercel. All of these examples require `OPENAI_API_KEY`, because they call the model through the normal `Runner` path. Each cloud backend also needs its own provider credentials. @@ -261,12 +261,6 @@ export OPENAI_API_KEY=... export SUPERSERVE_API_KEY=... ``` -To target staging instead of production, also set: - -```bash -export SUPERSERVE_BASE_URL=https://api-staging.superserve.ai -``` - ### Run ```bash @@ -275,18 +269,15 @@ uv run python examples/sandbox/extensions/superserve_runner.py --stream Useful flags: -- `--template superserve/python-3.11` — use a different curated template (others: - `superserve/base`, `superserve/node-22`, `superserve/code-interpreter`, - `superserve/python-ml`, `superserve/claude-code`). Team-owned template UUIDs also work. -- `--pause-on-exit` — pause the sandbox on shutdown instead of killing it. Superserve sandboxes - never die on their own by default, so this lets you reconnect with `SuperserveSandboxClient.resume` - later without recreating workspace state. 
-- `--timeout-seconds 300` — opt into an inactivity timeout (off by default). -- `--skip-snapshot-check` — skip the pause/resume snapshot round-trip verification. - -Pause/resume is a first-class part of the Superserve API surface, so the example exercises both -the standard create→exec→shutdown flow and the explicit -`pause → serialize state → resume → read` round-trip. +- `--template ` -- use a different template; defaults to `superserve/base`. + Other curated templates: `superserve/python-3.11`, `superserve/node-22`, + `superserve/code-interpreter`, `superserve/python-ml`. +- `--pause-on-exit` -- pause the sandbox on shutdown instead of killing it. +- `--timeout-seconds 300` -- inactivity timeout in seconds (off by default). +- `--skip-snapshot-check` -- skip the pause/resume snapshot round-trip verification. + +The example runs a pause/resume round-trip before the agent run to verify that workspace state +survives shutdown. ## Runloop diff --git a/examples/sandbox/extensions/superserve_runner.py b/examples/sandbox/extensions/superserve_runner.py index f462f4ef16..1f6437a460 100644 --- a/examples/sandbox/extensions/superserve_runner.py +++ b/examples/sandbox/extensions/superserve_runner.py @@ -204,9 +204,9 @@ async def main( "--template", default=DEFAULT_TEMPLATE, help=( - "Superserve template name or UUID. Defaults to `superserve/base`. " + "Superserve template to use. Defaults to `superserve/base`. " "Other curated templates: superserve/python-3.11, superserve/node-22, " - "superserve/code-interpreter, superserve/python-ml, superserve/claude-code." + "superserve/code-interpreter, superserve/python-ml." 
), ) parser.add_argument( diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py index 00ced23de2..7a64883d86 100644 --- a/src/agents/extensions/sandbox/superserve/sandbox.py +++ b/src/agents/extensions/sandbox/superserve/sandbox.py @@ -65,8 +65,6 @@ _SUPERSERVE_ACTIVE_STATUSES: frozenset[str] = frozenset({"active"}) _SUPERSERVE_RESUMING_STATUSES: frozenset[str] = frozenset({"paused", "resuming"}) _SUPERSERVE_TERMINAL_STATUSES: frozenset[str] = frozenset({"failed"}) -_RESUME_READY_TIMEOUT_S: float = 60.0 -_RESUME_READY_POLL_INTERVAL_S: float = 1.0 logger = logging.getLogger(__name__) @@ -177,9 +175,9 @@ def _resolve_manifest_root(manifest: Manifest | None) -> Manifest: - No manifest → fresh manifest rooted at `/workspace`. - Manifest whose root is the SDK's default placeholder (`Manifest.model_fields["root"].default`) → rewrite the root to the Superserve default `/workspace` for ergonomics. - - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. We do not - reject arbitrary roots; this mirrors Vercel's behaviour and lets callers stage work outside - `/workspace` deliberately. If you need confinement, set extra path grants on the manifest. + - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. Arbitrary + roots are accepted so callers can stage work outside `/workspace` deliberately. For + confinement, set extra path grants on the manifest. 
""" if manifest is None: return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT) diff --git a/tests/sandbox/test_client_options.py b/tests/sandbox/test_client_options.py index 8c71dc4028..f2ebbbf7d2 100644 --- a/tests/sandbox/test_client_options.py +++ b/tests/sandbox/test_client_options.py @@ -8,6 +8,7 @@ from agents.extensions.sandbox.cloudflare import CloudflareSandboxClientOptions from agents.extensions.sandbox.daytona import DaytonaSandboxClientOptions from agents.extensions.sandbox.e2b import E2BSandboxClientOptions +from agents.extensions.sandbox.superserve import SuperserveSandboxClientOptions from agents.sandbox.config import DEFAULT_PYTHON_SANDBOX_IMAGE from agents.sandbox.sandboxes import DockerSandboxClientOptions, UnixLocalSandboxClientOptions from agents.sandbox.session import BaseSandboxClientOptions @@ -69,6 +70,7 @@ def test_sandbox_client_options_exclude_unset_preserves_type_discriminator() -> E2BSandboxClientOptions(sandbox_type="e2b", template="base"), DaytonaSandboxClientOptions(image=DEFAULT_PYTHON_SANDBOX_IMAGE), CloudflareSandboxClientOptions(worker_url="https://example.com"), + SuperserveSandboxClientOptions(template="superserve/base"), ], ) def test_sandbox_client_options_roundtrip_preserves_concrete_type( diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py index 5a11e5bf77..b854a8c549 100644 --- a/tests/sandbox/test_compatibility_guards.py +++ b/tests/sandbox/test_compatibility_guards.py @@ -324,6 +324,22 @@ def test_core_sandbox_public_export_surface_is_stable() -> None: "_encode_runloop_snapshot_ref", }, ), + ( + "agents.extensions.sandbox.superserve", + { + "DEFAULT_SUPERSERVE_WORKSPACE_ROOT", + "ExecTimeoutError", + "ExecTransportError", + "SuperserveSandboxClient", + "SuperserveSandboxClientOptions", + "SuperserveSandboxSession", + "SuperserveSandboxSessionState", + "SuperserveSandboxTimeouts", + "WorkspaceArchiveReadError", + "WorkspaceArchiveWriteError", + 
"WorkspaceReadNotFoundError", + }, + ), ( "agents.extensions.sandbox.vercel", { @@ -493,6 +509,23 @@ def test_optional_sandbox_dataclass_constructor_field_order_is_stable( "managed_secrets", ), ), + ( + "agents.extensions.sandbox.superserve", + "SuperserveSandboxClientOptions", + ( + "template", + "name", + "env_vars", + "metadata", + "network", + "timeout_seconds", + "pause_on_exit", + "api_key", + "base_url", + "exposed_ports", + "timeouts", + ), + ), ( "agents.extensions.sandbox.vercel", "VercelSandboxClientOptions", @@ -720,6 +753,31 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable( "secret_refs", ), ), + ( + "agents.extensions.sandbox.superserve", + "SuperserveSandboxSessionState", + ( + "type", + "session_id", + "snapshot", + "manifest", + "exposed_ports", + "snapshot_fingerprint", + "snapshot_fingerprint_version", + "workspace_root_ready", + "sandbox_id", + "template", + "name", + "base_env_vars", + "base_metadata", + "base_network", + "timeout_seconds", + "pause_on_exit", + "base_url", + "api_key", + "timeouts", + ), + ), ( "agents.extensions.sandbox.vercel", "VercelSandboxSessionState", @@ -785,6 +843,12 @@ def test_sandbox_session_state_field_order_is_stable( ), ("agents.extensions.sandbox.daytona", "DaytonaSandboxClientOptions", (), "daytona"), ("agents.extensions.sandbox.runloop", "RunloopSandboxClientOptions", (), "runloop"), + ( + "agents.extensions.sandbox.superserve", + "SuperserveSandboxClientOptions", + (), + "superserve", + ), ("agents.extensions.sandbox.vercel", "VercelSandboxClientOptions", (), "vercel"), ], ) @@ -846,6 +910,11 @@ def test_optional_sandbox_client_options_json_round_trip_preserves_type( "RunloopSandboxSessionState", {"devbox_id": "devbox-123"}, ), + ( + "agents.extensions.sandbox.superserve", + "SuperserveSandboxSessionState", + {"sandbox_id": "sandbox-123"}, + ), ( "agents.extensions.sandbox.vercel", "VercelSandboxSessionState", From 75c94b909d62ce4f44dced391eae3377aa1b3a99 Mon Sep 17 00:00:00 2001 
From: Amit Patil Date: Mon, 25 May 2026 17:15:19 -0700 Subject: [PATCH 4/5] address codex review feedback --- .../extensions/sandbox/superserve/sandbox.py | 44 +++++++++++-------- tests/sandbox/test_compatibility_guards.py | 2 - 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py index 7a64883d86..954f5271ab 100644 --- a/src/agents/extensions/sandbox/superserve/sandbox.py +++ b/src/agents/extensions/sandbox/superserve/sandbox.py @@ -266,8 +266,6 @@ class SuperserveSandboxSessionState(SandboxSessionState): base_network: dict[str, object] | None = None timeout_seconds: int | None = None pause_on_exit: bool = False - base_url: str | None = None - api_key: str | None = None timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts) @@ -276,15 +274,21 @@ class SuperserveSandboxSession(BaseSandboxSession): state: SuperserveSandboxSessionState _sandbox: Any | None + _api_key: str | None + _base_url: str | None def __init__( self, *, state: SuperserveSandboxSessionState, sandbox: Any | None = None, + api_key: str | None = None, + base_url: str | None = None, ) -> None: self.state = state self._sandbox = sandbox + self._api_key = api_key + self._base_url = base_url @classmethod def from_state( @@ -292,8 +296,10 @@ def from_state( state: SuperserveSandboxSessionState, *, sandbox: Any | None = None, + api_key: str | None = None, + base_url: str | None = None, ) -> SuperserveSandboxSession: - return cls(state=state, sandbox=sandbox) + return cls(state=state, sandbox=sandbox, api_key=api_key, base_url=base_url) @property def sandbox_id(self) -> str: @@ -366,8 +372,8 @@ async def _ensure_sandbox(self) -> Any: metadata=dict(self.state.base_metadata) or None, env_vars=env_vars or None, network=network, - api_key=self.state.api_key, - base_url=self.state.base_url, + api_key=self._api_key, + base_url=self._base_url, ) except Exception as 
exc: reason = ( @@ -537,8 +543,12 @@ async def _exec_internal( command_str = shlex.join(normalized) envs = await self._resolved_envs() cwd = sandbox_path_str(self.state.manifest.root) - # Superserve accepts only int seconds; round up so we never undershoot the caller. - timeout_seconds = None if timeout is None else max(1, math.ceil(timeout)) + effective_timeout = ( + float(self.state.timeouts.exec_timeout_unbounded_s) + if timeout is None + else float(timeout) + ) + timeout_seconds = max(1, math.ceil(effective_timeout)) try: result = await sandbox.commands.run( @@ -843,12 +853,10 @@ async def create( base_network=dict(options.network) if options.network is not None else None, timeout_seconds=options.timeout_seconds, pause_on_exit=options.pause_on_exit, - base_url=base_url, - api_key=api_key, timeouts=timeouts, exposed_ports=options.exposed_ports, ) - inner = SuperserveSandboxSession.from_state(state) + inner = SuperserveSandboxSession.from_state(state, api_key=api_key, base_url=base_url) await inner._ensure_sandbox() return self._wrap_session(inner, instrumentation=self._instrumentation) @@ -874,12 +882,8 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: sup_errors = _import_superserve_errors() not_found_exc = sup_errors.get("not_found") - api_key = state.api_key or self._api_key - base_url = state.base_url or self._base_url - if state.api_key is None and api_key is not None: - state.api_key = api_key - if state.base_url is None and base_url is not None: - state.base_url = base_url + api_key = self._api_key + base_url = self._base_url sandbox: Any | None = None reconnected = False @@ -897,7 +901,9 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: state.sandbox_id = "" state.workspace_root_ready = False - inner = SuperserveSandboxSession.from_state(state, sandbox=sandbox) + inner = SuperserveSandboxSession.from_state( + state, sandbox=sandbox, api_key=api_key, base_url=base_url + ) if sandbox is None: await 
inner._ensure_sandbox() inner._set_start_state_preserved(reconnected, system=reconnected) @@ -961,7 +967,9 @@ async def _reattach_sandbox( ) return None, False - probe = SuperserveSandboxSession.from_state(state, sandbox=sandbox) + probe = SuperserveSandboxSession.from_state( + state, sandbox=sandbox, api_key=api_key, base_url=base_url + ) try: await probe._wait_until_active() except WorkspaceStartError as exc: diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py index b854a8c549..2194791955 100644 --- a/tests/sandbox/test_compatibility_guards.py +++ b/tests/sandbox/test_compatibility_guards.py @@ -773,8 +773,6 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable( "base_network", "timeout_seconds", "pause_on_exit", - "base_url", - "api_key", "timeouts", ), ), From a13c70a4f89eed31796a4da4ef896370d8e49ad8 Mon Sep 17 00:00:00 2001 From: Amit Patil Date: Mon, 25 May 2026 18:17:55 -0700 Subject: [PATCH 5/5] address second codex review pass --- .../extensions/sandbox/superserve/sandbox.py | 13 ++-- tests/extensions/sandbox/test_superserve.py | 73 +++++++++++++++++++ tests/sandbox/test_compatibility_guards.py | 1 + 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py index 954f5271ab..b8ecfedfe5 100644 --- a/src/agents/extensions/sandbox/superserve/sandbox.py +++ b/src/agents/extensions/sandbox/superserve/sandbox.py @@ -266,6 +266,7 @@ class SuperserveSandboxSessionState(SandboxSessionState): base_network: dict[str, object] | None = None timeout_seconds: int | None = None pause_on_exit: bool = False + base_url: str | None = None timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts) @@ -853,6 +854,7 @@ async def create( base_network=dict(options.network) if options.network is not None else None, timeout_seconds=options.timeout_seconds, 
pause_on_exit=options.pause_on_exit, + base_url=base_url, timeouts=timeouts, exposed_ports=options.exposed_ports, ) @@ -883,7 +885,7 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: not_found_exc = sup_errors.get("not_found") api_key = self._api_key - base_url = self._base_url + base_url = state.base_url or self._base_url sandbox: Any | None = None reconnected = False @@ -934,13 +936,8 @@ async def _reattach_sandbox( logger.debug( "superserve sandbox %s not found, will recreate", state.sandbox_id ) - else: - logger.debug( - "superserve connect failed for %s (will recreate): %s", - state.sandbox_id, - exc, - ) - return None, False + return None, False + raise status = getattr(sandbox, "status", None) status_value = getattr(status, "value", status) diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py index d74126fb61..de8e7bc366 100644 --- a/tests/extensions/sandbox/test_superserve.py +++ b/tests/extensions/sandbox/test_superserve.py @@ -804,6 +804,79 @@ async def test_superserve_resume_recreates_on_unknown_status( assert resumed._inner._workspace_state_preserved_on_start() is False +@pytest.mark.asyncio +async def test_superserve_resume_reraises_transient_connect_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Transient/auth/server errors during connect must not silently orphan the sandbox.""" + superserve_module = _load_superserve_module(monkeypatch) + + class _TransientError(Exception): + status_code = 503 + + async def _flaky_connect(cls: type, sandbox_id: str, **kwargs: object) -> _FakeAsyncSandbox: + raise _TransientError("upstream blip") + + monkeypatch.setattr(_FakeAsyncSandbox, "connect", classmethod(_flaky_connect)) + + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000047", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sup-flaky", + ) + + client = 
superserve_module.SuperserveSandboxClient() + with pytest.raises(_TransientError): + await client.resume(state) + + # Should not have fallen back to recreating. + assert _FakeAsyncSandbox.create_calls == [] + + +@pytest.mark.asyncio +async def test_superserve_resume_uses_state_base_url_over_client_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sup-base-url", status="active") + _FakeAsyncSandbox.sandboxes[existing.id] = existing + + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000048", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.id, + base_url="https://api-staging.superserve.ai", + ) + + client = superserve_module.SuperserveSandboxClient(base_url="https://api.superserve.ai") + await client.resume(state) + + # base_url on state wins over the client default. + assert _FakeAsyncSandbox.connect_calls[0]["base_url"] == "https://api-staging.superserve.ai" + + +@pytest.mark.asyncio +async def test_superserve_exec_uses_unbounded_timeout_when_caller_passes_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + superserve_module = _load_superserve_module(monkeypatch) + state = superserve_module.SuperserveSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000049", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sup-existing", + timeouts=superserve_module.SuperserveSandboxTimeouts(exec_timeout_unbounded_s=12345), + ) + sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing") + session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox) + + await session.exec("echo", "hello", shell=False) + + assert sandbox.commands.calls[0]["timeout_seconds"] == 12345 + + @pytest.mark.asyncio async def test_superserve_resume_recreates_on_failed_status( monkeypatch: pytest.MonkeyPatch, diff --git 
a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py index 2194791955..74fe40e387 100644 --- a/tests/sandbox/test_compatibility_guards.py +++ b/tests/sandbox/test_compatibility_guards.py @@ -773,6 +773,7 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable( "base_network", "timeout_seconds", "pause_on_exit", + "base_url", "timeouts", ), ),