From 992628d322cc888c3c0e9d8a8a602dfa64045221 Mon Sep 17 00:00:00 2001
From: Amit Patil <meAmitPatil@users.noreply.github.com>
Date: Tue, 19 May 2026 18:53:51 -0700
Subject: [PATCH 1/5] feat: add Superserve sandbox backend

---
 examples/sandbox/extensions/README.md         |  45 +
 .../sandbox/extensions/superserve_runner.py   | 246 ++++++
 pyproject.toml                                |   1 +
 src/agents/extensions/sandbox/__init__.py     |  26 +
 .../extensions/sandbox/superserve/__init__.py |  31 +
 .../extensions/sandbox/superserve/sandbox.py  | 806 ++++++++++++++++++
 tests/extensions/sandbox/test_superserve.py   | 760 +++++++++++++++++
 7 files changed, 1915 insertions(+)
 create mode 100644 examples/sandbox/extensions/superserve_runner.py
 create mode 100644 src/agents/extensions/sandbox/superserve/__init__.py
 create mode 100644 src/agents/extensions/sandbox/superserve/sandbox.py
 create mode 100644 tests/extensions/sandbox/test_superserve.py

diff --git a/examples/sandbox/extensions/README.md b/examples/sandbox/extensions/README.md
index 837d9dfa28..b9a27b2de4 100644
--- a/examples/sandbox/extensions/README.md
+++ b/examples/sandbox/extensions/README.md
@@ -243,6 +243,51 @@ export DAYTONA_API_KEY=...
 uv run python examples/sandbox/extensions/daytona/daytona_runner.py --stream
 ```
 
+## Superserve
+
+### Setup
+
+Install the repo extra:
+
+```bash
+uv sync --extra superserve
+```
+
+Create a Superserve account at <https://console.superserve.ai>, generate an API key, and export
+the required environment variables:
+
+```bash
+export OPENAI_API_KEY=...
+export SUPERSERVE_API_KEY=...
+```
+
+To target staging instead of production, also set:
+
+```bash
+export SUPERSERVE_BASE_URL=https://api-staging.superserve.ai
+```
+
+### Run
+
+```bash
+uv run python examples/sandbox/extensions/superserve_runner.py --stream
+```
+
+Useful flags:
+
+- `--template superserve/python-3.11` — use a different curated template (others:
+  `superserve/base`, `superserve/node-22`, `superserve/code-interpreter`,
+  `superserve/python-ml`, `superserve/claude-code`). Team-owned template UUIDs also work.
+- `--pause-on-exit` — pause the sandbox on shutdown instead of killing it. Superserve sandboxes
+  never die on their own by default, so this lets you reconnect with `SuperserveSandboxClient.resume`
+  later without recreating workspace state.
+- `--timeout-seconds 300` — opt into an inactivity timeout (off by default).
+- `--skip-snapshot-check` — skip the pause/resume snapshot round-trip verification.
+
+Pause/resume is a first-class part of the Superserve API surface, so the example exercises both
+the standard create→exec→shutdown flow and an explicit
+`write → stop → shutdown → resume → read` round-trip that checks a marker file survives.
+
 ## Runloop
 
 ### Setup
diff --git a/examples/sandbox/extensions/superserve_runner.py b/examples/sandbox/extensions/superserve_runner.py
new file mode 100644
index 0000000000..f462f4ef16
--- /dev/null
+++ b/examples/sandbox/extensions/superserve_runner.py
@@ -0,0 +1,246 @@
+"""
+Minimal Superserve-backed sandbox example for manual validation.
+
+This example mirrors the other cloud extension runners: it creates a tiny workspace, asks a
+sandboxed agent to inspect it through one shell tool, prints a short answer, and verifies that
+pause/resume preserves workspace state.
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import io
+import os
+import sys
+import tempfile
+from pathlib import Path
+from typing import cast
+
+from openai.types.responses import ResponseTextDeltaEvent
+
+from agents import ModelSettings, Runner
+from agents.run import RunConfig
+from agents.sandbox import LocalSnapshotSpec, Manifest, SandboxAgent, SandboxRunConfig
+from agents.sandbox.session import BaseSandboxSession
+
+if __package__ is None or __package__ == "":
+    sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+
+from examples.sandbox.misc.example_support import text_manifest
+from examples.sandbox.misc.workspace_shell import WorkspaceShellCapability
+
+try:
+    from agents.extensions.sandbox import (
+        DEFAULT_SUPERSERVE_WORKSPACE_ROOT,
+        SuperserveSandboxClient,
+        SuperserveSandboxClientOptions,
+    )
+except Exception as exc:  # pragma: no cover - import path depends on optional extras
+    raise SystemExit(
+        "Superserve sandbox examples require the optional repo extra.\n"
+        "Install it with: uv sync --extra superserve"
+    ) from exc
+
+
+DEFAULT_QUESTION = "Summarize this cloud sandbox workspace in 2 sentences."
+DEFAULT_TEMPLATE = "superserve/base"
+SNAPSHOT_CHECK_PATH = Path("snapshot-check.txt")
+SNAPSHOT_CHECK_CONTENT = "superserve snapshot round-trip ok\n"
+
+
+def _build_manifest() -> Manifest:
+    """Seed the demo workspace with three small markdown files under the Superserve root."""
+    files = {
+        "README.md": (
+            "# Superserve Demo Workspace\n\n"
+            "This workspace exists to validate the Superserve sandbox backend manually.\n"
+        ),
+        "renewal.md": (
+            "# Renewal Notes\n\n"
+            "- Customer: Northwind Health.\n"
+            "- Renewal date: 2026-04-15.\n"
+            "- Risk: unresolved SSO setup.\n"
+        ),
+        "next_steps.md": (
+            "# Next steps\n\n"
+            "1. Finish the SSO fix.\n"
+            "2. Confirm legal language before procurement review.\n"
+        ),
+    }
+    seeded = text_manifest(files)
+    return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT, entries=seeded.entries)
+
+
+def _require_env(name: str) -> None:
+    """Exit with a clear message when environment variable `name` is unset or empty."""
+    if not os.environ.get(name):
+        raise SystemExit(f"{name} must be set before running this example.")
+
+
+async def _read_text(session: BaseSandboxSession, path: Path) -> str:
+    """Read `path` from the session and return it as text, decoding bytes as UTF-8."""
+    handle = await session.read(path)
+    raw = cast(str | bytes, handle.read())
+    # The payload is typed as `str | bytes`; only bytes need decoding.
+    return raw.decode("utf-8") if isinstance(raw, bytes) else raw
+
+
+async def _verify_stop_resume(
+    *,
+    template: str,
+    pause_on_exit: bool,
+    timeout_seconds: int | None,
+) -> None:
+    """Write a marker file, stop the sandbox, resume from its state, and check the marker."""
+    client = SuperserveSandboxClient()
+    options = SuperserveSandboxClientOptions(
+        template=template,
+        pause_on_exit=pause_on_exit,
+        timeout_seconds=timeout_seconds,
+    )
+    with tempfile.TemporaryDirectory(prefix="superserve-snapshot-example-") as snapshot_dir:
+        sandbox = await client.create(
+            manifest=_build_manifest(),
+            snapshot=LocalSnapshotSpec(base_path=Path(snapshot_dir)),
+            options=options,
+        )
+
+        try:
+            await sandbox.start()
+            marker = io.BytesIO(SNAPSHOT_CHECK_CONTENT.encode("utf-8"))
+            await sandbox.write(SNAPSHOT_CHECK_PATH, marker)
+            await sandbox.stop()
+        finally:
+            await sandbox.shutdown()
+
+        # Resume from the first session's state; the marker file must survive the round-trip.
+        resumed = await client.resume(sandbox.state)
+        try:
+            await resumed.start()
+            restored = await _read_text(resumed, SNAPSHOT_CHECK_PATH)
+            if restored != SNAPSHOT_CHECK_CONTENT:
+                raise RuntimeError(
+                    "Snapshot resume verification failed: "
+                    f"expected {SNAPSHOT_CHECK_CONTENT!r}, got {restored!r}"
+                )
+        finally:
+            await resumed.aclose()
+
+    print("snapshot round-trip ok")
+
+
+async def main(
+    *,
+    model: str,
+    question: str,
+    template: str,
+    pause_on_exit: bool,
+    timeout_seconds: int | None,
+    stream: bool,
+    skip_snapshot_check: bool,
+) -> None:
+    _require_env("OPENAI_API_KEY")  # raises SystemExit when unset or empty
+    _require_env("SUPERSERVE_API_KEY")
+
+    if not skip_snapshot_check:  # the round-trip check runs before the agent does
+        await _verify_stop_resume(
+            template=template,
+            pause_on_exit=pause_on_exit,
+            timeout_seconds=timeout_seconds,
+        )
+
+    manifest = _build_manifest()
+    agent = SandboxAgent(
+        name="Superserve Sandbox Assistant",
+        model=model,
+        instructions=(
+            "Answer questions about the sandbox workspace. Inspect the files before answering "
+            "and keep the response concise. "
+            "Do not invent files or statuses that are not present in the workspace. Cite the "
+            "file names you inspected."
+        ),
+        default_manifest=manifest,
+        capabilities=[WorkspaceShellCapability()],
+        model_settings=ModelSettings(tool_choice="required"),
+    )
+
+    client = SuperserveSandboxClient()
+    run_config = RunConfig(
+        sandbox=SandboxRunConfig(
+            client=client,
+            options=SuperserveSandboxClientOptions(
+                template=template,
+                pause_on_exit=pause_on_exit,
+                timeout_seconds=timeout_seconds,
+            ),
+        ),
+        workflow_name="Superserve sandbox example",
+    )
+
+    if not stream:  # non-streaming: print the final output and stop
+        result = await Runner.run(agent, question, run_config=run_config)
+        print(result.final_output)
+        return
+
+    stream_result = Runner.run_streamed(agent, question, run_config=run_config)
+    saw_text_delta = False  # becomes True once the "assistant> " prefix was printed
+    async for event in stream_result.stream_events():
+        if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
+            if not saw_text_delta:  # print the prefix once, before the first delta
+                print("assistant> ", end="", flush=True)
+                saw_text_delta = True
+            print(event.data.delta, end="", flush=True)
+
+    if saw_text_delta:  # terminate the streamed line with a newline
+        print()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", default="gpt-5.5", help="Model name to use.")
+    parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.")
+    parser.add_argument(
+        "--template",
+        default=DEFAULT_TEMPLATE,
+        help=(
+            "Superserve template name or UUID. Defaults to `superserve/base`. "
+            "Other curated templates: superserve/python-3.11, superserve/node-22, "
+            "superserve/code-interpreter, superserve/python-ml, superserve/claude-code."
+        ),
+    )
+    parser.add_argument(
+        "--pause-on-exit",
+        action="store_true",
+        default=False,
+        help="Pause the Superserve sandbox on shutdown instead of killing it.",
+    )
+    parser.add_argument(
+        "--timeout-seconds",
+        type=int,
+        default=None,
+        help=(
+            "Optional inactivity timeout in seconds. Superserve sandboxes do not die on their own "
+            "by default; set this to opt into automatic shutdown."
+        ),
+    )
+    parser.add_argument("--stream", action="store_true", default=False, help="Stream the response.")
+    parser.add_argument(
+        "--skip-snapshot-check",
+        action="store_true",
+        default=False,
+        help="Skip the pause/resume snapshot round-trip verification.",
+    )
+    args = parser.parse_args()
+
+    asyncio.run(
+        main(
+            model=args.model,
+            question=args.question,
+            template=args.template,
+            pause_on_exit=args.pause_on_exit,
+            timeout_seconds=args.timeout_seconds,
+            stream=args.stream,
+            skip_snapshot_check=args.skip_snapshot_check,
+        )
+    )
diff --git a/pyproject.toml b/pyproject.toml
index 4d0122049f..7563a3baf7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ cloudflare = ["aiohttp>=3.12,<4"]
 e2b = ["e2b==2.20.0", "e2b-code-interpreter==2.4.1"]
 modal = ["modal==1.3.5"]
 runloop = ["runloop_api_client>=1.16.0,<2.0.0"]
+superserve = ["superserve>=0.7.0,<1"]
 vercel = ["vercel>=0.5.6,<0.6"]
 s3 = ["boto3>=1.34"]
 temporal = [
diff --git a/src/agents/extensions/sandbox/__init__.py b/src/agents/extensions/sandbox/__init__.py
index d7b082ba1f..e560f5cf20 100644
--- a/src/agents/extensions/sandbox/__init__.py
+++ b/src/agents/extensions/sandbox/__init__.py
@@ -97,6 +97,20 @@
 except Exception:  # pragma: no cover
     _HAS_RUNLOOP = False
 
+try:
+    from .superserve import (
+        DEFAULT_SUPERSERVE_WORKSPACE_ROOT as DEFAULT_SUPERSERVE_WORKSPACE_ROOT,
+        SuperserveSandboxClient as SuperserveSandboxClient,
+        SuperserveSandboxClientOptions as SuperserveSandboxClientOptions,
+        SuperserveSandboxSession as SuperserveSandboxSession,
+        SuperserveSandboxSessionState as SuperserveSandboxSessionState,
+        SuperserveSandboxTimeouts as SuperserveSandboxTimeouts,
+    )
+
+    _HAS_SUPERSERVE = True
+except Exception:  # pragma: no cover
+    _HAS_SUPERSERVE = False
+
 try:
     from .vercel import (
         VercelSandboxClient as VercelSandboxClient,
@@ -177,6 +191,18 @@
         ]
     )
 
+if _HAS_SUPERSERVE:
+    __all__.extend(
+        [
+            "DEFAULT_SUPERSERVE_WORKSPACE_ROOT",
+            "SuperserveSandboxClient",
+            "SuperserveSandboxClientOptions",
+            "SuperserveSandboxSession",
+            "SuperserveSandboxSessionState",
+            "SuperserveSandboxTimeouts",
+        ]
+    )
+
 if _HAS_VERCEL:
     __all__.extend(
         [
diff --git a/src/agents/extensions/sandbox/superserve/__init__.py b/src/agents/extensions/sandbox/superserve/__init__.py
new file mode 100644
index 0000000000..380c278fe4
--- /dev/null
+++ b/src/agents/extensions/sandbox/superserve/__init__.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from ....sandbox.errors import (
+    ExecTimeoutError,
+    ExecTransportError,
+    WorkspaceArchiveReadError,
+    WorkspaceArchiveWriteError,
+    WorkspaceReadNotFoundError,
+)
+from .sandbox import (
+    DEFAULT_SUPERSERVE_WORKSPACE_ROOT,
+    SuperserveSandboxClient,
+    SuperserveSandboxClientOptions,
+    SuperserveSandboxSession,
+    SuperserveSandboxSessionState,
+    SuperserveSandboxTimeouts,
+)
+
+__all__ = [
+    "DEFAULT_SUPERSERVE_WORKSPACE_ROOT",
+    "ExecTimeoutError",
+    "ExecTransportError",
+    "SuperserveSandboxClient",
+    "SuperserveSandboxClientOptions",
+    "SuperserveSandboxSession",
+    "SuperserveSandboxSessionState",
+    "SuperserveSandboxTimeouts",
+    "WorkspaceArchiveReadError",
+    "WorkspaceArchiveWriteError",
+    "WorkspaceReadNotFoundError",
+]
diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py
new file mode 100644
index 0000000000..c5973bb349
--- /dev/null
+++ b/src/agents/extensions/sandbox/superserve/sandbox.py
@@ -0,0 +1,806 @@
+"""
+Superserve sandbox (https://superserve.ai) implementation.
+
+This module provides a Superserve-backed sandbox client/session implementation backed by
+`superserve.AsyncSandbox`.
+
+The `superserve` dependency is optional, so package-level exports should guard imports of this
+module. Within this module, Superserve SDK imports happen lazily so users without the extra can
+still import the package.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import io
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Literal, cast
+
+from pydantic import BaseModel, Field
+
+from ....sandbox.errors import (
+    ConfigurationError,
+    ErrorCode,
+    ExecNonZeroError,
+    ExecTimeoutError,
+    ExecTransportError,
+    ExposedPortUnavailableError,
+    WorkspaceArchiveReadError,
+    WorkspaceArchiveWriteError,
+    WorkspaceReadNotFoundError,
+    WorkspaceStartError,
+    WorkspaceWriteTypeError,
+)
+from ....sandbox.manifest import Manifest
+from ....sandbox.session import SandboxSession, SandboxSessionState
+from ....sandbox.session.base_sandbox_session import BaseSandboxSession
+from ....sandbox.session.dependencies import Dependencies
+from ....sandbox.session.manager import Instrumentation
+from ....sandbox.session.mount_lifecycle import with_ephemeral_mounts_removed
+from ....sandbox.session.runtime_helpers import RESOLVE_WORKSPACE_PATH_HELPER, RuntimeHelperScript
+from ....sandbox.session.sandbox_client import BaseSandboxClient, BaseSandboxClientOptions
+from ....sandbox.snapshot import SnapshotBase, SnapshotSpec, resolve_snapshot
+from ....sandbox.types import ExecResult, ExposedPortEndpoint, User
+from ....sandbox.util.retry import (
+    exception_chain_contains_type,
+    exception_chain_has_status_code,
+    retry_async,
+)
+from ....sandbox.util.tar_utils import UnsafeTarMemberError, validate_tar_bytes
+from ....sandbox.workspace_paths import (
+    coerce_posix_path,
+    posix_path_as_path,
+    sandbox_path_str,
+)
+
+DEFAULT_SUPERSERVE_WORKSPACE_ROOT = "/workspace"
+DEFAULT_SUPERSERVE_TEMPLATE = "superserve/base"
+_DEFAULT_MANIFEST_ROOT = cast(str, Manifest.model_fields["root"].default)
+_SUPERSERVE_TRANSIENT_STATUS_CODES: frozenset[int] = frozenset({408, 425, 429, 500, 502, 503, 504})
+
+logger = logging.getLogger(__name__)
+
+
+def _import_superserve_sdk() -> tuple[Any, Any]:
+    """Return `(AsyncSandbox, NetworkConfig)`, importing the Superserve SDK lazily."""
+    try:
+        from superserve import AsyncSandbox, NetworkConfig
+    except ImportError as exc:
+        raise ImportError(
+            "SuperserveSandboxClient requires the optional `superserve` dependency.\n"
+            "Install the Superserve extra before using this sandbox backend."
+        ) from exc
+    # Returned as a pair so callers can unpack both SDK entry points in one call.
+    return AsyncSandbox, NetworkConfig
+
+
+def _import_superserve_errors() -> dict[str, type[BaseException]]:
+    """Return Superserve exception classes keyed by role, or `{}` if the SDK import fails."""
+    try:
+        from superserve import (
+            AuthenticationError,
+            ConflictError,
+            NotFoundError,
+            SandboxError,
+            SandboxTimeoutError,
+            ServerError,
+            ValidationError,
+        )
+    except Exception:  # best-effort: callers use `.get()` and skip the check on a miss
+        return {}
+    return {
+        "base": SandboxError,
+        "authentication": AuthenticationError,
+        "validation": ValidationError,
+        "not_found": NotFoundError,
+        "conflict": ConflictError,
+        "timeout": SandboxTimeoutError,
+        "server": ServerError,
+    }
+
+
+def _provider_error_detail(error: BaseException) -> str:
+    """Format a provider error as `HTTP <status>: <code>: <message>`, omitting absent parts."""
+    status = getattr(error, "status_code", None)
+    code = getattr(error, "code", None)
+    message = str(error)
+    parts: list[str] = []
+    if isinstance(status, int):
+        parts.append(f"HTTP {status}")
+    if isinstance(code, str) and code:
+        parts.append(code)
+    if message:
+        parts.append(message)
+    # Fall back to the exception class name so the detail is never empty (and never None).
+    return ": ".join(parts) if parts else type(error).__name__
+
+
+def _is_transient_error(exc: BaseException) -> bool:
+    """Return whether `exc`'s chain has a retryable HTTP status or an asyncio timeout."""
+    retryable_status = exception_chain_has_status_code(exc, _SUPERSERVE_TRANSIENT_STATUS_CODES)
+    return retryable_status or exception_chain_contains_type(exc, (asyncio.TimeoutError,))
+
+
+def _resolve_manifest_root(manifest: Manifest | None) -> Manifest:
+    """Swap an absent or default manifest root for the Superserve workspace root."""
+    if manifest is not None and manifest.root != _DEFAULT_MANIFEST_ROOT:
+        return manifest  # non-default root: hand the manifest back untouched
+    root = DEFAULT_SUPERSERVE_WORKSPACE_ROOT
+    return Manifest(root=root) if manifest is None else manifest.model_copy(update={"root": root})
+
+
+def _resolve_template(value: str | None) -> str:
+    return value or DEFAULT_SUPERSERVE_TEMPLATE  # `None` and "" both fall back to the default
+
+
+class SuperserveSandboxTimeouts(BaseModel):
+    """Timeout configuration for Superserve sandbox operations (seconds)."""
+
+    model_config = {"frozen": True}  # pydantic: field assignment on instances is rejected
+
+    exec_timeout_unbounded_s: int = Field(default=24 * 60 * 60, ge=1)  # default is 24 hours
+    keepalive_s: int = Field(default=10, ge=1)  # bounds the `get_info()` probe in `running()`
+    cleanup_s: int = Field(default=30, ge=1)
+    fast_op_s: int = Field(default=30, ge=1)  # used for the workspace-root `mkdir`
+    file_upload_s: int = Field(default=300, ge=1)  # passed to `files.write`
+    file_download_s: int = Field(default=300, ge=1)  # passed to `files.read`
+    workspace_tar_s: int = Field(default=300, ge=1)
+
+
+class SuperserveSandboxClientOptions(BaseSandboxClientOptions):
+    """Options passed to `SuperserveSandboxClient` when creating a Superserve sandbox."""
+
+    type: Literal["superserve"] = "superserve"
+    template: str | None = None
+    name: str | None = None
+    env_vars: dict[str, str] | None = None
+    metadata: dict[str, str] | None = None
+    network: dict[str, object] | None = None
+    timeout_seconds: int | None = None
+    pause_on_exit: bool = False
+    api_key: str | None = None
+    base_url: str | None = None
+    exposed_ports: tuple[int, ...] = ()
+    timeouts: SuperserveSandboxTimeouts | dict[str, object] | None = None
+
+    def __init__(  # explicit signature: every field except `type` may be passed positionally
+        self,
+        template: str | None = None,
+        name: str | None = None,
+        env_vars: dict[str, str] | None = None,
+        metadata: dict[str, str] | None = None,
+        network: dict[str, object] | None = None,
+        timeout_seconds: int | None = None,
+        pause_on_exit: bool = False,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        exposed_ports: tuple[int, ...] = (),
+        timeouts: SuperserveSandboxTimeouts | dict[str, object] | None = None,
+        *,
+        type: Literal["superserve"] = "superserve",
+    ) -> None:
+        super().__init__(  # forwarded by keyword so the base model validates each field
+            type=type,
+            template=template,
+            name=name,
+            env_vars=env_vars,
+            metadata=metadata,
+            network=network,
+            timeout_seconds=timeout_seconds,
+            pause_on_exit=pause_on_exit,
+            api_key=api_key,
+            base_url=base_url,
+            exposed_ports=exposed_ports,
+            timeouts=timeouts,
+        )
+
+
+class SuperserveSandboxSessionState(SandboxSessionState):
+    """Serializable state for a Superserve-backed session."""
+
+    type: Literal["superserve"] = "superserve"
+    sandbox_id: str  # set to the provider id by `_ensure_sandbox()`
+    template: str = DEFAULT_SUPERSERVE_TEMPLATE
+    name: str | None = None  # sandbox name; the session id hex is used when unset
+    base_env_vars: dict[str, str] = Field(default_factory=dict)  # manifest env vars override
+    base_metadata: dict[str, str] = Field(default_factory=dict)
+    base_network: dict[str, object] | None = None  # validated via `NetworkConfig.model_validate`
+    timeout_seconds: int | None = None
+    pause_on_exit: bool = False  # when true, shutdown pauses the sandbox instead of killing it
+    base_url: str | None = None
+    api_key: str | None = None  # NOTE(review): secret on serializable state; confirm redaction
+    timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts)
+
+
+class SuperserveSandboxSession(BaseSandboxSession):
+    """SandboxSession implementation backed by a Superserve sandbox."""
+
+    state: SuperserveSandboxSessionState
+    _sandbox: Any | None
+
+    def __init__(
+        self,
+        *,
+        state: SuperserveSandboxSessionState,
+        sandbox: Any | None = None,
+    ) -> None:
+        self.state = state
+        self._sandbox = sandbox  # None means `_ensure_sandbox()` creates one on demand
+
+    @classmethod
+    def from_state(
+        cls,
+        state: SuperserveSandboxSessionState,
+        *,
+        sandbox: Any | None = None,
+    ) -> SuperserveSandboxSession:
+        return cls(state=state, sandbox=sandbox)
+
+    @property
+    def sandbox_id(self) -> str:
+        return self.state.sandbox_id
+
+    def supports_pty(self) -> bool:
+        return False
+
+    def _reject_user_arg(
+        self, *, op: Literal["exec", "read", "write"], user: str | User
+    ) -> None:
+        user_name = user.name if isinstance(user, User) else user  # plain name for error context
+        raise ConfigurationError(  # unconditional: callers only invoke this with a user set
+            message=(
+                "SuperserveSandboxSession does not support sandbox-local users; "
+                f"`{op}` must be called without `user`"
+            ),
+            error_code=ErrorCode.SANDBOX_CONFIG_INVALID,
+            op=op,
+            context={"backend": "superserve", "user": user_name},
+        )
+
+    def _prepare_exec_command(
+        self,
+        *command: str | Path,
+        shell: bool | list[str],
+        user: str | User | None,
+    ) -> list[str]:
+        if user is not None:
+            self._reject_user_arg(op="exec", user=user)  # always raises ConfigurationError
+        return super()._prepare_exec_command(*command, shell=shell, user=user)
+
+    async def _validate_path_access(self, path: Path | str, *, for_write: bool = False) -> Path:
+        return await self._validate_remote_path_access(path, for_write=for_write)
+
+    def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]:
+        return (RESOLVE_WORKSPACE_PATH_HELPER,)
+
+    async def _resolved_envs(self) -> dict[str, str]:
+        """Build the environment variables sent to the sandbox.
+
+        Manifest values override `base_env_vars`; entries whose value is `None` are dropped.
+        """
+        manifest_envs = await self.state.manifest.environment.resolve()
+        merged = {**self.state.base_env_vars, **manifest_envs}
+        return {key: value for key, value in merged.items() if value is not None}
+
+    async def _ensure_sandbox(self) -> Any:
+        sandbox = self._sandbox
+        if sandbox is not None:
+            return sandbox  # reuse the sandbox already attached to this session
+
+        AsyncSandbox, NetworkConfig = _import_superserve_sdk()
+        env_vars = await self._resolved_envs()
+        network_payload = self.state.base_network
+        network = (
+            NetworkConfig.model_validate(network_payload) if network_payload is not None else None
+        )
+        try:
+            sandbox = await AsyncSandbox.create(
+                name=self.state.name or self.state.session_id.hex,  # fall back to the session id
+                from_template=self.state.template,
+                timeout_seconds=self.state.timeout_seconds,
+                metadata=dict(self.state.base_metadata) or None,  # empty dict is sent as None
+                env_vars=env_vars or None,  # empty dict is sent as None
+                network=network,
+                api_key=self.state.api_key,
+                base_url=self.state.base_url,
+            )
+        except Exception as exc:
+            raise WorkspaceStartError(
+                path=self._workspace_root_path(),
+                context={"backend": "superserve", "reason": "create_failed"},
+                cause=exc,
+                message=f"failed to start Superserve sandbox: {_provider_error_detail(exc)}",
+            ) from exc
+
+        self._sandbox = sandbox
+        self.state.sandbox_id = sandbox.id  # record the provider id on the session state
+        return sandbox
+
+    async def _prepare_backend_workspace(self) -> None:
+        root = self._workspace_root_path()
+        sandbox = await self._ensure_sandbox()
+        try:
+            result = await sandbox.commands.run(
+                f"mkdir -p -- {_shell_quote(root.as_posix())}",  # `-p`: fine if it already exists
+                timeout_seconds=self.state.timeouts.fast_op_s,
+            )
+        except Exception as exc:
+            raise WorkspaceStartError(
+                path=root,
+                context={"backend": "superserve", "reason": "workspace_root_setup_failed"},
+                cause=exc,
+                message=(
+                    "failed to start session: Superserve workspace root setup failed: "
+                    f"{_provider_error_detail(exc)}"
+                ),
+            ) from exc
+
+        exit_code = int(getattr(result, "exit_code", 0) or 0)  # missing or None counts as 0
+        if exit_code != 0:
+            stdout = getattr(result, "stdout", "") or ""
+            stderr = getattr(result, "stderr", "") or ""
+            raise WorkspaceStartError(
+                path=root,
+                context={
+                    "backend": "superserve",
+                    "reason": "workspace_root_nonzero_exit",
+                    "exit_code": exit_code,
+                    "stdout": stdout,
+                    "stderr": stderr,
+                },
+                message=(
+                    f"failed to start session: Superserve workspace root setup exited with "
+                    f"{exit_code}"
+                ),
+            )
+
+    async def running(self) -> bool:
+        sandbox = self._sandbox
+        if sandbox is None:
+            return False  # no sandbox attached to this session
+        try:
+            info = await asyncio.wait_for(
+                sandbox.get_info(),
+                timeout=self.state.timeouts.keepalive_s,
+            )
+        except Exception:
+            return False  # probe errors and timeouts are reported as "not running"
+        status = getattr(info, "status", None)
+        status_value = getattr(status, "value", status)  # unwrap `.value` when present
+        return status_value == "active"
+
+    async def shutdown(self) -> None:
+        await self._shutdown_backend()
+
+    async def _shutdown_backend(self) -> None:
+        """Pause (if `pause_on_exit`) or kill the sandbox; errors are logged, not raised."""
+        sandbox = self._sandbox
+        if sandbox is None:
+            return
+        try:
+            await (sandbox.pause() if self.state.pause_on_exit else sandbox.kill())
+        except Exception:
+            # Best-effort teardown: do not fail shutdown, but leave a trace of the failure.
+            logger.warning("Superserve sandbox cleanup failed", exc_info=True)
+        finally:
+            # Drop the handle either way so later calls do not reuse a stale sandbox object.
+            self._sandbox = None
+
+    async def _exec_internal(
+        self,
+        *command: str | Path,
+        timeout: float | None = None,
+    ) -> ExecResult:
+        sandbox = await self._ensure_sandbox()
+        sup_errors = _import_superserve_errors()
+        timeout_exc = sup_errors.get("timeout")  # None when the SDK error classes are missing
+        normalized = [str(part) for part in command]
+        if not normalized:
+            return ExecResult(stdout=b"", stderr=b"", exit_code=0)  # empty command is a no-op
+
+        command_str = " ".join(_shell_quote(part) for part in normalized)
+        envs = await self._resolved_envs()
+        cwd = sandbox_path_str(self.state.manifest.root)  # commands run from the manifest root
+        timeout_seconds = (
+            None
+            if timeout is None  # NOTE(review): exec_timeout_unbounded_s is unused here — confirm
+            else max(1, int(timeout + 0.999))  # round up; Superserve only accepts ints
+        )
+
+        try:
+            result = await sandbox.commands.run(
+                command_str,
+                cwd=cwd,
+                env=envs or None,
+                timeout_seconds=timeout_seconds,
+            )
+        except asyncio.TimeoutError as exc:
+            raise ExecTimeoutError(
+                command=tuple(normalized), timeout_s=timeout, cause=exc
+            ) from exc
+        except Exception as exc:
+            if timeout_exc is not None and isinstance(exc, timeout_exc):
+                raise ExecTimeoutError(
+                    command=tuple(normalized), timeout_s=timeout, cause=exc
+                ) from exc
+            raise ExecTransportError(  # every other provider failure is a transport error
+                command=tuple(normalized),
+                context={
+                    "backend": "superserve",
+                    "sandbox_id": self.state.sandbox_id,
+                    "provider_error": _provider_error_detail(exc),
+                },
+                cause=exc,
+            ) from exc
+
+        stdout = (getattr(result, "stdout", "") or "").encode("utf-8", errors="replace")
+        stderr = (getattr(result, "stderr", "") or "").encode("utf-8", errors="replace")
+        exit_code = int(getattr(result, "exit_code", 0) or 0)  # missing or None counts as 0
+        return ExecResult(stdout=stdout, stderr=stderr, exit_code=exit_code)
+
+    async def _resolve_exposed_port(self, port: int) -> ExposedPortEndpoint:
+        raise ExposedPortUnavailableError(  # unconditional: this backend never resolves a port
+            port=port,
+            exposed_ports=self.state.exposed_ports,
+            reason="backend_unavailable",
+            context={
+                "backend": "superserve",
+                "detail": "exposed_ports_not_supported",
+            },
+        )
+
+    async def read(self, path: Path, *, user: str | User | None = None) -> io.IOBase:
+        if user is not None:
+            self._reject_user_arg(op="read", user=user)  # always raises ConfigurationError
+        sup_errors = _import_superserve_errors()
+        not_found_exc = sup_errors.get("not_found")  # None when the SDK classes are missing
+
+        normalized_path = await self._validate_path_access(path)
+        sandbox = await self._ensure_sandbox()
+        try:
+            payload = await sandbox.files.read(
+                sandbox_path_str(normalized_path),
+                timeout=self.state.timeouts.file_download_s,
+            )
+        except Exception as exc:
+            if not_found_exc is not None and isinstance(exc, not_found_exc):
+                raise WorkspaceReadNotFoundError(path=normalized_path, cause=exc) from exc
+            raise WorkspaceArchiveReadError(path=normalized_path, cause=exc) from exc
+        return io.BytesIO(payload)  # assumes `files.read` returns bytes — TODO confirm
+
+    async def write(
+        self,
+        path: Path,
+        data: io.IOBase,
+        *,
+        user: str | User | None = None,
+    ) -> None:
+        if user is not None:
+            self._reject_user_arg(op="write", user=user)  # always raises ConfigurationError
+
+        normalized_path = await self._validate_path_access(path, for_write=True)
+        payload = data.read()  # the whole stream is read into memory before upload
+        if isinstance(payload, str):
+            payload = payload.encode("utf-8")  # text streams are uploaded as UTF-8
+        if not isinstance(payload, bytes | bytearray):
+            raise WorkspaceWriteTypeError(
+                path=normalized_path,
+                actual_type=type(payload).__name__,
+            )
+        try:
+            await self._write_bytes_with_retry(
+                sandbox_path_str(normalized_path), bytes(payload)
+            )
+        except Exception as exc:  # raised once the retry helper gives up or does not retry
+            raise WorkspaceArchiveWriteError(path=normalized_path, cause=exc) from exc
+
+    @retry_async(
+        # Retry only when `_is_transient_error` classifies the failure as transient. The
+        # predicate's other parameters mirror this method's arguments and are unused.
+        retry_if=lambda exc, self, _path, _data: _is_transient_error(exc),
+    )
+    async def _write_bytes_with_retry(self, path: str, data: bytes) -> None:
+        """Write ``data`` to ``path`` through the sandbox files API, under ``retry_async``.
+
+        Args:
+            path: Absolute sandbox path, already rendered as a string.
+            data: Raw bytes to upload.
+
+        The upload uses ``state.timeouts.file_upload_s`` as its timeout. Attempt count and
+        backoff come from ``retry_async`` defaults, which are not visible here.
+        """
+        sandbox = await self._ensure_sandbox()
+        await sandbox.files.write(path, data, timeout=self.state.timeouts.file_upload_s)
+
+    async def persist_workspace(self) -> io.IOBase:
+        """Return a tar archive of the workspace as a readable stream.
+
+        Delegates to ``_persist_workspace_internal`` via ``with_ephemeral_mounts_removed``.
+        NOTE(review): presumably that helper detaches ephemeral mounts for the duration and
+        restores them afterwards — confirm against its definition. Failures are reported
+        as ``WorkspaceArchiveReadError`` against the workspace root path.
+        """
+        return await with_ephemeral_mounts_removed(
+            self,
+            self._persist_workspace_internal,
+            error_path=self._workspace_root_path(),
+            error_cls=WorkspaceArchiveReadError,
+            operation_error_context_key="snapshot_error_before_remount_corruption",
+        )
+
+    async def _persist_workspace_internal(self) -> io.IOBase:
+        root = self._workspace_root_path()
+        archive_path = posix_path_as_path(
+            coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar")
+        )
+        excludes = [
+            f"--exclude=./{rel_path.as_posix()}"
+            for rel_path in sorted(
+                self._persist_workspace_skip_relpaths(),
+                key=lambda item: item.as_posix(),
+            )
+        ]
+        tar_command = ("tar", "cf", archive_path.as_posix(), *excludes, ".")
+
+        sandbox = await self._ensure_sandbox()
+        sup_errors = _import_superserve_errors()
+        not_found_exc = sup_errors.get("not_found")
+
+        try:
+            result = await self.exec(*tar_command, shell=False)
+            if not result.ok():
+                raise WorkspaceArchiveReadError(
+                    path=root,
+                    cause=ExecNonZeroError(
+                        result,
+                        command=tar_command,
+                        context={
+                            "backend": "superserve",
+                            "sandbox_id": self.state.sandbox_id,
+                        },
+                    ),
+                )
+
+            try:
+                archive = await sandbox.files.read(
+                    archive_path.as_posix(),
+                    timeout=self.state.timeouts.file_download_s,
+                )
+            except Exception as exc:
+                if not_found_exc is not None and isinstance(exc, not_found_exc):
+                    raise WorkspaceReadNotFoundError(path=archive_path, cause=exc) from exc
+                raise
+
+            return io.BytesIO(archive)
+        except (WorkspaceArchiveReadError, WorkspaceReadNotFoundError):
+            raise
+        except Exception as exc:
+            raise WorkspaceArchiveReadError(path=root, cause=exc) from exc
+        finally:
+            try:
+                await self.exec(
+                    "rm",
+                    "-f",
+                    "--",
+                    archive_path.as_posix(),
+                    shell=False,
+                )
+            except Exception:
+                pass
+
+    async def hydrate_workspace(self, data: io.IOBase) -> None:
+        raw = data.read()
+        if isinstance(raw, str):
+            raw = raw.encode("utf-8")
+        if not isinstance(raw, bytes | bytearray):
+            raise WorkspaceWriteTypeError(
+                path=self._workspace_root_path(),
+                actual_type=type(raw).__name__,
+            )
+
+        await with_ephemeral_mounts_removed(
+            self,
+            lambda: self._hydrate_workspace_internal(bytes(raw)),
+            error_path=self._workspace_root_path(),
+            error_cls=WorkspaceArchiveWriteError,
+            operation_error_context_key="hydrate_error_before_remount_corruption",
+        )
+
+    async def _hydrate_workspace_internal(self, raw: bytes) -> None:
+        root = self._workspace_root_path()
+        archive_path = posix_path_as_path(
+            coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar")
+        )
+        tar_command = ("tar", "xf", archive_path.as_posix(), "-C", root.as_posix())
+
+        try:
+            validate_tar_bytes(raw, allow_external_symlink_targets=False)
+        except UnsafeTarMemberError as exc:
+            raise WorkspaceArchiveWriteError(
+                path=root,
+                context={
+                    "reason": "unsafe_or_invalid_tar",
+                    "member": exc.member,
+                    "detail": str(exc),
+                },
+                cause=exc,
+            ) from exc
+
+        try:
+            await self.mkdir(root, parents=True)
+            await self._write_bytes_with_retry(archive_path.as_posix(), raw)
+            result = await self.exec(*tar_command, shell=False)
+            if not result.ok():
+                raise WorkspaceArchiveWriteError(
+                    path=root,
+                    cause=ExecNonZeroError(
+                        result,
+                        command=tar_command,
+                        context={
+                            "backend": "superserve",
+                            "sandbox_id": self.state.sandbox_id,
+                        },
+                    ),
+                )
+        except WorkspaceArchiveWriteError:
+            raise
+        except Exception as exc:
+            raise WorkspaceArchiveWriteError(path=root, cause=exc) from exc
+        finally:
+            try:
+                await self.exec(
+                    "rm",
+                    "-f",
+                    "--",
+                    archive_path.as_posix(),
+                    shell=False,
+                )
+            except Exception:
+                pass
+
+
+def _shell_quote(value: str) -> str:
+    """Return ``value`` quoted so a POSIX shell treats it as one literal token.
+
+    Thin wrapper around ``shlex.quote``; ``shlex`` is imported inside the function.
+    The original note says this keeps quoting in line with the Vercel/Daytona backends.
+    """
+    import shlex
+
+    return shlex.quote(value)
+
+
+class SuperserveSandboxClient(BaseSandboxClient[SuperserveSandboxClientOptions]):
+    """Superserve-backed sandbox client managing sandbox lifecycle via AsyncSandbox."""
+
+    backend_id = "superserve"
+    _instrumentation: Instrumentation
+    _api_key: str | None
+    _base_url: str | None
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        instrumentation: Instrumentation | None = None,
+        dependencies: Dependencies | None = None,
+    ) -> None:
+        super().__init__()
+        self._api_key = api_key
+        self._base_url = base_url
+        self._instrumentation = instrumentation or Instrumentation()
+        self._dependencies = dependencies
+
+    def _resolve_timeouts(
+        self,
+        value: SuperserveSandboxTimeouts | dict[str, object] | None,
+    ) -> SuperserveSandboxTimeouts:
+        if isinstance(value, SuperserveSandboxTimeouts):
+            return value
+        if value is None:
+            return SuperserveSandboxTimeouts()
+        return SuperserveSandboxTimeouts.model_validate(value)
+
+    async def create(
+        self,
+        *,
+        snapshot: SnapshotSpec | SnapshotBase | None = None,
+        manifest: Manifest | None = None,
+        options: SuperserveSandboxClientOptions,
+    ) -> SandboxSession:
+        resolved_manifest = _resolve_manifest_root(manifest)
+        timeouts = self._resolve_timeouts(options.timeouts)
+        api_key = options.api_key or self._api_key
+        base_url = options.base_url or self._base_url
+        template = _resolve_template(options.template)
+
+        session_id = uuid.uuid4()
+        sandbox_name = options.name or f"openai-agents-{session_id.hex[:12]}"
+        snapshot_instance = resolve_snapshot(snapshot, str(session_id))
+
+        state = SuperserveSandboxSessionState(
+            session_id=session_id,
+            manifest=resolved_manifest,
+            snapshot=snapshot_instance,
+            sandbox_id="",
+            template=template,
+            name=sandbox_name,
+            base_env_vars=dict(options.env_vars or {}),
+            base_metadata=dict(options.metadata or {}),
+            base_network=dict(options.network) if options.network is not None else None,
+            timeout_seconds=options.timeout_seconds,
+            pause_on_exit=options.pause_on_exit,
+            base_url=base_url,
+            api_key=api_key,
+            timeouts=timeouts,
+            exposed_ports=options.exposed_ports,
+        )
+        inner = SuperserveSandboxSession.from_state(state)
+        await inner._ensure_sandbox()
+        return self._wrap_session(inner, instrumentation=self._instrumentation)
+
+    async def delete(self, session: SandboxSession) -> SandboxSession:
+        inner = session._inner
+        if not isinstance(inner, SuperserveSandboxSession):
+            raise TypeError(
+                "SuperserveSandboxClient.delete expects a SuperserveSandboxSession"
+            )
+        try:
+            await inner.shutdown()
+        except Exception:
+            pass
+        return session
+
+    async def resume(self, state: SandboxSessionState) -> SandboxSession:
+        if not isinstance(state, SuperserveSandboxSessionState):
+            raise TypeError(
+                "SuperserveSandboxClient.resume expects a SuperserveSandboxSessionState"
+            )
+
+        AsyncSandbox, _ = _import_superserve_sdk()
+        sup_errors = _import_superserve_errors()
+        not_found_exc = sup_errors.get("not_found")
+
+        api_key = state.api_key or self._api_key
+        base_url = state.base_url or self._base_url
+        if state.api_key is None and api_key is not None:
+            state.api_key = api_key
+        if state.base_url is None and base_url is not None:
+            state.base_url = base_url
+
+        sandbox: Any | None = None
+        reconnected = False
+
+        if state.sandbox_id:
+            try:
+                sandbox = await AsyncSandbox.connect(
+                    state.sandbox_id,
+                    api_key=api_key,
+                    base_url=base_url,
+                )
+                status = getattr(sandbox, "status", None)
+                status_value = getattr(status, "value", status)
+                if status_value == "paused":
+                    await sandbox.resume()
+                elif status_value == "resuming":
+                    await sandbox.resume()
+                elif status_value == "failed":
+                    sandbox = None
+                # else status_value == "active" → already running
+                if sandbox is not None:
+                    reconnected = True
+            except Exception as exc:
+                if not_found_exc is not None and isinstance(exc, not_found_exc):
+                    logger.debug(
+                        "superserve sandbox %s not found, will recreate", state.sandbox_id
+                    )
+                else:
+                    logger.debug(
+                        "superserve connect/resume failed (will recreate): %s", exc
+                    )
+                sandbox = None
+
+        if sandbox is None:
+            state.sandbox_id = ""
+            state.workspace_root_ready = False
+
+        inner = SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+        if sandbox is None:
+            await inner._ensure_sandbox()
+        inner._set_start_state_preserved(reconnected, system=reconnected)
+        return self._wrap_session(inner, instrumentation=self._instrumentation)
+
+    def deserialize_session_state(self, payload: dict[str, object]) -> SandboxSessionState:
+        return SuperserveSandboxSessionState.model_validate(payload)
+
+
+# Names exported by this module.
+__all__ = [
+    "DEFAULT_SUPERSERVE_TEMPLATE",
+    "DEFAULT_SUPERSERVE_WORKSPACE_ROOT",
+    "SuperserveSandboxClient",
+    "SuperserveSandboxClientOptions",
+    "SuperserveSandboxSession",
+    "SuperserveSandboxSessionState",
+    "SuperserveSandboxTimeouts",
+]
diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py
new file mode 100644
index 0000000000..01540cd054
--- /dev/null
+++ b/tests/extensions/sandbox/test_superserve.py
@@ -0,0 +1,760 @@
+from __future__ import annotations
+
+import importlib
+import io
+import sys
+import tarfile
+import types
+from pathlib import Path
+from typing import Any, cast
+
+import pytest
+from pydantic import BaseModel
+
+from agents.sandbox import Manifest
+from agents.sandbox.entries import File
+from agents.sandbox.errors import (
+    ConfigurationError,
+    ExposedPortUnavailableError,
+    InvalidManifestPathError,
+)
+from agents.sandbox.snapshot import NoopSnapshot
+from agents.sandbox.types import User
+from tests._fake_workspace_paths import resolve_fake_workspace_path
+
+
+class _FakeCommandResult:
+    """Test double for a command result: text stdout/stderr plus an integer exit code."""
+
+    def __init__(self, *, stdout: str = "", stderr: str = "", exit_code: int = 0) -> None:
+        self.stdout = stdout
+        self.stderr = stderr
+        self.exit_code = exit_code
+
+
+class _FakeSandboxInfo(BaseModel):
+    """Value returned by ``_FakeAsyncSandbox.get_info``; carries only the status string."""
+
+    status: str = "active"
+
+
+class _FakeNetworkConfig(BaseModel):
+    """Stand-in installed as ``superserve.NetworkConfig`` on the fake SDK module."""
+
+    allow_out: list[str] | None = None
+    deny_out: list[str] | None = None
+
+
+class _SuperserveNotFoundError(Exception):
+    """Stand-in installed as ``superserve.NotFoundError``; raised by the fakes for misses."""
+
+    status_code = 404
+
+
+class _SuperserveAuthenticationError(Exception):
+    """Stand-in installed as ``superserve.AuthenticationError`` on the fake SDK module."""
+
+    status_code = 401
+
+
+class _SuperserveValidationError(Exception):
+    """Stand-in installed as ``superserve.ValidationError`` on the fake SDK module."""
+
+    status_code = 400
+
+
+class _SuperserveConflictError(Exception):
+    """Stand-in installed as ``superserve.ConflictError`` on the fake SDK module."""
+
+    status_code = 409
+
+
+class _SuperserveServerError(Exception):
+    """Stand-in installed as ``superserve.ServerError`` on the fake SDK module."""
+
+    status_code = 500
+
+
+class _SuperserveSandboxTimeoutError(Exception):
+    """Stand-in installed as ``superserve.SandboxTimeoutError`` on the fake SDK module."""
+
+    pass
+
+
+class _SuperserveSandboxError(Exception):
+    """Stand-in installed as ``superserve.SandboxError`` on the fake SDK module."""
+
+    pass
+
+
+class _FakeCommands:
+    """Fake of the sandbox ``commands`` API backed by the owning sandbox's file store.
+
+    Every invocation is recorded in ``calls``. Commands are matched by string prefix and
+    tokenised with ``str.split``, so arguments containing whitespace are not supported.
+    """
+
+    def __init__(self, sandbox: _FakeAsyncSandbox) -> None:
+        self._sandbox = sandbox
+        self.calls: list[dict[str, object]] = []
+
+    async def run(
+        self,
+        command: str,
+        *,
+        cwd: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_seconds: int | None = None,
+        on_stdout: object | None = None,
+        on_stderr: object | None = None,
+    ) -> _FakeCommandResult:
+        """Record the call and emulate ``command``.
+
+        Resolution order: a queued failure is raised, then a queued result is returned,
+        then the workspace-path resolution helper is tried, then the built-in
+        ``mkdir -p`` / ``tar cf`` / ``tar xf`` / ``rm -f --`` handlers. Anything else
+        succeeds with exit code 0.
+        """
+        _ = (on_stdout, on_stderr)
+        self.calls.append(
+            {
+                "command": command,
+                "cwd": cwd,
+                "env": dict(env) if env is not None else None,
+                "timeout_seconds": timeout_seconds,
+            }
+        )
+        # Test hooks can override the next result or throw.
+        if self._sandbox.command_failures:
+            raise self._sandbox.command_failures.pop(0)
+        next_result = (
+            self._sandbox.command_results.pop(0) if self._sandbox.command_results else None
+        )
+        if next_result is not None:
+            return next_result
+
+        # Handle workspace-path resolution helper used by the base session
+        # for `_validate_remote_path_access`.
+        resolved = resolve_fake_workspace_path(
+            command,
+            symlinks=self._sandbox.symlinks,
+            home_dir="/workspace",
+        )
+        if resolved is not None:
+            return _FakeCommandResult(
+                exit_code=resolved.exit_code,
+                stdout=resolved.stdout,
+                stderr=resolved.stderr,
+            )
+
+        # Built-in handlers for common shell shapes used by the session.
+        if command.startswith("mkdir -p"):
+            return _FakeCommandResult(exit_code=0)
+        if command.startswith("tar cf"):
+            # tar cf <path> [--exclude=./X ...] .
+            tokens = command.split()
+            archive_path = tokens[2]
+            # When the final token is ".", member names are stored relative to cwd.
+            include_root = tokens[-1] == "."
+            exclusions = {
+                token.removeprefix("--exclude=./")
+                for token in tokens
+                if token.startswith("--exclude=./")
+            }
+            cwd_eff = cwd or "/"
+            buffer = io.BytesIO()
+            with tarfile.open(fileobj=buffer, mode="w") as archive:
+                for path, content in sorted(self._sandbox._file_store.items()):
+                    # Only files strictly below the effective cwd are archived.
+                    if not path.startswith(cwd_eff.rstrip("/") + "/"):
+                        continue
+                    rel_path = path[len(cwd_eff.rstrip("/")) + 1 :]
+                    # An exclusion matches the path itself or anything beneath it.
+                    if any(
+                        rel_path == exclusion or rel_path.startswith(f"{exclusion}/")
+                        for exclusion in exclusions
+                    ):
+                        continue
+                    info = tarfile.TarInfo(name=rel_path if include_root else path)
+                    info.size = len(content)
+                    archive.addfile(info, io.BytesIO(content))
+            # The archive becomes a regular entry that `files.read` can return.
+            self._sandbox._file_store[archive_path] = buffer.getvalue()
+            return _FakeCommandResult(exit_code=0)
+        if command.startswith("tar xf"):
+            # tar xf <path> -C <destination>: destination is taken as the last token.
+            tokens = command.split()
+            archive_path = tokens[2]
+            destination = tokens[-1]
+            raw = self._sandbox._file_store.get(archive_path)
+            if raw is None:
+                return _FakeCommandResult(exit_code=1, stderr="archive missing")
+            with tarfile.open(fileobj=io.BytesIO(raw), mode="r") as archive:
+                for member in archive.getmembers():
+                    # Only regular files are materialised in the flat file store.
+                    if not member.isfile():
+                        continue
+                    extracted = archive.extractfile(member)
+                    assert extracted is not None
+                    self._sandbox._file_store[
+                        f"{destination.rstrip('/')}/{member.name}"
+                    ] = extracted.read()
+            return _FakeCommandResult(exit_code=0)
+        if command.startswith("rm -f --"):
+            # Tokens after "rm -f --" are the paths to delete; missing ones are ignored.
+            for token in command.split()[3:]:
+                self._sandbox._file_store.pop(token, None)
+            return _FakeCommandResult(exit_code=0)
+        return _FakeCommandResult(exit_code=0)
+
+
+class _FakeFiles:
+    """Fake of the sandbox ``files`` API that reads and writes the sandbox's file store."""
+
+    def __init__(self, sandbox: _FakeAsyncSandbox) -> None:
+        self._sandbox = sandbox
+        self.write_calls: list[tuple[str, bytes]] = []
+        self.read_calls: list[str] = []
+
+    async def write(self, path: str, content: bytes | str, *, timeout: float | None = None) -> None:
+        """Store ``content`` at ``path``, or raise the next queued write failure.
+
+        A queued failure is raised before the call is recorded, so ``write_calls`` only
+        lists writes that succeeded. Text content is stored as UTF-8 bytes.
+        """
+        _ = timeout
+        if self._sandbox.write_failures:
+            raise self._sandbox.write_failures.pop(0)
+        payload = content.encode("utf-8") if isinstance(content, str) else bytes(content)
+        self.write_calls.append((path, payload))
+        self._sandbox._file_store[path] = payload
+
+    async def read(self, path: str, *, timeout: float | None = None) -> bytes:
+        """Return the bytes stored at ``path``; raise the fake not-found error if absent."""
+        _ = timeout
+        self.read_calls.append(path)
+        if path not in self._sandbox._file_store:
+            raise _SuperserveNotFoundError(f"missing {path}")
+        return self._sandbox._file_store[path]
+
+
+class _FakeAsyncSandbox:
+    """Stand-in installed as ``superserve.AsyncSandbox`` on the fake SDK module.
+
+    The class-level registries below are shared by all instances and cleared by
+    ``reset()``, which ``_load_superserve_module`` calls for each test.
+    """
+
+    # Keyword arguments of every `create()` call, in order.
+    create_calls: list[dict[str, object]] = []
+    # Sandbox id plus keyword arguments of every `connect()` call, in order.
+    connect_calls: list[dict[str, object]] = []
+    # Sandboxes made through `create()`, keyed by id; `connect()` looks them up here.
+    sandboxes: dict[str, _FakeAsyncSandbox] = {}
+    # Ids for which `connect()` raises the fake not-found error.
+    fail_connect_ids: set[str] = set()
+    # Exceptions raised by upcoming `create()` calls, consumed first-in first-out.
+    create_failures: list[BaseException] = []
+
+    def __init__(self, *, sandbox_id: str, status: str = "active") -> None:
+        self.id = sandbox_id
+        self.name = sandbox_id
+        self.status = status
+        self.metadata: dict[str, str] = {}
+        # Flat path -> bytes mapping shared with the fake commands and files APIs.
+        self._file_store: dict[str, bytes] = {}
+        self.symlinks: dict[str, str] = {}
+        # Per-instance hooks consumed first-in first-out by the fake APIs.
+        self.command_results: list[_FakeCommandResult] = []
+        self.command_failures: list[BaseException] = []
+        self.write_failures: list[BaseException] = []
+        self.pause_calls = 0
+        self.resume_calls = 0
+        self.kill_calls = 0
+        self.commands = _FakeCommands(self)
+        self.files = _FakeFiles(self)
+
+    @classmethod
+    def reset(cls) -> None:
+        """Replace every class-level registry with a fresh empty container."""
+        cls.create_calls = []
+        cls.connect_calls = []
+        cls.sandboxes = {}
+        cls.fail_connect_ids = set()
+        cls.create_failures = []
+
+    @classmethod
+    async def create(cls, **kwargs: object) -> _FakeAsyncSandbox:
+        """Record the call, then raise a queued failure or register a new sandbox.
+
+        Ids are ``sup-<n>`` where ``n`` counts recorded ``create`` calls, including
+        calls that failed.
+        """
+        cls.create_calls.append(dict(kwargs))
+        if cls.create_failures:
+            raise cls.create_failures.pop(0)
+        sandbox_id = f"sup-{len(cls.create_calls)}"
+        sandbox = cls(sandbox_id=sandbox_id)
+        sandbox.metadata = dict(cast(dict[str, str], kwargs.get("metadata") or {}))
+        cls.sandboxes[sandbox_id] = sandbox
+        return sandbox
+
+    @classmethod
+    async def connect(cls, sandbox_id: str, **kwargs: object) -> _FakeAsyncSandbox:
+        """Return a registered sandbox, or raise the fake not-found error.
+
+        Raises when the id is listed in ``fail_connect_ids`` or was never created.
+        """
+        cls.connect_calls.append({"sandbox_id": sandbox_id, **kwargs})
+        if sandbox_id in cls.fail_connect_ids:
+            raise _SuperserveNotFoundError(f"sandbox {sandbox_id} not found")
+        sandbox = cls.sandboxes.get(sandbox_id)
+        if sandbox is None:
+            raise _SuperserveNotFoundError(f"sandbox {sandbox_id} not found")
+        return sandbox
+
+    async def get_info(self) -> _FakeSandboxInfo:
+        """Return a snapshot of the current status."""
+        return _FakeSandboxInfo(status=self.status)
+
+    async def pause(self) -> None:
+        """Count the call and mark the sandbox as paused."""
+        self.pause_calls += 1
+        self.status = "paused"
+
+    async def resume(self) -> None:
+        """Count the call and mark the sandbox as active."""
+        self.resume_calls += 1
+        self.status = "active"
+
+    async def kill(self) -> None:
+        """Count the call and mark the sandbox as deleted."""
+        self.kill_calls += 1
+        self.status = "deleted"
+
+def _load_superserve_module(monkeypatch: pytest.MonkeyPatch) -> Any:
+    """Import the Superserve backend module against a fake ``superserve`` SDK.
+
+    Resets the shared fake state, installs a synthetic ``superserve`` module in
+    ``sys.modules`` via ``monkeypatch``, drops any cached backend modules so the import
+    below binds to the fake, and returns the freshly imported backend module.
+    """
+    _FakeAsyncSandbox.reset()
+
+    fake_module = types.ModuleType("superserve")
+    fake_module.AsyncSandbox = _FakeAsyncSandbox  # type: ignore[attr-defined]
+    fake_module.NetworkConfig = _FakeNetworkConfig  # type: ignore[attr-defined]
+    fake_module.NotFoundError = _SuperserveNotFoundError  # type: ignore[attr-defined]
+    fake_module.AuthenticationError = _SuperserveAuthenticationError  # type: ignore[attr-defined]
+    fake_module.ValidationError = _SuperserveValidationError  # type: ignore[attr-defined]
+    fake_module.ConflictError = _SuperserveConflictError  # type: ignore[attr-defined]
+    fake_module.ServerError = _SuperserveServerError  # type: ignore[attr-defined]
+    fake_module.SandboxTimeoutError = _SuperserveSandboxTimeoutError  # type: ignore[attr-defined]
+    fake_module.SandboxError = _SuperserveSandboxError  # type: ignore[attr-defined]
+
+    monkeypatch.setitem(sys.modules, "superserve", fake_module)
+    # These pops are not tracked by monkeypatch, so the re-imported backend modules
+    # stay in sys.modules after the test finishes.
+    sys.modules.pop("agents.extensions.sandbox.superserve.sandbox", None)
+    sys.modules.pop("agents.extensions.sandbox.superserve", None)
+
+    return importlib.import_module("agents.extensions.sandbox.superserve.sandbox")
+
+
+# ---------------------------------------------------------------------------
+# Package re-exports & basic shape
+# ---------------------------------------------------------------------------
+
+
+def test_superserve_package_re_exports_backend_symbols(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The package ``__init__`` exposes the same objects as the ``sandbox`` module."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    package_module = importlib.import_module("agents.extensions.sandbox.superserve")
+
+    # Identity, not equality: the package must re-export, not redefine, the classes.
+    assert package_module.SuperserveSandboxClient is superserve_module.SuperserveSandboxClient
+    assert (
+        package_module.SuperserveSandboxSessionState
+        is superserve_module.SuperserveSandboxSessionState
+    )
+    assert (
+        package_module.DEFAULT_SUPERSERVE_WORKSPACE_ROOT
+        == superserve_module.DEFAULT_SUPERSERVE_WORKSPACE_ROOT
+    )
+
+
+def test_superserve_supports_pty_is_false(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A session built from state reports that PTY execution is unsupported."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000001",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    session = superserve_module.SuperserveSandboxSession.from_state(state)
+    assert not session.supports_pty()
+
+
+def test_superserve_options_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Client options survive a JSON-mode dump followed by ``model_validate``."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    options = superserve_module.SuperserveSandboxClientOptions(
+        template="superserve/python-3.11",
+        env_vars={"HELLO": "world"},
+        metadata={"team": "agents"},
+        pause_on_exit=True,
+        timeout_seconds=300,
+    )
+    dumped = options.model_dump(mode="json")
+    rebuilt = superserve_module.SuperserveSandboxClientOptions.model_validate(dumped)
+    assert rebuilt.template == "superserve/python-3.11"
+    assert rebuilt.env_vars == {"HELLO": "world"}
+    assert rebuilt.metadata == {"team": "agents"}
+    assert rebuilt.pause_on_exit is True
+    assert rebuilt.timeout_seconds == 300
+
+
+def test_superserve_session_state_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Session state dumped to JSON is rebuilt by ``deserialize_session_state``."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000099",
+        manifest=Manifest(root="/workspace"),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+        template="superserve/node-22",
+        pause_on_exit=True,
+        base_env_vars={"FLAG": "1"},
+    )
+    payload = state.model_dump(mode="json")
+    client = superserve_module.SuperserveSandboxClient()
+    rebuilt = client.deserialize_session_state(payload)
+    assert isinstance(rebuilt, superserve_module.SuperserveSandboxSessionState)
+    assert rebuilt.sandbox_id == "sup-existing"
+    assert rebuilt.template == "superserve/node-22"
+    assert rebuilt.pause_on_exit is True
+    assert rebuilt.base_env_vars == {"FLAG": "1"}
+
+
+# ---------------------------------------------------------------------------
+# create()
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_create_passes_provider_options(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``create`` forwards template, env vars, metadata and timeout to the SDK."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    client = superserve_module.SuperserveSandboxClient()
+
+    session = await client.create(
+        manifest=Manifest(),
+        options=superserve_module.SuperserveSandboxClientOptions(
+            template="superserve/python-3.11",
+            env_vars={"HELLO": "world"},
+            metadata={"team": "agents"},
+            timeout_seconds=600,
+        ),
+    )
+
+    assert len(_FakeAsyncSandbox.create_calls) == 1
+    call = _FakeAsyncSandbox.create_calls[0]
+    assert call["from_template"] == "superserve/python-3.11"
+    assert call["env_vars"] == {"HELLO": "world"}
+    assert call["metadata"] == {"team": "agents"}
+    assert call["timeout_seconds"] == 600
+    # The fake numbers sandboxes from 1, so the first created id is "sup-1".
+    assert session._inner.state.sandbox_id == "sup-1"
+    # A default Manifest() ends up rooted at the backend's default workspace root.
+    assert (
+        session._inner.state.manifest.root
+        == superserve_module.DEFAULT_SUPERSERVE_WORKSPACE_ROOT
+    )
+    assert session._inner.state.template == "superserve/python-3.11"
+
+
+@pytest.mark.asyncio
+async def test_superserve_create_uses_default_template(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Without an explicit template, ``create`` uses ``DEFAULT_SUPERSERVE_TEMPLATE``."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    client = superserve_module.SuperserveSandboxClient()
+
+    session = await client.create(
+        manifest=Manifest(),
+        options=superserve_module.SuperserveSandboxClientOptions(),
+    )
+
+    call = _FakeAsyncSandbox.create_calls[0]
+    assert call["from_template"] == superserve_module.DEFAULT_SUPERSERVE_TEMPLATE
+    assert session._inner.state.template == superserve_module.DEFAULT_SUPERSERVE_TEMPLATE
+
+
+@pytest.mark.asyncio
+async def test_superserve_create_allows_manifest_root_outside_provider_workspace(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """An explicit manifest root outside the default workspace is kept unchanged."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    client = superserve_module.SuperserveSandboxClient()
+
+    session = await client.create(
+        manifest=Manifest(root="/tmp/outside"),
+        options=superserve_module.SuperserveSandboxClientOptions(),
+    )
+
+    assert session._inner.state.manifest.root == "/tmp/outside"
+
+
+# ---------------------------------------------------------------------------
+# exec / read / write
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_exec_propagates_command_result(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``exec`` returns the SDK result as bytes and runs in the workspace root."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000002",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    # Queue the result that the next fake command run will return.
+    sandbox.command_results.append(
+        _FakeCommandResult(stdout="hello\n", stderr="warn\n", exit_code=0)
+    )
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    result = await session.exec("echo", "hello", shell=False)
+
+    assert result.ok()
+    assert result.stdout == b"hello\n"
+    assert result.stderr == b"warn\n"
+    assert sandbox.commands.calls[0]["cwd"] == "/workspace"
+
+
+@pytest.mark.asyncio
+async def test_superserve_exec_translates_timeout_and_transport(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """SDK timeout and server errors surface as exec timeout and transport errors."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000003",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    # Failures are consumed in order: the first exec times out, the second hits a 500.
+    sandbox.command_failures.append(_SuperserveSandboxTimeoutError("slow"))
+    sandbox.command_failures.append(_SuperserveServerError("boom"))
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    with pytest.raises(superserve_module.ExecTimeoutError):
+        await session.exec("sleep", "1000", shell=False)
+    with pytest.raises(superserve_module.ExecTransportError):
+        await session.exec("true", shell=False)
+
+
+@pytest.mark.asyncio
+async def test_superserve_read_and_write_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A relative path is written under the workspace root and read back unchanged."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000004",
+        manifest=Manifest(root="/workspace"),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    await session.write(Path("notes.txt"), io.BytesIO(b"payload"))
+    payload = await session.read(Path("notes.txt"))
+
+    assert sandbox.files.write_calls == [("/workspace/notes.txt", b"payload")]
+    assert payload.read() == b"payload"
+
+
+@pytest.mark.asyncio
+async def test_superserve_read_missing_file_raises_not_found(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Reading a file absent from the fake store raises ``WorkspaceReadNotFoundError``."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000005",
+        manifest=Manifest(root="/workspace"),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    with pytest.raises(superserve_module.WorkspaceReadNotFoundError):
+        await session.read(Path("nope.txt"))
+
+
+@pytest.mark.asyncio
+async def test_superserve_exec_read_write_reject_path_escape(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    client = superserve_module.SuperserveSandboxClient()
+    # NOTE(review): only read() and write() are exercised below; exec() is not, despite the name.
+    session = await client.create(
+        manifest=Manifest(root="/workspace/project"),
+        options=superserve_module.SuperserveSandboxClientOptions(),
+    )
+    # Both a "../" traversal and an absolute path outside the manifest root must be rejected.
+    with pytest.raises(InvalidManifestPathError):
+        await session.read("../outside.txt")
+    with pytest.raises(InvalidManifestPathError):
+        await session.write("/etc/passwd", io.BytesIO(b"nope"))
+
+
+@pytest.mark.asyncio
+async def test_superserve_rejects_sandbox_local_user_arguments(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    client = superserve_module.SuperserveSandboxClient()
+    session = await client.create(
+        manifest=Manifest(root="/workspace/project"),
+        options=superserve_module.SuperserveSandboxClientOptions(),
+    )
+    # exec, read and write must each reject `user`, whether given by name or as a User object.
+    with pytest.raises(ConfigurationError, match="does not support sandbox-local users"):
+        await session.exec("pwd", user="sandbox-user")
+    with pytest.raises(ConfigurationError, match="does not support sandbox-local users"):
+        await session.read("notes.txt", user=User(name="sandbox-user"))
+    with pytest.raises(ConfigurationError, match="does not support sandbox-local users"):
+        await session.write("notes.txt", io.BytesIO(b"payload"), user="sandbox-user")
+
+
+# ---------------------------------------------------------------------------
+# Workspace setup / manifest materialization
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_start_creates_workspace_and_materializes_manifest(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000010",
+        manifest=Manifest(
+            root="/workspace",
+            entries={"notes.txt": File(content=b"payload")},
+        ),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+    # start() is expected to create the workspace root and write the manifest's notes.txt entry.
+    await session.start()
+    payload = await session.read(Path("notes.txt"))
+
+    # First exec is the workspace-root mkdir.
+    assert sandbox.commands.calls[0]["command"].startswith("mkdir -p")
+    assert ("/workspace/notes.txt", b"payload") in sandbox.files.write_calls
+    assert session.state.workspace_root_ready is True
+    assert payload.read() == b"payload"
+
+
+# ---------------------------------------------------------------------------
+# Exposed ports (v1: unsupported)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_resolve_exposed_port_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000020",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+        exposed_ports=(3000,),
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+    # Even a port that is declared in exposed_ports must fail to resolve on this backend.
+    with pytest.raises(ExposedPortUnavailableError) as exc_info:
+        await session.resolve_exposed_port(3000)
+    # The error context must name the backend that refused the request.
+    assert exc_info.value.context["backend"] == "superserve"
+
+
+# ---------------------------------------------------------------------------
+# Shutdown semantics: pause-on-exit vs kill
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_shutdown_pauses_when_pause_on_exit_true(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000030",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+        pause_on_exit=True,
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    await session.shutdown()
+    # pause_on_exit=True: shutdown must pause the sandbox exactly once and never kill it.
+    assert sandbox.pause_calls == 1
+    assert sandbox.kill_calls == 0
+
+
+@pytest.mark.asyncio
+async def test_superserve_shutdown_kills_when_pause_on_exit_false(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000031",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+        pause_on_exit=False,
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+
+    await session.shutdown()
+    # pause_on_exit=False: shutdown must kill the sandbox exactly once and never pause it.
+    assert sandbox.kill_calls == 1
+    assert sandbox.pause_calls == 0
+
+
+# ---------------------------------------------------------------------------
+# Resume contract
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_reconnects_active_sandbox(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    existing = _FakeAsyncSandbox(sandbox_id="sup-existing", status="active")
+    _FakeAsyncSandbox.sandboxes[existing.id] = existing
+
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000040",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=existing.id,
+    )
+
+    client = superserve_module.SuperserveSandboxClient()
+    resumed = await client.resume(state)
+    # The client must connect to the stored id, keep it, and not create a replacement sandbox.
+    assert _FakeAsyncSandbox.connect_calls[0]["sandbox_id"] == existing.id
+    assert resumed._inner.state.sandbox_id == existing.id
+    assert _FakeAsyncSandbox.create_calls == []
+    # Already active, no resume()
+    assert existing.resume_calls == 0
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_calls_resume_for_paused_sandbox(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    existing = _FakeAsyncSandbox(sandbox_id="sup-paused", status="paused")
+    _FakeAsyncSandbox.sandboxes[existing.id] = existing
+
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000041",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=existing.id,
+    )
+
+    client = superserve_module.SuperserveSandboxClient()
+    resumed = await client.resume(state)
+    # A paused sandbox must be resumed exactly once and reused under the same id, not recreated.
+    assert existing.resume_calls == 1
+    assert resumed._inner.state.sandbox_id == existing.id
+    assert _FakeAsyncSandbox.create_calls == []
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_falls_back_to_create_on_not_found(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    _FakeAsyncSandbox.fail_connect_ids.add("sup-missing")
+
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000042",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-missing",
+        template="superserve/python-3.11",
+    )
+
+    client = superserve_module.SuperserveSandboxClient()
+    resumed = await client.resume(state)
+    # connect() is tried first; after it fails, exactly one create() reuses the stored template.
+    assert _FakeAsyncSandbox.connect_calls[0]["sandbox_id"] == "sup-missing"
+    assert len(_FakeAsyncSandbox.create_calls) == 1
+    assert _FakeAsyncSandbox.create_calls[0]["from_template"] == "superserve/python-3.11"
+    # New backend ID
+    assert resumed._inner.state.sandbox_id != "sup-missing"
+    # System state is no longer preserved after a recreate.
+    assert resumed._inner._workspace_state_preserved_on_start() is False
+
+
+# ---------------------------------------------------------------------------
+# Workspace tar round-trip
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_persist_and_hydrate_workspace_round_trip(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    superserve_module = _load_superserve_module(monkeypatch)
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000050",
+        manifest=Manifest(root="/workspace"),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+    )
+    sandbox = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    sandbox._file_store["/workspace/notes.txt"] = b"payload"
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+    # Archive the workspace that was seeded with notes.txt directly in the fake's file store.
+    persisted = await session.persist_workspace()
+    raw = persisted.read()
+    assert isinstance(raw, bytes)
+    assert raw  # non-empty tar
+
+    # Hydrate into a *new* sandbox; verify the file lands at the expected path.
+    other_sandbox = _FakeAsyncSandbox(sandbox_id="sup-other")
+    other_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000051",
+        manifest=Manifest(root="/workspace"),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-other",
+    )
+    other = superserve_module.SuperserveSandboxSession.from_state(
+        other_state, sandbox=other_sandbox
+    )
+    await other.hydrate_workspace(io.BytesIO(raw))
+    assert other_sandbox._file_store["/workspace/notes.txt"] == b"payload"

From b475c06bb6f5918d3013f6a18a6a109f987c8ac4 Mon Sep 17 00:00:00 2001
From: Amit Patil <meAmitPatil@users.noreply.github.com>
Date: Wed, 20 May 2026 19:17:37 -0700
Subject: [PATCH 2/5] refactor: address Superserve sandbox backend review
 feedback

---
 .../extensions/sandbox/superserve/sandbox.py  | 301 +++++++++++++++---
 tests/extensions/sandbox/test_superserve.py   | 180 +++++++++++
 2 files changed, 428 insertions(+), 53 deletions(-)

diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py
index c5973bb349..00ced23de2 100644
--- a/src/agents/extensions/sandbox/superserve/sandbox.py
+++ b/src/agents/extensions/sandbox/superserve/sandbox.py
@@ -14,6 +14,9 @@
 import asyncio
 import io
 import logging
+import math
+import shlex
+import time
 import uuid
 from pathlib import Path
 from typing import Any, Literal, cast
@@ -59,6 +62,11 @@
 DEFAULT_SUPERSERVE_TEMPLATE = "superserve/base"
 _DEFAULT_MANIFEST_ROOT = cast(str, Manifest.model_fields["root"].default)
 _SUPERSERVE_TRANSIENT_STATUS_CODES: frozenset[int] = frozenset({408, 425, 429, 500, 502, 503, 504})
+_SUPERSERVE_ACTIVE_STATUSES: frozenset[str] = frozenset({"active"})
+_SUPERSERVE_RESUMING_STATUSES: frozenset[str] = frozenset({"paused", "resuming"})
+_SUPERSERVE_TERMINAL_STATUSES: frozenset[str] = frozenset({"failed"})
+_RESUME_READY_TIMEOUT_S: float = 60.0  # NOTE(review): duplicates timeouts default; looks unused
+_RESUME_READY_POLL_INTERVAL_S: float = 1.0  # NOTE(review): same as above; confirm or remove
 
 logger = logging.getLogger(__name__)
 
@@ -117,6 +125,46 @@ def _provider_error_detail(error: BaseException) -> str | None:
     return ": ".join(parts)
 
 
+def _superserve_error_context(error: BaseException) -> dict[str, object]:
+    """Build a structured context dict (backend, cause type, message, status, code) for `error`.
+
+    Optional keys are added only when the error actually carries a usable value.
+    """
+    details: dict[str, object] = {"backend": "superserve", "cause_type": type(error).__name__}
+    if text := str(error):
+        details["provider_message"] = text
+    http_status = getattr(error, "status_code", None)
+    if isinstance(http_status, int):
+        details["http_status"] = http_status
+    provider_code = getattr(error, "code", None)
+    # Empty or non-string provider codes are dropped rather than recorded.
+    if isinstance(provider_code, str) and provider_code:
+        details["provider_code"] = provider_code
+    return details
+
+
+def _superserve_exec_transport_error(
+    *,
+    command: tuple[str | Path, ...],
+    cause: BaseException,
+    sandbox_id: str | None = None,
+) -> ExecTransportError:
+    """Wrap a provider failure in an `ExecTransportError` carrying structured context."""
+    error_context = _superserve_error_context(cause)
+    if sandbox_id:
+        error_context["sandbox_id"] = sandbox_id
+    # Append the provider detail to the message only when there is one to show.
+    detail = _provider_error_detail(cause)
+    text = f"Superserve exec failed: {detail}" if detail else "Superserve exec failed"
+    return ExecTransportError(command=command, context=error_context, cause=cause, message=text)
+
+
+def _is_superserve_conflict(error: BaseException, conflict_exc: type[BaseException] | None) -> bool:
+    """True when `error` is the SDK conflict type or its exception chain carries HTTP 409."""
+    typed_match = conflict_exc is not None and isinstance(error, conflict_exc)
+    return typed_match or exception_chain_has_status_code(error, frozenset({409}))
+
+
 def _is_transient_error(exc: BaseException) -> bool:
     return exception_chain_has_status_code(
         exc, _SUPERSERVE_TRANSIENT_STATUS_CODES
@@ -124,6 +172,15 @@ def _is_transient_error(exc: BaseException) -> bool:
 
 
 def _resolve_manifest_root(manifest: Manifest | None) -> Manifest:
+    """Resolve the manifest root for a Superserve sandbox.
+
+    - No manifest → fresh manifest rooted at `/workspace`.
+    - Manifest whose root is the SDK's default placeholder (`Manifest.model_fields["root"].default`)
+      → rewrite the root to the Superserve default `/workspace` for ergonomics.
+    - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. We do not
+      reject arbitrary roots; this mirrors Vercel's behaviour and lets callers stage work outside
+      `/workspace` deliberately. If you need confinement, set extra path grants on the manifest.
+    """
     if manifest is None:
         return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT)
     if manifest.root == _DEFAULT_MANIFEST_ROOT:
@@ -147,6 +204,8 @@ class SuperserveSandboxTimeouts(BaseModel):
     file_upload_s: int = Field(default=300, ge=1)
     file_download_s: int = Field(default=300, ge=1)
     workspace_tar_s: int = Field(default=300, ge=1)
+    resume_ready_timeout_s: int = Field(default=60, ge=1)
+    resume_ready_poll_interval_s: float = Field(default=1.0, gt=0)
 
 
 class SuperserveSandboxClientOptions(BaseSandboxClientOptions):
@@ -275,6 +334,10 @@ async def _validate_path_access(self, path: Path | str, *, for_write: bool = Fal
     def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]:
         return (RESOLVE_WORKSPACE_PATH_HELPER,)
 
+    def _current_runtime_helper_cache_key(self) -> object | None:
+        """Invalidate helper-script cache when the backing sandbox is swapped on resume."""
+        return self.state.sandbox_id or None  # a falsy (empty) sandbox id collapses to None
+
     async def _resolved_envs(self) -> dict[str, str]:
         manifest_envs = await self.state.manifest.environment.resolve()
         resolved: dict[str, str] = {}
@@ -290,6 +353,8 @@ async def _ensure_sandbox(self) -> Any:
             return sandbox
 
         AsyncSandbox, NetworkConfig = _import_superserve_sdk()
+        sup_errors = _import_superserve_errors()
+        conflict_exc = sup_errors.get("conflict")
         env_vars = await self._resolved_envs()
         network_payload = self.state.base_network
         network = (
@@ -307,9 +372,14 @@ async def _ensure_sandbox(self) -> Any:
                 base_url=self.state.base_url,
             )
         except Exception as exc:
+            reason = (
+                "name_collision" if _is_superserve_conflict(exc, conflict_exc) else "create_failed"
+            )
+            context = _superserve_error_context(exc)
+            context["reason"] = reason
             raise WorkspaceStartError(
                 path=self._workspace_root_path(),
-                context={"backend": "superserve", "reason": "create_failed"},
+                context=context,
                 cause=exc,
                 message=f"failed to start Superserve sandbox: {_provider_error_detail(exc)}",
             ) from exc
@@ -318,18 +388,84 @@ async def _ensure_sandbox(self) -> Any:
         self.state.sandbox_id = sandbox.id
         return sandbox
 
+    async def _wait_until_active(
+        self,
+        *,
+        timeout_s: float | None = None,
+        poll_interval_s: float | None = None,
+    ) -> None:
+        """Poll get_info() until status is `active`, or raise `WorkspaceStartError`.
+
+        Used after `await sandbox.resume()` to guarantee the sandbox is ready before the caller
+        runs the first exec. Superserve's resume() returns once the API has accepted the request;
+        the sandbox may still be in `resuming` for a short window. No-op without a sandbox.
+
+        `timeout_s` / `poll_interval_s` fall back to the `resume_ready_*` timeouts only when None
+        (an explicit 0 is honoured). Raises on get_info() failure, terminal status, or timeout.
+        """
+        sandbox = self._sandbox
+        if sandbox is None:
+            return
+        timeouts = self.state.timeouts
+        # Fall back only on None: `or` would turn an explicit 0 into the configured default.
+        budget_s = timeouts.resume_ready_timeout_s if timeout_s is None else timeout_s
+        if poll_interval_s is None:
+            poll_interval_s = timeouts.resume_ready_poll_interval_s
+        deadline = time.monotonic() + budget_s
+        while True:
+            try:
+                info = await asyncio.wait_for(sandbox.get_info(), timeout=timeouts.keepalive_s)
+            except Exception as exc:
+                raise WorkspaceStartError(
+                    path=self._workspace_root_path(),
+                    context=_superserve_error_context(exc) | {"reason": "wait_until_active_failed"},
+                    cause=exc,
+                    message=f"failed to confirm sandbox active: {_provider_error_detail(exc)}",
+                ) from exc
+            status = getattr(info, "status", None)
+            last_status = getattr(status, "value", status)
+            if last_status in _SUPERSERVE_ACTIVE_STATUSES:
+                return
+            if last_status in _SUPERSERVE_TERMINAL_STATUSES:
+                raise WorkspaceStartError(
+                    path=self._workspace_root_path(),
+                    context={
+                        "backend": "superserve",
+                        "reason": "sandbox_failed_during_resume",
+                        "sandbox_status": last_status,
+                    },
+                    message=f"sandbox reached terminal status {last_status!r} during resume",
+                )
+            if time.monotonic() >= deadline:
+                raise WorkspaceStartError(
+                    path=self._workspace_root_path(),
+                    context={
+                        "backend": "superserve",
+                        "reason": "wait_until_active_timeout",
+                        "sandbox_status": last_status,
+                        "timeout_s": budget_s,
+                    },
+                    message=(
+                        f"sandbox did not become active within {budget_s}s "
+                        f"(last status: {last_status!r})"
+                    ),
+                )
+            await asyncio.sleep(poll_interval_s)
+
     async def _prepare_backend_workspace(self) -> None:
         root = self._workspace_root_path()
         sandbox = await self._ensure_sandbox()
         try:
             result = await sandbox.commands.run(
-                f"mkdir -p -- {_shell_quote(root.as_posix())}",
+                f"mkdir -p -- {shlex.quote(root.as_posix())}",
                 timeout_seconds=self.state.timeouts.fast_op_s,
             )
         except Exception as exc:
+            context = _superserve_error_context(exc)
+            context["reason"] = "workspace_root_setup_failed"
             raise WorkspaceStartError(
                 path=root,
-                context={"backend": "superserve", "reason": "workspace_root_setup_failed"},
+                context=context,
                 cause=exc,
                 message=(
                     "failed to start session: Superserve workspace root setup failed: "
@@ -400,14 +536,11 @@ async def _exec_internal(
         if not normalized:
             return ExecResult(stdout=b"", stderr=b"", exit_code=0)
 
-        command_str = " ".join(_shell_quote(part) for part in normalized)
+        command_str = shlex.join(normalized)
         envs = await self._resolved_envs()
         cwd = sandbox_path_str(self.state.manifest.root)
-        timeout_seconds = (
-            None
-            if timeout is None
-            else max(1, int(timeout + 0.999))  # round up; Superserve only accepts ints
-        )
+        # Superserve accepts only int seconds; round up so we never undershoot the caller.
+        timeout_seconds = None if timeout is None else max(1, math.ceil(timeout))
 
         try:
             result = await sandbox.commands.run(
@@ -425,14 +558,10 @@ async def _exec_internal(
                 raise ExecTimeoutError(
                     command=tuple(normalized), timeout_s=timeout, cause=exc
                 ) from exc
-            raise ExecTransportError(
+            raise _superserve_exec_transport_error(
                 command=tuple(normalized),
-                context={
-                    "backend": "superserve",
-                    "sandbox_id": self.state.sandbox_id,
-                    "provider_error": _provider_error_detail(exc),
-                },
                 cause=exc,
+                sandbox_id=self.state.sandbox_id,
             ) from exc
 
         stdout = (getattr(result, "stdout", "") or "").encode("utf-8", errors="replace")
@@ -506,16 +635,22 @@ async def _write_bytes_with_retry(self, path: str, data: bytes) -> None:
     async def persist_workspace(self) -> io.IOBase:
         return await with_ephemeral_mounts_removed(
             self,
-            self._persist_workspace_internal,
+            self._persist_workspace_with_retry,
             error_path=self._workspace_root_path(),
             error_cls=WorkspaceArchiveReadError,
             operation_error_context_key="snapshot_error_before_remount_corruption",
         )
 
+    @retry_async(retry_if=lambda exc, self: _is_transient_error(exc))
+    async def _persist_workspace_with_retry(self) -> io.IOBase:
+        return await self._persist_workspace_internal()  # thin delegate; policy is the decorator
+
     async def _persist_workspace_internal(self) -> io.IOBase:
         root = self._workspace_root_path()
         archive_path = posix_path_as_path(
-            coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar")
+            coerce_posix_path(
+                f"/tmp/openai-agents-persist-{self.state.session_id.hex}.tar"
+            )
         )
         excludes = [
             f"--exclude=./{rel_path.as_posix()}"
@@ -584,16 +719,22 @@ async def hydrate_workspace(self, data: io.IOBase) -> None:
 
         await with_ephemeral_mounts_removed(
             self,
-            lambda: self._hydrate_workspace_internal(bytes(raw)),
+            lambda: self._hydrate_workspace_with_retry(bytes(raw)),
             error_path=self._workspace_root_path(),
             error_cls=WorkspaceArchiveWriteError,
             operation_error_context_key="hydrate_error_before_remount_corruption",
         )
 
+    @retry_async(retry_if=lambda exc, self, _raw: _is_transient_error(exc))
+    async def _hydrate_workspace_with_retry(self, raw: bytes) -> None:
+        await self._hydrate_workspace_internal(raw)  # thin delegate; policy is the decorator
+
     async def _hydrate_workspace_internal(self, raw: bytes) -> None:
         root = self._workspace_root_path()
         archive_path = posix_path_as_path(
-            coerce_posix_path(f"/tmp/openai-agents-{self.state.session_id.hex}.tar")
+            coerce_posix_path(
+                f"/tmp/openai-agents-hydrate-{self.state.session_id.hex}.tar"
+            )
         )
         tar_command = ("tar", "xf", archive_path.as_posix(), "-C", root.as_posix())
 
@@ -643,13 +784,6 @@ async def _hydrate_workspace_internal(self, raw: bytes) -> None:
                 pass
 
 
-def _shell_quote(value: str) -> str:
-    """Minimal shlex.quote without importing shlex twice — keeps Vercel/Daytona-style quoting."""
-    import shlex
-
-    return shlex.quote(value)
-
-
 class SuperserveSandboxClient(BaseSandboxClient[SuperserveSandboxClientOptions]):
     """Superserve-backed sandbox client managing sandbox lifecycle via AsyncSandbox."""
 
@@ -753,33 +887,13 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
         reconnected = False
 
         if state.sandbox_id:
-            try:
-                sandbox = await AsyncSandbox.connect(
-                    state.sandbox_id,
-                    api_key=api_key,
-                    base_url=base_url,
-                )
-                status = getattr(sandbox, "status", None)
-                status_value = getattr(status, "value", status)
-                if status_value == "paused":
-                    await sandbox.resume()
-                elif status_value == "resuming":
-                    await sandbox.resume()
-                elif status_value == "failed":
-                    sandbox = None
-                # else status_value == "active" → already running
-                if sandbox is not None:
-                    reconnected = True
-            except Exception as exc:
-                if not_found_exc is not None and isinstance(exc, not_found_exc):
-                    logger.debug(
-                        "superserve sandbox %s not found, will recreate", state.sandbox_id
-                    )
-                else:
-                    logger.debug(
-                        "superserve connect/resume failed (will recreate): %s", exc
-                    )
-                sandbox = None
+            sandbox, reconnected = await self._reattach_sandbox(
+                AsyncSandbox=AsyncSandbox,
+                state=state,
+                api_key=api_key,
+                base_url=base_url,
+                not_found_exc=not_found_exc,
+            )
 
         if sandbox is None:
             state.sandbox_id = ""
@@ -791,6 +905,87 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
         inner._set_start_state_preserved(reconnected, system=reconnected)
         return self._wrap_session(inner, instrumentation=self._instrumentation)
 
+    async def _reattach_sandbox(
+        self,
+        *,
+        AsyncSandbox: Any,
+        state: SuperserveSandboxSessionState,
+        api_key: str | None,
+        base_url: str | None,
+        not_found_exc: type[BaseException] | None,
+    ) -> tuple[Any | None, bool]:
+        """Try to reattach to an existing Superserve sandbox by id.
+
+        Returns (sandbox, reconnected). On any failure path, returns (None, False) so the caller
+        falls back to recreating from scratch; handled failures are logged at debug level.
+        """
+        try:
+            sandbox = await AsyncSandbox.connect(
+                state.sandbox_id,
+                api_key=api_key,
+                base_url=base_url,
+            )
+        except Exception as exc:
+            if not_found_exc is not None and isinstance(exc, not_found_exc):
+                logger.debug(
+                    "superserve sandbox %s not found, will recreate", state.sandbox_id
+                )
+            else:
+                logger.debug(
+                    "superserve connect failed for %s (will recreate): %s",
+                    state.sandbox_id,
+                    exc,
+                )
+            return None, False
+        # Accept either an enum-like status (exposing `.value`) or a plain value.
+        status = getattr(sandbox, "status", None)
+        status_value = getattr(status, "value", status)
+
+        if status_value in _SUPERSERVE_TERMINAL_STATUSES:
+            logger.debug(
+                "superserve sandbox %s is in terminal status %r; recreating",
+                state.sandbox_id,
+                status_value,
+            )
+            return None, False
+
+        if status_value in _SUPERSERVE_RESUMING_STATUSES:
+            # Only call resume() if the sandbox is paused; for `resuming` just wait. Calling
+            # resume() while resume is in flight typically 409s on the API.
+            if status_value == "paused":
+                try:
+                    await sandbox.resume()
+                except Exception as exc:
+                    logger.debug(
+                        "superserve resume() failed for %s, will recreate: %s",
+                        state.sandbox_id,
+                        exc,
+                    )
+                    return None, False
+            # Reuse the session's poll loop via a temporary session bound to this sandbox.
+            probe = SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+            try:
+                await probe._wait_until_active()
+            except WorkspaceStartError as exc:
+                logger.debug(
+                    "superserve sandbox %s did not become active after resume: %s",
+                    state.sandbox_id,
+                    exc,
+                )
+                return None, False
+            return sandbox, True
+
+        if status_value in _SUPERSERVE_ACTIVE_STATUSES:
+            return sandbox, True
+
+        # Unknown or transitional status (e.g. "stopping", future enum values) — don't trust it.
+        logger.debug(
+            "superserve sandbox %s has unrecognized status %r; recreating",
+            state.sandbox_id,
+            status_value,
+        )
+        return None, False
+
     def deserialize_session_state(self, payload: dict[str, object]) -> SandboxSessionState:
         return SuperserveSandboxSessionState.model_validate(payload)
 
diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py
index 01540cd054..d74126fb61 100644
--- a/tests/extensions/sandbox/test_superserve.py
+++ b/tests/extensions/sandbox/test_superserve.py
@@ -257,6 +257,10 @@ async def kill(self) -> None:
         self.kill_calls += 1
         self.status = "deleted"
 
+async def _noop_sleep(*_args: object, **_kwargs: object) -> None:
+    return None  # accepts and ignores any positional/keyword arguments
+
+
 def _load_superserve_module(monkeypatch: pytest.MonkeyPatch) -> Any:
     _FakeAsyncSandbox.reset()
 
@@ -693,6 +697,182 @@ async def test_superserve_resume_calls_resume_for_paused_sandbox(
     assert _FakeAsyncSandbox.create_calls == []
 
 
+@pytest.mark.asyncio
+async def test_superserve_resume_polls_until_active_after_resume_call(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A paused sandbox is resumed once, then polled via get_info until it reports active."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    existing = _FakeAsyncSandbox(sandbox_id="sup-paused-poll", status="paused")
+    _FakeAsyncSandbox.sandboxes[existing.id] = existing
+
+    # Make sandbox.resume() leave the status at "resuming" so _wait_until_active has to poll.
+    # monkeypatch undoes both class-level patches at teardown, so no manual restore is needed.
+    async def _slow_resume(self: _FakeAsyncSandbox) -> None:
+        self.resume_calls += 1
+        self.status = "resuming"
+
+    monkeypatch.setattr(_FakeAsyncSandbox, "resume", _slow_resume)
+
+    # On the second get_info call, flip status to "active" so polling succeeds.
+    get_info_count = {"n": 0}
+
+    async def _get_info_then_active(self: _FakeAsyncSandbox) -> _FakeSandboxInfo:
+        get_info_count["n"] += 1
+        if get_info_count["n"] >= 2:
+            self.status = "active"
+        return _FakeSandboxInfo(status=self.status)
+
+    monkeypatch.setattr(_FakeAsyncSandbox, "get_info", _get_info_then_active)
+
+    # Tighten the poll cadence so the test finishes quickly.
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000043",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=existing.id,
+        timeouts=superserve_module.SuperserveSandboxTimeouts(
+            resume_ready_poll_interval_s=0.001,
+            resume_ready_timeout_s=5,
+        ),
+    )
+
+    client = superserve_module.SuperserveSandboxClient()
+    resumed = await client.resume(state)
+
+    # resume() was issued exactly once for the paused sandbox.
+    assert existing.resume_calls == 1
+    # Readiness took more than one get_info probe.
+    assert get_info_count["n"] >= 2
+    # The session stays attached to the original sandbox, which is now active.
+    assert resumed._inner.state.sandbox_id == existing.id
+    assert existing.status == "active"
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_skips_resume_call_when_already_resuming(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A sandbox already in "resuming" is only polled; resume() is never called on it."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    in_flight = _FakeAsyncSandbox(sandbox_id="sup-already-resuming", status="resuming")
+    _FakeAsyncSandbox.sandboxes[in_flight.id] = in_flight
+
+    # Report "active" on the very first get_info so the readiness poll exits immediately.
+    async def _report_active(self: _FakeAsyncSandbox) -> _FakeSandboxInfo:
+        self.status = "active"
+        return _FakeSandboxInfo(status="active")
+
+    monkeypatch.setattr(_FakeAsyncSandbox, "get_info", _report_active)
+
+    saved_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000044",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=in_flight.id,
+    )
+
+    session = await superserve_module.SuperserveSandboxClient().resume(saved_state)
+
+    # No resume() call was made, and the session still points at the same sandbox.
+    assert in_flight.resume_calls == 0
+    assert session._inner.state.sandbox_id == in_flight.id
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_recreates_on_unknown_status(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """An unrecognized status such as "stopping" makes resume() create a fresh sandbox."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    stale = _FakeAsyncSandbox(sandbox_id="sup-stopping", status="stopping")
+    _FakeAsyncSandbox.sandboxes[stale.id] = stale
+
+    saved_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000045",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=stale.id,
+        template="superserve/python-3.11",
+    )
+
+    resumed = await superserve_module.SuperserveSandboxClient().resume(saved_state)
+
+    # Exactly one create call, a different sandbox id, and no preserved-workspace claim.
+    assert len(_FakeAsyncSandbox.create_calls) == 1
+    assert resumed._inner.state.sandbox_id != stale.id
+    assert resumed._inner._workspace_state_preserved_on_start() is False
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_recreates_on_failed_status(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A sandbox reporting "failed" is replaced by a new one without calling resume() on it."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    broken = _FakeAsyncSandbox(sandbox_id="sup-failed", status="failed")
+    _FakeAsyncSandbox.sandboxes[broken.id] = broken
+
+    saved_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000046",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=broken.id,
+    )
+
+    session = await superserve_module.SuperserveSandboxClient().resume(saved_state)
+
+    assert len(_FakeAsyncSandbox.create_calls) == 1
+    assert session._inner.state.sandbox_id != broken.id
+    # The failed sandbox itself was never asked to resume.
+    assert broken.resume_calls == 0
+
+
+# ---------------------------------------------------------------------------
+# Error classification
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_superserve_create_classifies_conflict_error(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A conflict during create is reported with reason "name_collision" and status 409."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    _FakeAsyncSandbox.create_failures = [_SuperserveConflictError("name already exists")]
+    sup_client = superserve_module.SuperserveSandboxClient()
+    with pytest.raises(Exception) as raised:
+        await sup_client.create(
+            manifest=Manifest(),
+            options=superserve_module.SuperserveSandboxClientOptions(name="duplicate-name"),
+        )
+    assert raised.value.context.get("reason") == "name_collision"
+    assert raised.value.context.get("http_status") == 409
+
+
+def test_superserve_runtime_helper_cache_key_is_sandbox_id(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """The runtime-helper cache key equals the sandbox id and is None for an empty id."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    build_session = superserve_module.SuperserveSandboxSession.from_state
+    with_id = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000060",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-cache-key",
+    )
+    assert build_session(with_id)._current_runtime_helper_cache_key() == "sup-cache-key"
+
+    without_id = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000061",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="",
+    )
+    assert build_session(without_id)._current_runtime_helper_cache_key() is None
+
+
 @pytest.mark.asyncio
 async def test_superserve_resume_falls_back_to_create_on_not_found(
     monkeypatch: pytest.MonkeyPatch,

From bc707cec64d5100ecf7fa77d658097995b2d5f04 Mon Sep 17 00:00:00 2001
From: Amit Patil <meAmitPatil@users.noreply.github.com>
Date: Mon, 25 May 2026 16:49:58 -0700
Subject: [PATCH 3/5] address review and align with upstream provider PR
 pattern

---
 .../extensions/sandbox/superserve/sandbox.md  |  3 +
 docs/sandbox/clients.md                       |  3 +
 examples/sandbox/extensions/README.md         | 29 +++-----
 .../sandbox/extensions/superserve_runner.py   |  4 +-
 .../extensions/sandbox/superserve/sandbox.py  |  8 +--
 tests/sandbox/test_client_options.py          |  2 +
 tests/sandbox/test_compatibility_guards.py    | 69 +++++++++++++++++++
 7 files changed, 92 insertions(+), 26 deletions(-)
 create mode 100644 docs/ref/extensions/sandbox/superserve/sandbox.md

diff --git a/docs/ref/extensions/sandbox/superserve/sandbox.md b/docs/ref/extensions/sandbox/superserve/sandbox.md
new file mode 100644
index 0000000000..0a3d9d1f37
--- /dev/null
+++ b/docs/ref/extensions/sandbox/superserve/sandbox.md
@@ -0,0 +1,3 @@
+# `Sandbox`
+
+::: agents.extensions.sandbox.superserve.sandbox
diff --git a/docs/sandbox/clients.md b/docs/sandbox/clients.md
index bd21da63d3..20105e693a 100644
--- a/docs/sandbox/clients.md
+++ b/docs/sandbox/clients.md
@@ -96,6 +96,7 @@ For provider-specific setup notes and links for the checked-in extension example
 | `E2BSandboxClient` | `openai-agents[e2b]` | [E2B runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/e2b_runner.py) |
 | `ModalSandboxClient` | `openai-agents[modal]` | [Modal runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/modal_runner.py) |
 | `RunloopSandboxClient` | `openai-agents[runloop]` | [Runloop runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/runloop/runner.py) |
+| `SuperserveSandboxClient` | `openai-agents[superserve]` | [Superserve runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/superserve_runner.py) |
 | `VercelSandboxClient` | `openai-agents[vercel]` | [Vercel runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/vercel_runner.py) |
 
 </div>
@@ -113,6 +114,7 @@ Hosted sandbox clients expose provider-specific mount strategies. Choose the bac
 | `DaytonaSandboxClient` | Supports rclone-backed cloud storage mounts with `DaytonaCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
 | `E2BSandboxClient` | Supports rclone-backed cloud storage mounts with `E2BCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
 | `RunloopSandboxClient` | Supports rclone-backed cloud storage mounts with `RunloopCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
+| `SuperserveSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |
 | `VercelSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |
 
 </div>
@@ -130,6 +132,7 @@ The table below summarizes which remote storage entries each backend can mount d
 | `DaytonaSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
 | `E2BSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
 | `RunloopSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
+| `SuperserveSandboxClient` | - | - | - | - | - | - |
 | `VercelSandboxClient` | - | - | - | - | - | - |
 
 </div>
diff --git a/examples/sandbox/extensions/README.md b/examples/sandbox/extensions/README.md
index b9a27b2de4..9c98364462 100644
--- a/examples/sandbox/extensions/README.md
+++ b/examples/sandbox/extensions/README.md
@@ -7,7 +7,7 @@ They intentionally keep the flow simple:
 
 1. Build a tiny manifest in memory.
 2. Create a `SandboxAgent` that inspects that workspace through one shell tool.
-3. Run the agent against E2B, Modal, Daytona, Cloudflare, Runloop, Blaxel, or Vercel.
+3. Run the agent against E2B, Modal, Daytona, Cloudflare, Runloop, Blaxel, Superserve, or Vercel.
 
 All of these examples require `OPENAI_API_KEY`, because they call the model through the normal
 `Runner` path. Each cloud backend also needs its own provider credentials.
@@ -261,12 +261,6 @@ export OPENAI_API_KEY=...
 export SUPERSERVE_API_KEY=...
 ```
 
-To target staging instead of production, also set:
-
-```bash
-export SUPERSERVE_BASE_URL=https://api-staging.superserve.ai
-```
-
 ### Run
 
 ```bash
@@ -275,18 +269,15 @@ uv run python examples/sandbox/extensions/superserve_runner.py --stream
 
 Useful flags:
 
-- `--template superserve/python-3.11` — use a different curated template (others:
-  `superserve/base`, `superserve/node-22`, `superserve/code-interpreter`,
-  `superserve/python-ml`, `superserve/claude-code`). Team-owned template UUIDs also work.
-- `--pause-on-exit` — pause the sandbox on shutdown instead of killing it. Superserve sandboxes
-  never die on their own by default, so this lets you reconnect with `SuperserveSandboxClient.resume`
-  later without recreating workspace state.
-- `--timeout-seconds 300` — opt into an inactivity timeout (off by default).
-- `--skip-snapshot-check` — skip the pause/resume snapshot round-trip verification.
-
-Pause/resume is a first-class part of the Superserve API surface, so the example exercises both
-the standard create→exec→shutdown flow and the explicit
-`pause → serialize state → resume → read` round-trip.
+- `--template <name>` -- use a different template; defaults to `superserve/base`.
+  Other curated templates: `superserve/python-3.11`, `superserve/node-22`,
+  `superserve/code-interpreter`, `superserve/python-ml`.
+- `--pause-on-exit` -- pause the sandbox on shutdown instead of killing it.
+- `--timeout-seconds 300` -- inactivity timeout in seconds (off by default).
+- `--skip-snapshot-check` -- skip the pause/resume snapshot round-trip verification.
+
+Unless `--skip-snapshot-check` is passed, the example runs a pause/resume round-trip before the
+agent run to verify that workspace state survives a pause.
 
 ## Runloop
 
diff --git a/examples/sandbox/extensions/superserve_runner.py b/examples/sandbox/extensions/superserve_runner.py
index f462f4ef16..1f6437a460 100644
--- a/examples/sandbox/extensions/superserve_runner.py
+++ b/examples/sandbox/extensions/superserve_runner.py
@@ -204,9 +204,9 @@ async def main(
         "--template",
         default=DEFAULT_TEMPLATE,
         help=(
-            "Superserve template name or UUID. Defaults to `superserve/base`. "
+            "Superserve template to use. Defaults to `superserve/base`. "
             "Other curated templates: superserve/python-3.11, superserve/node-22, "
-            "superserve/code-interpreter, superserve/python-ml, superserve/claude-code."
+            "superserve/code-interpreter, superserve/python-ml."
         ),
     )
     parser.add_argument(
diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py
index 00ced23de2..7a64883d86 100644
--- a/src/agents/extensions/sandbox/superserve/sandbox.py
+++ b/src/agents/extensions/sandbox/superserve/sandbox.py
@@ -65,8 +65,6 @@
 _SUPERSERVE_ACTIVE_STATUSES: frozenset[str] = frozenset({"active"})
 _SUPERSERVE_RESUMING_STATUSES: frozenset[str] = frozenset({"paused", "resuming"})
 _SUPERSERVE_TERMINAL_STATUSES: frozenset[str] = frozenset({"failed"})
-_RESUME_READY_TIMEOUT_S: float = 60.0
-_RESUME_READY_POLL_INTERVAL_S: float = 1.0
 
 logger = logging.getLogger(__name__)
 
@@ -177,9 +175,9 @@ def _resolve_manifest_root(manifest: Manifest | None) -> Manifest:
     - No manifest → fresh manifest rooted at `/workspace`.
     - Manifest whose root is the SDK's default placeholder (`Manifest.model_fields["root"].default`)
       → rewrite the root to the Superserve default `/workspace` for ergonomics.
-    - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. We do not
-      reject arbitrary roots; this mirrors Vercel's behaviour and lets callers stage work outside
-      `/workspace` deliberately. If you need confinement, set extra path grants on the manifest.
+    - Caller-provided non-default root (anywhere on the filesystem) → keep verbatim. Arbitrary
+      roots are accepted so callers can stage work outside `/workspace` deliberately. For
+      confinement, set extra path grants on the manifest.
     """
     if manifest is None:
         return Manifest(root=DEFAULT_SUPERSERVE_WORKSPACE_ROOT)
diff --git a/tests/sandbox/test_client_options.py b/tests/sandbox/test_client_options.py
index 8c71dc4028..f2ebbbf7d2 100644
--- a/tests/sandbox/test_client_options.py
+++ b/tests/sandbox/test_client_options.py
@@ -8,6 +8,7 @@
 from agents.extensions.sandbox.cloudflare import CloudflareSandboxClientOptions
 from agents.extensions.sandbox.daytona import DaytonaSandboxClientOptions
 from agents.extensions.sandbox.e2b import E2BSandboxClientOptions
+from agents.extensions.sandbox.superserve import SuperserveSandboxClientOptions
 from agents.sandbox.config import DEFAULT_PYTHON_SANDBOX_IMAGE
 from agents.sandbox.sandboxes import DockerSandboxClientOptions, UnixLocalSandboxClientOptions
 from agents.sandbox.session import BaseSandboxClientOptions
@@ -69,6 +70,7 @@ def test_sandbox_client_options_exclude_unset_preserves_type_discriminator() ->
         E2BSandboxClientOptions(sandbox_type="e2b", template="base"),
         DaytonaSandboxClientOptions(image=DEFAULT_PYTHON_SANDBOX_IMAGE),
         CloudflareSandboxClientOptions(worker_url="https://example.com"),
+        SuperserveSandboxClientOptions(template="superserve/base"),
     ],
 )
 def test_sandbox_client_options_roundtrip_preserves_concrete_type(
diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py
index 5a11e5bf77..b854a8c549 100644
--- a/tests/sandbox/test_compatibility_guards.py
+++ b/tests/sandbox/test_compatibility_guards.py
@@ -324,6 +324,22 @@ def test_core_sandbox_public_export_surface_is_stable() -> None:
                 "_encode_runloop_snapshot_ref",
             },
         ),
+        (
+            "agents.extensions.sandbox.superserve",
+            {
+                "DEFAULT_SUPERSERVE_WORKSPACE_ROOT",
+                "ExecTimeoutError",
+                "ExecTransportError",
+                "SuperserveSandboxClient",
+                "SuperserveSandboxClientOptions",
+                "SuperserveSandboxSession",
+                "SuperserveSandboxSessionState",
+                "SuperserveSandboxTimeouts",
+                "WorkspaceArchiveReadError",
+                "WorkspaceArchiveWriteError",
+                "WorkspaceReadNotFoundError",
+            },
+        ),
         (
             "agents.extensions.sandbox.vercel",
             {
@@ -493,6 +509,23 @@ def test_optional_sandbox_dataclass_constructor_field_order_is_stable(
                 "managed_secrets",
             ),
         ),
+        (
+            "agents.extensions.sandbox.superserve",
+            "SuperserveSandboxClientOptions",
+            (
+                "template",
+                "name",
+                "env_vars",
+                "metadata",
+                "network",
+                "timeout_seconds",
+                "pause_on_exit",
+                "api_key",
+                "base_url",
+                "exposed_ports",
+                "timeouts",
+            ),
+        ),
         (
             "agents.extensions.sandbox.vercel",
             "VercelSandboxClientOptions",
@@ -720,6 +753,31 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable(
                 "secret_refs",
             ),
         ),
+        (
+            "agents.extensions.sandbox.superserve",
+            "SuperserveSandboxSessionState",
+            (
+                "type",
+                "session_id",
+                "snapshot",
+                "manifest",
+                "exposed_ports",
+                "snapshot_fingerprint",
+                "snapshot_fingerprint_version",
+                "workspace_root_ready",
+                "sandbox_id",
+                "template",
+                "name",
+                "base_env_vars",
+                "base_metadata",
+                "base_network",
+                "timeout_seconds",
+                "pause_on_exit",
+                "base_url",
+                "api_key",
+                "timeouts",
+            ),
+        ),
         (
             "agents.extensions.sandbox.vercel",
             "VercelSandboxSessionState",
@@ -785,6 +843,12 @@ def test_sandbox_session_state_field_order_is_stable(
         ),
         ("agents.extensions.sandbox.daytona", "DaytonaSandboxClientOptions", (), "daytona"),
         ("agents.extensions.sandbox.runloop", "RunloopSandboxClientOptions", (), "runloop"),
+        (
+            "agents.extensions.sandbox.superserve",
+            "SuperserveSandboxClientOptions",
+            (),
+            "superserve",
+        ),
         ("agents.extensions.sandbox.vercel", "VercelSandboxClientOptions", (), "vercel"),
     ],
 )
@@ -846,6 +910,11 @@ def test_optional_sandbox_client_options_json_round_trip_preserves_type(
             "RunloopSandboxSessionState",
             {"devbox_id": "devbox-123"},
         ),
+        (
+            "agents.extensions.sandbox.superserve",
+            "SuperserveSandboxSessionState",
+            {"sandbox_id": "sandbox-123"},
+        ),
         (
             "agents.extensions.sandbox.vercel",
             "VercelSandboxSessionState",

From 75c94b909d62ce4f44dced391eae3377aa1b3a99 Mon Sep 17 00:00:00 2001
From: Amit Patil <meAmitPatil@users.noreply.github.com>
Date: Mon, 25 May 2026 17:15:19 -0700
Subject: [PATCH 4/5] address codex review feedback

---
 .../extensions/sandbox/superserve/sandbox.py  | 44 +++++++++++--------
 tests/sandbox/test_compatibility_guards.py    |  2 -
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py
index 7a64883d86..954f5271ab 100644
--- a/src/agents/extensions/sandbox/superserve/sandbox.py
+++ b/src/agents/extensions/sandbox/superserve/sandbox.py
@@ -266,8 +266,6 @@ class SuperserveSandboxSessionState(SandboxSessionState):
     base_network: dict[str, object] | None = None
     timeout_seconds: int | None = None
     pause_on_exit: bool = False
-    base_url: str | None = None
-    api_key: str | None = None
     timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts)
 
 
@@ -276,15 +274,21 @@ class SuperserveSandboxSession(BaseSandboxSession):
 
     state: SuperserveSandboxSessionState
     _sandbox: Any | None
+    _api_key: str | None
+    _base_url: str | None
 
     def __init__(
         self,
         *,
         state: SuperserveSandboxSessionState,
         sandbox: Any | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
     ) -> None:
         self.state = state
         self._sandbox = sandbox
+        self._api_key = api_key
+        self._base_url = base_url
 
     @classmethod
     def from_state(
@@ -292,8 +296,10 @@ def from_state(
         state: SuperserveSandboxSessionState,
         *,
         sandbox: Any | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
     ) -> SuperserveSandboxSession:
-        return cls(state=state, sandbox=sandbox)
+        return cls(state=state, sandbox=sandbox, api_key=api_key, base_url=base_url)
 
     @property
     def sandbox_id(self) -> str:
@@ -366,8 +372,8 @@ async def _ensure_sandbox(self) -> Any:
                 metadata=dict(self.state.base_metadata) or None,
                 env_vars=env_vars or None,
                 network=network,
-                api_key=self.state.api_key,
-                base_url=self.state.base_url,
+                api_key=self._api_key,
+                base_url=self._base_url,
             )
         except Exception as exc:
             reason = (
@@ -537,8 +543,12 @@ async def _exec_internal(
         command_str = shlex.join(normalized)
         envs = await self._resolved_envs()
         cwd = sandbox_path_str(self.state.manifest.root)
-        # Superserve accepts only int seconds; round up so we never undershoot the caller.
-        timeout_seconds = None if timeout is None else max(1, math.ceil(timeout))
+        effective_timeout = (
+            float(self.state.timeouts.exec_timeout_unbounded_s)
+            if timeout is None
+            else float(timeout)
+        )
+        timeout_seconds = max(1, math.ceil(effective_timeout))
 
         try:
             result = await sandbox.commands.run(
@@ -843,12 +853,10 @@ async def create(
             base_network=dict(options.network) if options.network is not None else None,
             timeout_seconds=options.timeout_seconds,
             pause_on_exit=options.pause_on_exit,
-            base_url=base_url,
-            api_key=api_key,
             timeouts=timeouts,
             exposed_ports=options.exposed_ports,
         )
-        inner = SuperserveSandboxSession.from_state(state)
+        inner = SuperserveSandboxSession.from_state(state, api_key=api_key, base_url=base_url)
         await inner._ensure_sandbox()
         return self._wrap_session(inner, instrumentation=self._instrumentation)
 
@@ -874,12 +882,8 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
         sup_errors = _import_superserve_errors()
         not_found_exc = sup_errors.get("not_found")
 
-        api_key = state.api_key or self._api_key
-        base_url = state.base_url or self._base_url
-        if state.api_key is None and api_key is not None:
-            state.api_key = api_key
-        if state.base_url is None and base_url is not None:
-            state.base_url = base_url
+        api_key = self._api_key
+        base_url = self._base_url
 
         sandbox: Any | None = None
         reconnected = False
@@ -897,7 +901,9 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
             state.sandbox_id = ""
             state.workspace_root_ready = False
 
-        inner = SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+        inner = SuperserveSandboxSession.from_state(
+            state, sandbox=sandbox, api_key=api_key, base_url=base_url
+        )
         if sandbox is None:
             await inner._ensure_sandbox()
         inner._set_start_state_preserved(reconnected, system=reconnected)
@@ -961,7 +967,9 @@ async def _reattach_sandbox(
                     )
                     return None, False
 
-            probe = SuperserveSandboxSession.from_state(state, sandbox=sandbox)
+            probe = SuperserveSandboxSession.from_state(
+                state, sandbox=sandbox, api_key=api_key, base_url=base_url
+            )
             try:
                 await probe._wait_until_active()
             except WorkspaceStartError as exc:
diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py
index b854a8c549..2194791955 100644
--- a/tests/sandbox/test_compatibility_guards.py
+++ b/tests/sandbox/test_compatibility_guards.py
@@ -773,8 +773,6 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable(
                 "base_network",
                 "timeout_seconds",
                 "pause_on_exit",
-                "base_url",
-                "api_key",
                 "timeouts",
             ),
         ),

From a13c70a4f89eed31796a4da4ef896370d8e49ad8 Mon Sep 17 00:00:00 2001
From: Amit Patil <meAmitPatil@users.noreply.github.com>
Date: Mon, 25 May 2026 18:17:55 -0700
Subject: [PATCH 5/5] address second codex review pass

---
 .../extensions/sandbox/superserve/sandbox.py  | 13 ++--
 tests/extensions/sandbox/test_superserve.py   | 73 +++++++++++++++++++
 tests/sandbox/test_compatibility_guards.py    |  1 +
 3 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/src/agents/extensions/sandbox/superserve/sandbox.py b/src/agents/extensions/sandbox/superserve/sandbox.py
index 954f5271ab..b8ecfedfe5 100644
--- a/src/agents/extensions/sandbox/superserve/sandbox.py
+++ b/src/agents/extensions/sandbox/superserve/sandbox.py
@@ -266,6 +266,7 @@ class SuperserveSandboxSessionState(SandboxSessionState):
     base_network: dict[str, object] | None = None
     timeout_seconds: int | None = None
     pause_on_exit: bool = False
+    base_url: str | None = None
     timeouts: SuperserveSandboxTimeouts = Field(default_factory=SuperserveSandboxTimeouts)
 
 
@@ -853,6 +854,7 @@ async def create(
             base_network=dict(options.network) if options.network is not None else None,
             timeout_seconds=options.timeout_seconds,
             pause_on_exit=options.pause_on_exit,
+            base_url=base_url,
             timeouts=timeouts,
             exposed_ports=options.exposed_ports,
         )
@@ -883,7 +885,7 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
         not_found_exc = sup_errors.get("not_found")
 
         api_key = self._api_key
-        base_url = self._base_url
+        base_url = state.base_url or self._base_url
 
         sandbox: Any | None = None
         reconnected = False
@@ -934,13 +936,8 @@ async def _reattach_sandbox(
                 logger.debug(
                     "superserve sandbox %s not found, will recreate", state.sandbox_id
                 )
-            else:
-                logger.debug(
-                    "superserve connect failed for %s (will recreate): %s",
-                    state.sandbox_id,
-                    exc,
-                )
-            return None, False
+                return None, False
+            raise
 
         status = getattr(sandbox, "status", None)
         status_value = getattr(status, "value", status)
diff --git a/tests/extensions/sandbox/test_superserve.py b/tests/extensions/sandbox/test_superserve.py
index d74126fb61..de8e7bc366 100644
--- a/tests/extensions/sandbox/test_superserve.py
+++ b/tests/extensions/sandbox/test_superserve.py
@@ -804,6 +804,79 @@ async def test_superserve_resume_recreates_on_unknown_status(
     assert resumed._inner._workspace_state_preserved_on_start() is False
 
 
+@pytest.mark.asyncio
+async def test_superserve_resume_reraises_transient_connect_errors(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A non-not-found connect failure propagates from resume() instead of recreating."""
+    superserve_module = _load_superserve_module(monkeypatch)
+
+    class _TransientError(Exception):
+        status_code = 503
+
+    async def _failing_connect(cls: type, sandbox_id: str, **kwargs: object) -> _FakeAsyncSandbox:
+        raise _TransientError("upstream blip")
+
+    monkeypatch.setattr(_FakeAsyncSandbox, "connect", classmethod(_failing_connect))
+
+    saved_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000047",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-flaky",
+    )
+
+    sup_client = superserve_module.SuperserveSandboxClient()
+    with pytest.raises(_TransientError):
+        await sup_client.resume(saved_state)
+
+    # No create call was recorded, so resume() did not fall back to a fresh sandbox.
+    assert _FakeAsyncSandbox.create_calls == []
+
+
+@pytest.mark.asyncio
+async def test_superserve_resume_uses_state_base_url_over_client_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """On resume, a base_url stored on the state takes precedence over the client's own."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    active = _FakeAsyncSandbox(sandbox_id="sup-base-url", status="active")
+    _FakeAsyncSandbox.sandboxes[active.id] = active
+
+    saved_state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000048",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=active.id,
+        base_url="https://api-staging.superserve.ai",
+    )
+
+    prod_client = superserve_module.SuperserveSandboxClient(base_url="https://api.superserve.ai")
+    await prod_client.resume(saved_state)
+
+    assert _FakeAsyncSandbox.connect_calls[0]["base_url"] == "https://api-staging.superserve.ai"
+
+
+@pytest.mark.asyncio
+async def test_superserve_exec_uses_unbounded_timeout_when_caller_passes_none(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Without an explicit timeout, exec sends exec_timeout_unbounded_s to the sandbox."""
+    superserve_module = _load_superserve_module(monkeypatch)
+    fake = _FakeAsyncSandbox(sandbox_id="sup-existing")
+    state = superserve_module.SuperserveSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000049",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id="sup-existing",
+        timeouts=superserve_module.SuperserveSandboxTimeouts(exec_timeout_unbounded_s=12345),
+    )
+    session = superserve_module.SuperserveSandboxSession.from_state(state, sandbox=fake)
+    await session.exec("echo", "hello", shell=False)
+
+    assert fake.commands.calls[0]["timeout_seconds"] == 12345
+
+
 @pytest.mark.asyncio
 async def test_superserve_resume_recreates_on_failed_status(
     monkeypatch: pytest.MonkeyPatch,
diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py
index 2194791955..74fe40e387 100644
--- a/tests/sandbox/test_compatibility_guards.py
+++ b/tests/sandbox/test_compatibility_guards.py
@@ -773,6 +773,7 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable(
                 "base_network",
                 "timeout_seconds",
                 "pause_on_exit",
+                "base_url",
                 "timeouts",
             ),
         ),