From 437fb546d62e291fb418d9be9c83ce083b2f3c8a Mon Sep 17 00:00:00 2001 From: msfstef Date: Thu, 4 Jun 2026 20:55:49 +0300 Subject: [PATCH 1/6] fix(agents): reclaim zombie sandbox containers and create them lazily Users opening the desktop app found 15+ electric-sbx-* containers running that they never asked for. Several compounding bugs: - The boot sweep only removed *exited ephemeral* leftovers, but crash/ quit leftovers are RUNNING (PID 1 is an infinite sleep loop), so it never reclaimed anything real. Containers now carry an owner-pid label; the sweep reclaims running orphans whose owner is dead (remove ephemeral / stop persistent), consults an in-container adoption marker so a live process that reattached a dead creator's container is never swept, and is awaited at boot so it can't race reattaches. - The debounced idle teardowns are unref'd timers that died with the process: every graceful quit leaked the recently-active containers as running zombies. Runtime shutdown now flushes pending teardowns (BuiltinAgentsServer.stop), leaving live-leased entries to their own dispose or the next boot's sweep. - A failed post-start init (mkdir exec) left a started container that was never registered - invisible to dispose and, while running, to the sweep. Creation now verifies the init exit code and removes the container on any failure. Fixing this exposed that isNameConflict() treated any HTTP 409 as a lost create race (exec on an exited container is also 409), silently "reattaching" to a removed container; it now matches the daemon's name-conflict message. - Sandbox creation was eager on every claimed wake, so a reconnect backlog of trivial wakes (cron ticks, bookkeeping) stampeded the daemon with containers. The docker profile now returns a lazySandbox wrapper that defers the provider factory to first actual use. Terminal reclaim without use goes through reclaimDockerSandboxByKey (no create-to-delete), spawn-inherit force-materializes the owner's workspace before the child can attach, and concurrent creations are capped at 4 to smooth real bursts. - All sandbox containers now carry compose project/service labels (com.docker.compose.project=electric-sandboxes) so Docker Desktop groups them under one entry and they can be stopped/deleted together (docker compose -p electric-sandboxes down). Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/zombie-sandbox-containers-fix.md | 24 ++ packages/agents-runtime/src/process-wake.ts | 11 +- packages/agents-runtime/src/sandbox-docker.ts | 2 + packages/agents-runtime/src/sandbox.ts | 2 + packages/agents-runtime/src/sandbox/docker.ts | 340 +++++++++++++++--- packages/agents-runtime/src/sandbox/lazy.ts | 195 ++++++++++ .../test/sandbox-docker.test.ts | 203 ++++++++++- .../agents-runtime/test/sandbox-lazy.test.ts | 197 ++++++++++ packages/agents/src/bootstrap.ts | 124 ++++--- packages/agents/src/server.ts | 14 + 10 files changed, 1008 insertions(+), 104 deletions(-) create mode 100644 .changeset/zombie-sandbox-containers-fix.md create mode 100644 packages/agents-runtime/src/sandbox/lazy.ts create mode 100644 packages/agents-runtime/test/sandbox-lazy.test.ts diff --git a/.changeset/zombie-sandbox-containers-fix.md b/.changeset/zombie-sandbox-containers-fix.md new file mode 100644 index 0000000000..d05322fd4b --- /dev/null +++ b/.changeset/zombie-sandbox-containers-fix.md @@ -0,0 +1,24 @@ +--- +'@electric-ax/agents-runtime': patch +'@electric-ax/agents': patch +--- + +Fix Docker sandbox containers (`electric-sbx-*`) accumulating as zombies, and +stop creating containers for wakes that never use their sandbox: + +- The boot sweep now reclaims RUNNING orphans whose owning process died + (owner-pid label + in-container adoption marker), instead of only exited + ephemeral leftovers — previously crash/quit leftovers were never cleaned up. +- Runtime shutdown flushes the debounced idle teardowns (stop persistent / + remove ephemeral) instead of letting the unref'd timers die with the + process, which leaked a running container on every quit. +- A failed post-start init no longer leaves a running, untracked container + behind, and a 409 from inside creation is no longer misread as a name + conflict (which "reattached" to a removed container). +- Sandbox creation is now lazy: the container is only created/started when a + wake actually uses its sandbox, so backlog bursts of trivial wakes (cron + ticks, bookkeeping) on runner reconnect no longer spin up containers. + Terminal reclaim and spawn-`inherit` still work for never-used sandboxes, + and concurrent container creations are capped to smooth real bursts. +- All sandbox containers carry `com.docker.compose.project=electric-sandboxes` + so Docker GUIs group them and they can be stopped/removed together. diff --git a/packages/agents-runtime/src/process-wake.ts b/packages/agents-runtime/src/process-wake.ts index b7cb4b3d17..623f914b20 100644 --- a/packages/agents-runtime/src/process-wake.ts +++ b/packages/agents-runtime/src/process-wake.ts @@ -12,6 +12,7 @@ import { createEntityLogPrefix, runtimeLog } from './log' import { createRuntimeServerClient } from './runtime-server-client' import type { RuntimeServerClient } from './runtime-server-client' import { unrestrictedSandbox } from './sandbox/unrestricted' +import { ensureSandboxMaterialized } from './sandbox/lazy' import { resolveSandboxIdentity } from './sandbox/identity' import { appendPathToUrl } from './url' import { manifestChildKey } from './manifest-helpers' @@ -1322,7 +1323,7 @@ export async function processWake( const requestedInherit = opts?.sandbox === `inherit` || (typeof opts?.sandbox === `object` && opts.sandbox.inherit === true) - const sandbox = requestedInherit + const childSandbox = requestedInherit ? resolvedSandboxSelection ? { profile: resolvedSandboxSelection.profile, @@ -1336,6 +1337,12 @@ export async function processWake( : opts?.sandbox === `inherit` ? undefined : opts?.sandbox + // An inheriting child only ever ATTACHES by key — make sure the + // owner's (lazily-created) container/workspace actually exists before + // the child can wake, even if this wake never ran a tool itself. + if (requestedInherit && resolvedSandboxSelection && sandbox) { + await ensureSandboxMaterialized(sandbox) + } return serverClient.spawnEntity({ type: childType, id: childId, @@ -1344,7 +1351,7 @@ export async function processWake( initialMessage: opts?.initialMessage, initialMessageType: opts?.initialMessageType, tags: opts?.tags, - sandbox, + sandbox: childSandbox, wake: wakeOpt, }) }, diff --git a/packages/agents-runtime/src/sandbox-docker.ts b/packages/agents-runtime/src/sandbox-docker.ts index c1c39dc6cd..ffb8e7c4bb 100644 --- a/packages/agents-runtime/src/sandbox-docker.ts +++ b/packages/agents-runtime/src/sandbox-docker.ts @@ -9,6 +9,8 @@ export { dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, sweepOrphanedDockerSandboxes, __resetPersistentRegistryForTests, } from './sandbox/docker' diff --git a/packages/agents-runtime/src/sandbox.ts b/packages/agents-runtime/src/sandbox.ts index 9e1bc7ffe4..29b5839d2a 100644 --- a/packages/agents-runtime/src/sandbox.ts +++ b/packages/agents-runtime/src/sandbox.ts @@ -8,6 +8,8 @@ export type { RemoteProvider, RemoteSandboxOpts } from './sandbox/remote' export type { RemoteSandboxClient } from './sandbox/remote/types' export { isE2BAvailable } from './sandbox/remote/e2b' export { chooseDefaultSandbox } from './sandbox/default' +export { ensureSandboxMaterialized, lazySandbox } from './sandbox/lazy' +export type { LazySandboxOpts } from './sandbox/lazy' export { SandboxError } from './sandbox/types' export type { Sandbox, diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts index f30bb1c53f..336f9dbc27 100644 --- a/packages/agents-runtime/src/sandbox/docker.ts +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -156,10 +156,35 @@ const SANDBOX_ENTITY_LABEL = `com.electric.sandbox.entity` * key) and only reclaims ephemeral leftovers. */ const SANDBOX_PERSISTENT_LABEL = `com.electric.sandbox.persistent` +/** + * PID of the process that created the container. The boot sweep probes it + * (same host as the daemon's clients) to tell a live sibling's container from + * a crash/quit leftover — the keepalive loop never exits on its own, so a dead + * owner's container stays RUNNING forever without this. + */ +const SANDBOX_OWNER_PID_LABEL = `com.electric.sandbox.owner-pid` +/** + * Compose-style grouping labels. Docker Desktop (and other GUI clients) group + * containers by compose project, so every sandbox shows up under one + * collapsible `electric-sandboxes` entry and can be stopped / deleted together + * — also via `docker compose -p electric-sandboxes down`. + */ +const COMPOSE_PROJECT_LABEL = `com.docker.compose.project` +const COMPOSE_SERVICE_LABEL = `com.docker.compose.service` +const COMPOSE_PROJECT_NAME = `electric-sandboxes` /** Common prefix for every container name this module assigns. */ const NAME_PREFIX = `electric-sbx` +/** + * In-container record of the process that most recently ADOPTED the container + * (reattached by key after its creator exited). Labels are immutable, so the + * creation-time owner pid goes stale on reattach; the boot sweep probes this + * marker before reclaiming a running container whose labelled owner is dead. + * Lives on the /tmp tmpfs, so a stop wipes it along with any stale adoption. + */ +const OWNER_MARKER_PATH = `/tmp/.electric-sbx-owner-pid` + /** Default warm window before an idle container is torn down (stop/remove). */ const DEFAULT_IDLE_GRACE_MS = 2 * 60 * 1000 @@ -209,6 +234,33 @@ function withKeyLock(key: string, fn: () => Promise): Promise { return run } +/** + * Bound on concurrent container CREATIONS (image resolve + create + start + + * init), so a burst of wakes materializing sandboxes at once — e.g. a backlog + * replay when a runner reconnects — queues against the daemon instead of + * stampeding it. Reattaches and execs are not limited; total creations are + * unbounded, only their concurrency. + */ +const MAX_CONCURRENT_CREATIONS = 4 +let creationSlots = MAX_CONCURRENT_CREATIONS +const creationQueue: Array<() => void> = [] +async function withCreationSlot(fn: () => Promise): Promise { + if (creationSlots > 0) { + creationSlots -= 1 + } else { + await new Promise((release) => creationQueue.push(release)) + } + try { + return await fn() + } finally { + // Hand the slot directly to the next waiter (no decrement on their side), + // so a new caller can't race in between release and re-acquire. + const next = creationQueue.shift() + if (next) next() + else creationSlots += 1 + } +} + /** Lowercase, DNS-safe slug from an arbitrary identity string (≤24 chars). */ function slugify(s: string): string { const slug = s @@ -421,6 +473,9 @@ export async function dockerSandbox( [SANDBOX_LABEL]: `1`, [SANDBOX_KEY_LABEL]: sandboxKey, [SANDBOX_PERSISTENT_LABEL]: persistent ? `true` : `false`, + [SANDBOX_OWNER_PID_LABEL]: String(process.pid), + [COMPOSE_PROJECT_LABEL]: COMPOSE_PROJECT_NAME, + [COMPOSE_SERVICE_LABEL]: opts.entityType ?? `sandbox`, ...(opts.entityType ? { [SANDBOX_ENTITY_TYPE_LABEL]: opts.entityType } : {}), @@ -429,39 +484,59 @@ export async function dockerSandbox( } // Create-and-start a fresh container. Image is pulled here only (skipped - // entirely when reattaching to an existing container). - const createStarted = async (): Promise => { - const image = resolveImage(opts) - await ensureImage(docker, image, opts) - const c = await docker.createContainer({ - // Spread (rather than a literal `name:`) so the `name` query param — - // which dockerode accepts but doesn't declare on its create-opts type — - // doesn't trip excess-property checking. - ...{ name: containerName }, - Image: image, - Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], - WorkingDir: containerCwd, - Env: Object.entries(baseEnv).map(([k, v]) => `${k}=${v}`), - Labels: labels, - ExposedPorts: exposedPorts, - HostConfig, + // entirely when reattaching to an existing container). Creation concurrency + // is bounded process-wide (see withCreationSlot). + const createStarted = (): Promise => + withCreationSlot(async (): Promise => { + const image = resolveImage(opts) + await ensureImage(docker, image, opts) + const c = await docker.createContainer({ + // Spread (rather than a literal `name:`) so the `name` query param — + // which dockerode accepts but doesn't declare on its create-opts type — + // doesn't trip excess-property checking. + ...{ name: containerName }, + Image: image, + Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], + WorkingDir: containerCwd, + Env: Object.entries(baseEnv).map(([k, v]) => `${k}=${v}`), + Labels: labels, + ExposedPorts: exposedPorts, + HostConfig, + }) + try { + await c.start() + } catch (err) { + await c.remove({ force: true, v: true }).catch(() => {}) + throw new SandboxError( + `runtime`, + `dockerSandbox: container start failed: ${ + err instanceof Error ? err.message : String(err) + }` + ) + } + // Tmpfs on `/work` is empty at start; ensure caller-supplied workingDir + // exists with sensible perms. If this post-start init fails — the exec + // call throws, or the mkdir can't run / exits non-zero (a null exit code + // means the exec never produced one, e.g. it couldn't spawn at all) — + // remove the already-started container before rethrowing: it isn't + // registered yet, so no dispose (and, while running, no sweep) would + // ever reclaim it. + try { + const init = await runOneOff(c, [`mkdir`, `-p`, containerCwd]) + if (init.exitCode !== 0) { + throw new SandboxError( + `runtime`, + `dockerSandbox: post-start init failed: \`mkdir -p ${containerCwd}\` exited ${init.exitCode ?? `without a code`}${ + init.stderr.length > 0 ? `: ${init.stderr.toString().trim()}` : `` + }` + ) + } + } catch (err) { + await c.remove({ force: true, v: true }).catch(() => {}) + throw err + } + return c }) - try { - await c.start() - } catch (err) { - await c.remove({ force: true, v: true }).catch(() => {}) - throw new SandboxError( - `runtime`, - `dockerSandbox: container start failed: ${ - err instanceof Error ? err.message : String(err) - }` - ) - } - // Tmpfs on `/work` is empty at start; ensure caller-supplied workingDir - // exists with sensible perms. - await runOneOff(c, [`mkdir`, `-p`, containerCwd]) - return c - } // Serialize reattach + registration against the debounced idle teardown // (which holds the same per-key lock), so a re-acquire can't race a teardown @@ -496,6 +571,25 @@ export async function dockerSandbox( }) } +/** + * Best-effort: record this process as the container's current owner in the + * in-container marker (see {@link OWNER_MARKER_PATH}). Called on the reattach + * paths — where the immutable creation-time owner-pid label may belong to an + * exited process — so the boot sweep can tell an adopted container from a + * crash leftover. + */ +async function writeOwnerMarker(c: DockerodeContainer): Promise { + try { + await runOneOff(c, [ + `sh`, + `-c`, + `echo ${process.pid} > ${OWNER_MARKER_PATH}`, + ]) + } catch { + /* best-effort */ + } +} + /** * Resolve a container by name: reattach to an existing one (starting it if a * persistent container had been stopped) or create it fresh. Handles the race @@ -520,6 +614,7 @@ async function reattachOrCreate( // and exec/fs round-trips work again. await existing.start().catch(() => {}) } + await writeOwnerMarker(existing) return existing } try { @@ -560,34 +655,72 @@ async function reattachExisting( ) } if (!running) await existing.start().catch(() => {}) + await writeOwnerMarker(existing) return existing } function isNameConflict(err: unknown): boolean { - const status = (err as { statusCode?: number; status?: number })?.statusCode - if (status === 409) return true - return /already in use|Conflict/i.test( + // Match the daemon's name-conflict message, not bare HTTP 409 — the daemon + // also answers 409 for unrelated conflicts that can surface from inside + // `createStarted` (e.g. exec on a container that just exited), and treating + // those as a lost create race would "reattach" to a container that is gone. + return /already in use/i.test( err instanceof Error ? err.message : String(err) ) } /** - * One-shot startup cleanup of *ephemeral* sandbox leftovers from a previous - * process (a crash or restart before disposes ran). Call once at runner boot. + * Same-host liveness probe for the pid recorded on a container at creation. + * `kill(pid, 0)` delivers no signal; EPERM means the pid exists but belongs to + * another user — still alive. A reused pid reads as alive (the orphan is then + * left for a later sweep), which errs on the safe side. + */ +function isPidAlive(pid: number): boolean { + try { + process.kill(pid, 0) + return true + } catch (err) { + return (err as NodeJS.ErrnoException).code === `EPERM` + } +} + +/** + * Read the adoption marker (see {@link OWNER_MARKER_PATH}) from a RUNNING + * container. `null` when absent or unreadable — the sweep then falls back to + * the creation-time owner-pid label alone. + */ +async function readOwnerMarker(c: DockerodeContainer): Promise { + try { + const r = await runOneOff(c, [`cat`, OWNER_MARKER_PATH]) + if (r.exitCode !== 0) return null + const pid = Number(r.stdout.toString().trim()) + return Number.isInteger(pid) && pid > 0 ? pid : null + } catch { + return null + } +} + +/** + * One-shot startup cleanup of sandbox leftovers from a previous process (a + * crash, force-quit, or shutdown before disposes ran). Call once at runner + * boot. * - * Two containers are deliberately left untouched: - * - RUNNING containers — they may belong to a live sibling runner (or a - * concurrent test run) sharing this Docker daemon; force-removing those - * would wipe another process's in-use sandbox. Reboot/crash leftovers are - * `Exited` once the daemon restarts, so the common case is still reclaimed; - * a still-running ephemeral orphan is left for a manual labelled prune - * rather than risk a live peer. - * - PERSISTENT containers — `persistent: true` exists precisely so a restarted - * process can reattach to the warm workspace by key, so they must survive a - * boot (only ephemeral leftovers are reclaimed here; a manual labelled prune - * reclaims truly-abandoned persistent ones). + * The keepalive loop never exits on its own, so a leftover is usually still + * RUNNING — but so is a live sibling runner's container on the same daemon. + * The owner-pid label disambiguates (the sweep runs on the same host as the + * daemon's clients): owner alive ⇒ leave it; owner dead ⇒ check the + * in-container adoption marker (a live process that reattached after the + * creator exited) and, if no adopter is alive either, reclaim — REMOVE an + * ephemeral orphan, STOP a persistent one (its writable layer must survive so + * the next wake can reattach by key). A running container without the label + * (created before it existed) can't be probed and is left for a manual + * labelled prune. * - * Returns the names removed. + * Non-running containers: ephemeral leftovers are removed; persistent ones are + * preserved — `persistent: true` exists precisely so a restarted process can + * reattach to the warm workspace by key. + * + * Returns the names reclaimed (removed or stopped). */ export async function sweepOrphanedDockerSandboxes(opts?: { dockerSocket?: string @@ -612,21 +745,116 @@ export async function sweepOrphanedDockerSandboxes(opts?: { return [] } - const removed: Array = [] + const reclaimed: Array = [] for (const c of listed) { const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id - // Never touch a running container (possibly a live peer) or a persistent - // one (meant to be reattached by key). See the doc comment above. - if (c.State === `running`) continue - if (c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true`) continue + // Held by this very process right now — its lifecycle is already managed. + if (sandboxContainers.has(name)) continue + const persistent = c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true` try { + if (c.State === `running`) { + const ownerPid = Number(c.Labels?.[SANDBOX_OWNER_PID_LABEL]) + // No (or unparsable) owner label, or the owner is alive ⇒ possibly a + // live peer's in-use sandbox — never touch it. + if (!Number.isInteger(ownerPid) || isPidAlive(ownerPid)) continue + // The creator is gone, but a live sibling process may have since + // ADOPTED the container (reattached by key) — labels are immutable, so + // adoption is recorded in an in-container marker. Probe it before + // reclaiming. + const adopter = await readOwnerMarker(docker.getContainer(c.Id)) + if (adopter !== null && isPidAlive(adopter)) continue + if (persistent) { + // `t: 0` → straight to SIGKILL (PID 1 ignores SIGTERM); the writable + // layer survives for a later reattach by key. + await docker.getContainer(c.Id).stop({ t: 0 }) + } else { + await docker.getContainer(c.Id).remove({ force: true, v: true }) + } + reclaimed.push(name) + continue + } + if (persistent) continue await docker.getContainer(c.Id).remove({ force: true, v: true }) - removed.push(name) + reclaimed.push(name) } catch { /* already gone */ } } - return removed + return reclaimed +} + +/** + * Wipe the container for a resolved sandbox key WITHOUT creating one — the + * reclaim path for a lazy sandbox that was never materialized: a terminal + * entity's persistent workspace from an EARLIER wake must not survive just + * because the final wake never used its sandbox. If sibling leases currently + * hold the container, it is only MARKED reclaimed — the last lease draining + * wipes it (matching an owner's `dispose({ reclaim: true })`). A container + * that exists with no registry entry (left from a previous process) is removed + * directly; absent containers are a no-op. + */ +export async function reclaimDockerSandboxByKey( + sandboxKey: string, + opts?: { dockerSocket?: string } +): Promise { + const name = containerNameForKey(sandboxKey) + const Docker = await loadDockerode() + const docker: Dockerode = opts?.dockerSocket + ? new Docker({ socketPath: opts.dockerSocket }) + : new Docker() + await withKeyLock(name, async () => { + const entry = sandboxContainers.get(name) + if (entry && entry.refs > 0) { + entry.reclaim = true + return + } + if (entry?.idleTimer) clearTimeout(entry.idleTimer) + sandboxContainers.delete(name) + try { + await docker.getContainer(name).remove({ force: true, v: true }) + } catch { + /* nothing to reclaim */ + } + }) +} + +/** + * Immediate, lock-serialized teardown of every IDLE container this process + * tracks: pending debounced timers are cancelled and each entry's teardown + * action runs NOW — REMOVE for ephemeral/reclaimed containers, STOP for + * persistent ones (the workspace survives for the next process to reattach by + * key). + * + * Call on runtime shutdown. The debounced idle timers are unref'd and die with + * the process; without this flush every recently-used container is left + * RUNNING for the boot sweep of some future process to find. + * + * Entries with live leases (refs > 0) are left alone: the registry is shared + * process-wide, so they may belong to a sibling runtime that is not shutting + * down (or to a wake whose drain timed out). Their own dispose tears them + * down; if the process exits first, the boot sweep reclaims them by dead + * owner pid. + */ +export async function shutdownAllDockerSandboxes(): Promise { + await Promise.all( + [...sandboxContainers.keys()].map((name) => + withKeyLock(name, async () => { + const entry = sandboxContainers.get(name) + if (!entry || entry.refs > 0) return + if (entry.idleTimer) clearTimeout(entry.idleTimer) + try { + if (sandboxWipesOnDispose(entry.reclaim ?? false, entry.persistent)) { + await entry.container.remove({ force: true, v: true }) + } else { + await entry.container.stop({ t: 0 }) + } + } catch { + /* already stopped / removed / gone */ + } + sandboxContainers.delete(name) + }) + ) + ) } /** Test-only: drop the in-process container registry bookkeeping. */ diff --git a/packages/agents-runtime/src/sandbox/lazy.ts b/packages/agents-runtime/src/sandbox/lazy.ts new file mode 100644 index 0000000000..06b6d7c561 --- /dev/null +++ b/packages/agents-runtime/src/sandbox/lazy.ts @@ -0,0 +1,195 @@ +/** + * Lazy sandbox wrapper — defers the (potentially expensive) provider factory + * until the sandbox is actually USED. A wake that never runs a tool — a cron + * tick deciding there is nothing to do, child-status bookkeeping, a drained + * inbox — then never pays for, or leaks, a provider-side sandbox (e.g. a + * Docker container). Profile authors wrap their factory: + * + * factory: async (params) => + * lazySandbox({ + * workingDirectory: `/work`, + * factory: () => dockerSandbox({ ... }), + * reclaim: params.owner + * ? () => reclaimDockerSandboxByKey(params.sandboxKey) + * : undefined, + * }) + * + * Materialization is single-flight and retried on failure. Disposing a + * never-used wrapper is free — except `dispose({ reclaim: true })`, which runs + * the provider-supplied `reclaim` callback so a terminal entity's persistent + * workspace from an EARLIER wake is still wiped without creating a fresh + * sandbox just to remove it. + */ + +import { SandboxError } from './types' +import type { + DirEntry, + FileStat, + Sandbox, + SandboxExecOpts, + SandboxExecResult, +} from './types' + +/** + * Cross-module-instance brand for {@link ensureSandboxMaterialized}: a global + * symbol survives duplicate copies of this module in a bundle, where an + * `instanceof` check would not. + */ +const MATERIALIZE = Symbol.for(`electric.sandbox.lazy.materialize`) + +export interface LazySandboxOpts { + /** Reported as `Sandbox.name` until the inner sandbox materializes. */ + name?: string + /** + * The inner sandbox's primary writable root. MUST match what `factory` + * produces: tools resolve relative paths against it synchronously, before + * the first (materializing) sandbox call could run. + */ + workingDirectory: string + /** Builds the inner sandbox on first use. */ + factory: () => Promise + /** + * Wipe the provider-side sandbox WITHOUT materializing. Invoked when this + * lease is disposed with `reclaim: true` before the sandbox was ever used — + * an earlier wake's persistent workspace may still exist under the same key + * and must not outlive its terminal entity. Only pass this for OWNER leases: + * the wrapper itself cannot tell an owner from an attacher. + */ + reclaim?: () => Promise +} + +class LazySandbox implements Sandbox { + private readonly opts: LazySandboxOpts + private inner: Sandbox | null = null + private materializing: Promise | null = null + private disposed = false + + constructor(opts: LazySandboxOpts) { + this.opts = opts + } + + get name(): string { + return this.inner?.name ?? this.opts.name ?? `lazy` + } + + get workingDirectory(): string { + return this.opts.workingDirectory + } + + /** Single-flight; a failed factory clears so the next use retries. */ + private materialize(): Promise { + if (this.inner) return Promise.resolve(this.inner) + this.materializing ??= this.opts.factory().then( + (sandbox) => { + this.inner = sandbox + this.materializing = null + return sandbox + }, + (err: unknown) => { + this.materializing = null + throw err + } + ) + return this.materializing + } + + /** @internal see {@link ensureSandboxMaterialized} */ + [MATERIALIZE](): Promise { + this.assertLive() + return this.materialize() + } + + private assertLive(): void { + if (this.disposed) { + throw new SandboxError( + `runtime`, + `lazySandbox: this sandbox lease has been disposed.` + ) + } + } + + async exec(opts: SandboxExecOpts): Promise { + this.assertLive() + return (await this.materialize()).exec(opts) + } + + async readFile(path: string): Promise { + this.assertLive() + return (await this.materialize()).readFile(path) + } + + async writeFile(path: string, content: Buffer | string): Promise { + this.assertLive() + return (await this.materialize()).writeFile(path, content) + } + + async mkdir(path: string, opts?: { recursive?: boolean }): Promise { + this.assertLive() + return (await this.materialize()).mkdir(path, opts) + } + + async readdir(path: string): Promise> { + this.assertLive() + return (await this.materialize()).readdir(path) + } + + async exists(path: string): Promise { + this.assertLive() + return (await this.materialize()).exists(path) + } + + async remove(path: string, opts?: { recursive?: boolean }): Promise { + this.assertLive() + return (await this.materialize()).remove(path, opts) + } + + async stat(path: string): Promise { + this.assertLive() + return (await this.materialize()).stat(path) + } + + async fetch(input: string | URL, init?: RequestInit): Promise { + this.assertLive() + return (await this.materialize()).fetch(input, init) + } + + async dispose(opts?: { reclaim?: boolean }): Promise { + if (this.disposed) return + this.disposed = true + if (this.inner || this.materializing) { + let inner: Sandbox | null = null + try { + inner = await this.materialize() + } catch { + // The in-flight factory failed — nothing live to dispose. Fall + // through: a requested reclaim still wipes the provider-side state. + } + if (inner) { + // The inner dispose owns the reclaim semantics from here. + await inner.dispose(opts) + return + } + } + if (opts?.reclaim && this.opts.reclaim) await this.opts.reclaim() + } +} + +/** Wrap a provider factory so the sandbox materializes on first use. */ +export function lazySandbox(opts: LazySandboxOpts): Sandbox { + return new LazySandbox(opts) +} + +/** + * Force a lazy sandbox to materialize (no-op for any other sandbox). Used by + * the spawn-`inherit` path: a child ATTACHES to this wake's sandbox by key and + * never creates, so the owner's container/workspace must actually exist before + * the child can wake — even if the owner itself never ran a tool. + */ +export async function ensureSandboxMaterialized( + sandbox: Sandbox +): Promise { + const materialize = ( + sandbox as Sandbox & { [MATERIALIZE]?: () => Promise } + )[MATERIALIZE] + if (typeof materialize === `function`) await materialize.call(sandbox) +} diff --git a/packages/agents-runtime/test/sandbox-docker.test.ts b/packages/agents-runtime/test/sandbox-docker.test.ts index e62df96825..8f9ff4ceb9 100644 --- a/packages/agents-runtime/test/sandbox-docker.test.ts +++ b/packages/agents-runtime/test/sandbox-docker.test.ts @@ -2,11 +2,15 @@ import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest' import { mkdtemp, rm, symlink, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' import { join } from 'node:path' +import { spawnSync } from 'node:child_process' import { __resetPersistentRegistryForTests, dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, sweepOrphanedDockerSandboxes, } from '../src/sandbox/docker' +import { lazySandbox } from '../src/sandbox/lazy' import { loadDockerode } from '../src/sandbox/docker/loader' import { SandboxError } from '../src/sandbox/types' import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' @@ -473,6 +477,7 @@ d(`dockerSandbox keyed lifecycle`, () => { const info = await docker.getContainer(list[0].Id).inspect() return info.State.Running ? `running` : `stopped` } catch (error) { + // Removed between list and inspect (e.g. a reclaim in flight). if ( typeof error === `object` && error !== null && @@ -542,6 +547,12 @@ d(`dockerSandbox keyed lifecycle`, () => { // Per-entity identity lives in labels, since the shared name can't. expect(labels[`com.electric.sandbox.entity-type`]).toBe(`horton`) expect(labels[`com.electric.sandbox.entity`]).toBe(`/horton/abc123/main`) + // Owning process, for the boot sweep's same-host liveness probe. + expect(labels[`com.electric.sandbox.owner-pid`]).toBe(String(process.pid)) + // Compose-style grouping: docker GUIs collapse all sandboxes under one + // project, and `docker compose -p electric-sandboxes down` clears them. + expect(labels[`com.docker.compose.project`]).toBe(`electric-sandboxes`) + expect(labels[`com.docker.compose.service`]).toBe(`horton`) } finally { await sandbox.dispose() } @@ -755,9 +766,10 @@ d(`dockerSandbox keyed lifecycle`, () => { expect(await keyState(key)).toBe(`absent`) }, 90_000) - it(`sweepOrphanedDockerSandboxes leaves a *running* container untouched`, async () => { - // A running container may belong to a live sibling process sharing this - // daemon — force-removing it would wipe a peer's in-use sandbox. + it(`sweepOrphanedDockerSandboxes leaves a *running* container with a live owner untouched`, async () => { + // A running container whose owner-pid process (this test process) is alive + // belongs to a live sibling — force-removing it would wipe an in-use + // sandbox. const key = `${KEY}-orphan-running` await make(key, 60_000, /* persistent */ false) __resetPersistentRegistryForTests() @@ -779,4 +791,189 @@ d(`dockerSandbox keyed lifecycle`, () => { await sweepOrphanedDockerSandboxes() expect(await keyState(key)).toBe(`stopped`) }, 90_000) + + // A pid that is guaranteed dead: spawn a no-op child and reuse its pid after + // it has exited (synchronously, so it is reaped before we return). + const deadPid = (): number => { + const child = spawnSync(process.execPath, [`-e`, ``]) + if (child.pid === undefined) throw new Error(`spawnSync returned no pid`) + return child.pid + } + + // Create a container whose owner-pid label points at a dead process — the + // shape a crashed/quit runner leaves behind (its keepalive loop never exits + // on its own, so the container is still RUNNING). + const makeCrashOrphan = (sandboxKey: string, persistent: boolean) => + dockerSandbox({ + image: TEST_IMAGE, + labels: { + [TEST_LABEL]: `1`, + 'com.electric.sandbox.owner-pid': String(deadPid()), + }, + sandboxKey, + persistent, + sharedIdleGraceMs: 600_000, + }) + + it(`sweepOrphanedDockerSandboxes REMOVES a running *ephemeral* orphan whose owner died`, async () => { + const key = `${KEY}-crash-eph` + await makeCrashOrphan(key, /* persistent */ false) + __resetPersistentRegistryForTests() // the owning process is gone + expect(await keyState(key)).toBe(`running`) + + const reclaimed = await sweepOrphanedDockerSandboxes() + expect(reclaimed.length).toBeGreaterThanOrEqual(1) + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes STOPS a running *persistent* orphan whose owner died`, async () => { + // Persistent workspaces survive (the next wake reattaches by key), but a + // dead owner can't stop the keepalive — the sweep does it instead. + const key = `${KEY}-crash-persist` + await makeCrashOrphan(key, /* persistent */ true) + __resetPersistentRegistryForTests() + expect(await keyState(key)).toBe(`running`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`stopped`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes spares a running orphan ADOPTED by a live process`, async () => { + // The owner-pid label is immutable, so it goes stale when a live process + // reattaches a dead creator's container — adoption is recorded in an + // in-container marker that the sweep probes before reclaiming. + const key = `${KEY}-adopted` + await makeCrashOrphan(key, /* persistent */ true) + __resetPersistentRegistryForTests() // the creator is gone… + await make(key, 600_000, /* persistent */ true) // …a live process adopts + __resetPersistentRegistryForTests() // bypass the in-registry skip + expect(await keyState(key)).toBe(`running`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`running`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes leaves a running container with no owner-pid label untouched`, async () => { + // Containers created before the owner-pid label existed can't be liveness- + // probed — fail safe and leave them for a manual labelled prune. + const Docker = await loadDockerode() + const docker = new Docker() + const c = await docker.createContainer({ + ...{ name: `electric-sbx-legacy-nolabel-test` }, + Image: TEST_IMAGE, + Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], + Labels: { + 'com.electric.sandbox': `1`, + 'com.electric.sandbox.persistent': `false`, + [TEST_LABEL]: `1`, + }, + HostConfig: {}, + }) + await c.start() + await sweepOrphanedDockerSandboxes() + const info = await c.inspect() + expect(info.State.Running).toBe(true) + }, 90_000) + + it(`removes the container when post-start init fails (no unregistered zombie)`, async () => { + // pidsLimit:1 — PID 1's keepalive holds the only slot, so the post-start + // `mkdir -p` exec cannot run and creation fails AFTER the container + // started. The failed create must remove it, not leave it behind running + // and unregistered (invisible to dispose and, while running, to the sweep). + const key = `${KEY}-init-fail` + await expect( + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey: key, + resources: { pidsLimit: 1 }, + }) + ).rejects.toThrow() + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`shutdownAllDockerSandboxes flushes pending debounced teardowns immediately`, async () => { + // The debounced idle timers are unref'd and die with the process — on + // shutdown the pending action must run NOW: STOP persistent, REMOVE + // ephemeral. + const ephKey = `${KEY}-shutdown-eph` + const perKey = `${KEY}-shutdown-per` + const eph = await make(ephKey, 600_000, /* persistent */ false) + const per = await make(perKey, 600_000, /* persistent */ true) + await eph.dispose() + await per.dispose() + // Teardowns are debounced 10 minutes out — both still running. + expect(await keyState(ephKey)).toBe(`running`) + expect(await keyState(perKey)).toBe(`running`) + + await shutdownAllDockerSandboxes() + expect(await keyState(ephKey)).toBe(`absent`) + expect(await keyState(perKey)).toBe(`stopped`) + }, 90_000) + + it(`lazy docker sandbox creates no container until first use`, async () => { + // The shape bootstrap wires: profile.factory returns a lazy wrapper, so a + // trivial wake (no tool use) never starts a container. + const key = `${KEY}-lazy` + const sb = lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => make(key, 60_000), + }) + expect(await keyState(key)).toBe(`absent`) + const r = await sb.exec({ command: `echo materialized` }) + expect(r.stdout.toString().trim()).toBe(`materialized`) + expect(await keyState(key)).toBe(`running`) + await sb.dispose() + }, 90_000) + + it(`lazy docker sandbox disposed without use leaves nothing behind`, async () => { + const key = `${KEY}-lazy-unused` + const sb = lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => make(key, 60_000), + }) + await sb.dispose() + expect(await keyState(key)).toBe(`absent`) + }, 60_000) + + it(`reclaimDockerSandboxByKey removes an earlier wake's persistent container without creating one`, async () => { + // The lazy wrapper's reclaim path: a terminal wake that never used its + // sandbox must still wipe the persistent workspace earlier wakes created. + const key = `${KEY}-reclaim-by-key` + const earlier = await make(key, 600_000, /* persistent */ true) + await earlier.dispose() // idle: container stays (debounced stop pending) + expect(await keyState(key)).toBe(`running`) + + await reclaimDockerSandboxByKey(key) + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`reclaimDockerSandboxByKey is a no-op when nothing exists under the key`, async () => { + await reclaimDockerSandboxByKey(`${KEY}-reclaim-absent`) + expect(await keyState(`${KEY}-reclaim-absent`)).toBe(`absent`) + }, 60_000) + + it(`reclaimDockerSandboxByKey defers to live leases — wiped once the last drains`, async () => { + const key = `${KEY}-reclaim-live` + const lease = await make(key, 600_000, /* persistent */ true) + await reclaimDockerSandboxByKey(key) + // The sibling lease still owns it — not torn down underneath it. + expect(await keyState(key)).toBe(`running`) + await lease.dispose() + // Marked reclaimed: the last lease draining wipes it immediately. + await waitForKeyState(key, `absent`, 5_000) + }, 90_000) + + it(`shutdownAllDockerSandboxes leaves containers with live leases alone`, async () => { + // The registry is process-wide: a still-open lease may belong to a sibling + // runtime that is NOT shutting down. Its own dispose tears it down — and if + // the process exits first, the boot sweep reclaims it by dead owner pid. + const key = `${KEY}-shutdown-live` + const live = await make(key, 600_000, /* persistent */ false) + await shutdownAllDockerSandboxes() + expect(await keyState(key)).toBe(`running`) + await live.dispose() + }, 90_000) }) diff --git a/packages/agents-runtime/test/sandbox-lazy.test.ts b/packages/agents-runtime/test/sandbox-lazy.test.ts new file mode 100644 index 0000000000..b529ba4744 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-lazy.test.ts @@ -0,0 +1,197 @@ +import { describe, expect, it } from 'vitest' +import { ensureSandboxMaterialized, lazySandbox } from '../src/sandbox/lazy' +import type { Sandbox } from '../src/sandbox/types' + +/** + * lazySandbox defers the provider factory until first use, so trivial wakes + * (cron ticks, bookkeeping) never pay for — or leak — a provider-side sandbox + * (e.g. a Docker container). + */ + +interface FakeCalls { + exec: number + dispose: Array<{ reclaim?: boolean } | undefined> +} + +const makeFake = (calls: FakeCalls): Sandbox => ({ + name: `fake:inner`, + workingDirectory: `/work`, + exec: async () => { + calls.exec += 1 + return { + exitCode: 0, + signal: null, + stdout: Buffer.from(`ok`), + stderr: Buffer.alloc(0), + timedOut: false, + aborted: false, + outputTruncated: false, + } + }, + readFile: async () => Buffer.from(`content`), + writeFile: async () => {}, + mkdir: async () => {}, + readdir: async () => [], + exists: async () => true, + remove: async () => {}, + stat: async () => ({ type: `file`, size: 0, mtimeMs: 0 }), + fetch: async () => new Response(`ok`), + dispose: async (opts) => { + calls.dispose.push(opts) + }, +}) + +const harness = () => { + const calls: FakeCalls = { exec: 0, dispose: [] } + let factoryCalls = 0 + const reclaims: Array = [] + const sandbox = lazySandbox({ + name: `fake`, + workingDirectory: `/work`, + factory: async () => { + factoryCalls += 1 + return makeFake(calls) + }, + reclaim: async () => { + reclaims.push(true) + }, + }) + return { + sandbox, + calls, + reclaims, + factoryCalls: () => factoryCalls, + } +} + +describe(`lazySandbox`, () => { + it(`does not invoke the factory on construction or property access`, () => { + const h = harness() + expect(h.sandbox.name).toBe(`fake`) + expect(h.sandbox.workingDirectory).toBe(`/work`) + expect(h.factoryCalls()).toBe(0) + }) + + it(`materializes on first use and reuses the inner sandbox after`, async () => { + const h = harness() + await h.sandbox.exec({ command: `echo hi` }) + await h.sandbox.exec({ command: `echo again` }) + await h.sandbox.readFile(`/work/x`) + expect(h.factoryCalls()).toBe(1) + expect(h.calls.exec).toBe(2) + // After materialization the inner provider's name shows through. + expect(h.sandbox.name).toBe(`fake:inner`) + }) + + it(`single-flights concurrent first calls`, async () => { + let resolveFactory!: (sb: Sandbox) => void + const calls: FakeCalls = { exec: 0, dispose: [] } + let factoryCalls = 0 + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: () => { + factoryCalls += 1 + return new Promise((r) => { + resolveFactory = r + }) + }, + }) + const a = sandbox.exec({ command: `a` }) + const b = sandbox.exec({ command: `b` }) + resolveFactory(makeFake(calls)) + await Promise.all([a, b]) + expect(factoryCalls).toBe(1) + expect(calls.exec).toBe(2) + }) + + it(`a failed factory rejects the call and is retried on the next use`, async () => { + let attempts = 0 + const calls: FakeCalls = { exec: 0, dispose: [] } + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: async () => { + attempts += 1 + if (attempts === 1) throw new Error(`daemon hiccup`) + return makeFake(calls) + }, + }) + await expect(sandbox.exec({ command: `x` })).rejects.toThrow( + `daemon hiccup` + ) + await sandbox.exec({ command: `x` }) + expect(attempts).toBe(2) + expect(calls.exec).toBe(1) + }) + + it(`dispose without use never invokes the factory`, async () => { + const h = harness() + await h.sandbox.dispose() + expect(h.factoryCalls()).toBe(0) + expect(h.calls.dispose).toEqual([]) + expect(h.reclaims).toEqual([]) + }) + + it(`dispose({reclaim}) without use runs the reclaim callback instead`, async () => { + // A terminal entity's persistent workspace from an EARLIER wake must not + // survive just because the final wake never touched the sandbox. + const h = harness() + await h.sandbox.dispose({ reclaim: true }) + expect(h.factoryCalls()).toBe(0) + expect(h.reclaims).toEqual([true]) + }) + + it(`dispose({reclaim}) without use is a no-op when no reclaim callback exists`, async () => { + let factoryCalls = 0 + const calls: FakeCalls = { exec: 0, dispose: [] } + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: async () => { + factoryCalls += 1 + return makeFake(calls) + }, + }) + await sandbox.dispose({ reclaim: true }) + expect(factoryCalls).toBe(0) + }) + + it(`dispose after use forwards to the inner sandbox (reclaim included)`, async () => { + const h = harness() + await h.sandbox.exec({ command: `x` }) + await h.sandbox.dispose({ reclaim: true }) + expect(h.calls.dispose).toEqual([{ reclaim: true }]) + // The inner dispose owns reclaim semantics — the callback must not ALSO run. + expect(h.reclaims).toEqual([]) + }) + + it(`operations after dispose are rejected with a runtime SandboxError`, async () => { + const h = harness() + await h.sandbox.dispose() + await expect(h.sandbox.exec({ command: `x` })).rejects.toMatchObject({ + kind: `runtime`, + }) + expect(h.factoryCalls()).toBe(0) + }) + + it(`repeated dispose is a no-op`, async () => { + const h = harness() + await h.sandbox.exec({ command: `x` }) + await h.sandbox.dispose() + await h.sandbox.dispose() + expect(h.calls.dispose).toHaveLength(1) + }) +}) + +describe(`ensureSandboxMaterialized`, () => { + it(`materializes a lazy sandbox without running any operation`, async () => { + const h = harness() + await ensureSandboxMaterialized(h.sandbox) + expect(h.factoryCalls()).toBe(1) + expect(h.calls.exec).toBe(0) + }) + + it(`is a no-op for a non-lazy sandbox`, async () => { + const calls: FakeCalls = { exec: 0, dispose: [] } + await ensureSandboxMaterialized(makeFake(calls)) + expect(calls.exec).toBe(0) + }) +}) diff --git a/packages/agents/src/bootstrap.ts b/packages/agents/src/bootstrap.ts index f4c98eb18b..236224b542 100644 --- a/packages/agents/src/bootstrap.ts +++ b/packages/agents/src/bootstrap.ts @@ -15,6 +15,7 @@ import { import { chooseDefaultSandbox, isE2BAvailable, + lazySandbox, remoteSandbox, type SandboxProfile, } from '@electric-ax/agents-runtime/sandbox' @@ -43,6 +44,13 @@ export interface AgentHandlerResult { registry: EntityRegistry typeNames: Array skillsRegistry: SkillsRegistry | null + /** + * Immediately tears down the idle sandboxes this process created (set when + * a provider with host-local state — docker — is registered). MUST be called + * on shutdown, after wakes drain: the providers' debounced idle teardowns + * die with the process, which would leave containers running. + */ + shutdownSandboxes: (() => Promise) | null } export type BuiltinElectricToolsFactory = NonNullable< @@ -160,7 +168,8 @@ export async function createBuiltinAgentHandler( registerWorker(registry, { workingDirectory: cwd, streamFn, modelCatalog }) typeNames.push(`worker`) - const sandboxProfiles = await buildBuiltinSandboxProfiles(cwd) + const { profiles: sandboxProfiles, shutdownSandboxes } = + await buildBuiltinSandboxProfiles(cwd) const runtime = createRuntimeHandler({ baseUrl: agentServerUrl, @@ -182,6 +191,7 @@ export async function createBuiltinAgentHandler( registry, typeNames, skillsRegistry, + shutdownSandboxes, } } @@ -217,7 +227,7 @@ export const registerAgentTypes = registerBuiltinAgentTypes * Guard so repeated `buildBuiltinSandboxProfiles` calls in one process don't * re-run the boot sweep. */ -let dockerSweptOnBoot = false +let dockerBootSweep: Promise | null = null type SweepOrphanedDockerSandboxes = // eslint-disable-next-line quotes -- type-position import() requires a string literal @@ -225,17 +235,15 @@ type SweepOrphanedDockerSandboxes = function sweepOrphanedDockerSandboxesOnce( sweep: SweepOrphanedDockerSandboxes -): void { - if (dockerSweptOnBoot) return - dockerSweptOnBoot = true - // One-shot, at boot: reclaim any sandbox containers a previous (crashed) - // process left behind. Fire-and-forget — nothing is acquired yet, and within - // a live process shared containers are stopped (not removed) when idle. - sweep() - .then((removed) => { - if (removed.length > 0) { +): Promise { + // One-shot, at boot: reclaim any sandbox containers a previous (crashed or + // quit) process left behind. Awaited by the caller — the sweep can stop + // running orphans, so it must finish before any wake reattaches by key. + dockerBootSweep ??= sweep() + .then((reclaimed) => { + if (reclaimed.length > 0) { serverLog.info( - `[builtin-agents] docker sandbox boot sweep removed ${removed.length} leftover container(s)` + `[builtin-agents] docker sandbox boot sweep reclaimed ${reclaimed.length} leftover container(s)` ) } }) @@ -244,16 +252,23 @@ function sweepOrphanedDockerSandboxesOnce( `[builtin-agents] docker sandbox boot sweep error: ${err instanceof Error ? err.message : String(err)}` ) ) + return dockerBootSweep } /** * Built-in sandbox profiles. `local` is always available. `docker` is * gated on Docker being reachable so a user without Docker installed * sees only what works — the UI never offers a non-functional choice. + * + * Also returns `shutdownSandboxes` when a host-local provider registered: an + * immediate teardown of this process's live containers that the embedding + * server must run on shutdown (the providers' debounced idle teardowns die + * with the process). */ -async function buildBuiltinSandboxProfiles( - workingDirectory: string -): Promise> { +async function buildBuiltinSandboxProfiles(workingDirectory: string): Promise<{ + profiles: Array + shutdownSandboxes: (() => Promise) | null +}> { const profiles: Array = [ { name: `local`, @@ -263,25 +278,32 @@ async function buildBuiltinSandboxProfiles( chooseDefaultSandbox(resolveCwd(args, workingDirectory)), }, ] + let shutdownSandboxes: (() => Promise) | null = null try { const { isDockerAvailable } = await import( `@electric-ax/agents-runtime/sandbox/docker` ) if (await isDockerAvailable()) { - const { dockerSandbox, sweepOrphanedDockerSandboxes } = await import( - `@electric-ax/agents-runtime/sandbox/docker` - ) - // Reclaim containers a previous crashed process may have left running. - // No periodic reaper: within a live process, shared containers stop - // themselves a short while after their last lease disposes (debounced - // idle-stop), and ephemeral ones are killed on dispose. - sweepOrphanedDockerSandboxesOnce(sweepOrphanedDockerSandboxes) + const { + dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, + sweepOrphanedDockerSandboxes, + } = await import(`@electric-ax/agents-runtime/sandbox/docker`) + // Reclaim containers a previous process (crash, force-quit, or a + // shutdown that raced the debounced teardowns) left behind. No periodic + // reaper: within a live process, shared containers stop themselves a + // short while after their last lease disposes (debounced idle-stop), + // ephemeral ones are killed on dispose, and the rest are flushed by + // `shutdownSandboxes` on clean shutdown. + await sweepOrphanedDockerSandboxesOnce(sweepOrphanedDockerSandboxes) + shutdownSandboxes = shutdownAllDockerSandboxes profiles.push({ name: `docker`, label: `Docker`, description: `Runs in a hardened Docker container: dropped capabilities, no privilege escalation, and CPU/memory/process limits. The chosen working directory is mounted read-write and, by default, network egress is unrestricted (allow-all).`, - factory: ({ + factory: async ({ args, sandboxKey, persistent, @@ -290,25 +312,41 @@ async function buildBuiltinSandboxProfiles( entityUrl, }) => { const cwd = readWorkingDirectoryArg(args) - return dockerSandbox({ - // Default to open egress for local development. Network policy is a - // capability of this profile, not a separate profile: like the - // working directory above, it can be made a per-spawn arg later so a - // single `docker` profile spans permissive → fully-isolated rather - // than splitting into `docker-permissive` / `docker-isolated`. - initialNetworkPolicy: { mode: `allow-all` }, - extraMounts: cwd - ? [{ hostPath: cwd, containerPath: `/work`, readOnly: false }] + // Lazy: the container is only created/started when the wake actually + // USES the sandbox, so trivial wakes (cron ticks, bookkeeping) don't + // spin one up. `/work` is the container cwd dockerSandbox defaults to. + return lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => + dockerSandbox({ + // Default to open egress for local development. Network policy + // is a capability of this profile, not a separate profile: like + // the working directory above, it can be made a per-spawn arg + // later so a single `docker` profile spans permissive → + // fully-isolated rather than splitting into `docker-permissive` + // / `docker-isolated`. + initialNetworkPolicy: { mode: `allow-all` }, + extraMounts: cwd + ? [{ hostPath: cwd, containerPath: `/work`, readOnly: false }] + : undefined, + // The container is always named-by-key and reattachable; + // `persistent` chooses idle teardown (stop vs remove) and + // `owner` gates creation (an attacher reattaches only). All + // resolved upstream from config. + sandboxKey, + persistent, + owner, + // Observability: tag the container/labels with who spawned it. + entityType, + entityUrl, + }), + // A terminal entity whose final wake never used the sandbox must + // still wipe the persistent workspace earlier wakes created. Owner + // leases only — an attacher can never reclaim the owner's sandbox. + reclaim: owner + ? () => reclaimDockerSandboxByKey(sandboxKey) : undefined, - // The container is always named-by-key and reattachable; `persistent` - // chooses idle teardown (stop vs remove) and `owner` gates creation - // (an attacher reattaches only). All resolved upstream from config. - sandboxKey, - persistent, - owner, - // Observability: tag the container/labels with who spawned it. - entityType, - entityUrl, }) }, }) @@ -360,7 +398,7 @@ async function buildBuiltinSandboxProfiles( console.log( `[builtin-agents] sandbox profiles advertised: ${profiles.map((p) => p.name).join(`, `)}` ) - return profiles + return { profiles, shutdownSandboxes } } function readWorkingDirectoryArg( diff --git a/packages/agents/src/server.ts b/packages/agents/src/server.ts index 8523b81a58..9dc7bf66ea 100644 --- a/packages/agents/src/server.ts +++ b/packages/agents/src/server.ts @@ -365,6 +365,20 @@ export class BuiltinAgentsServer { }), new Promise((resolve) => setTimeout(resolve, 5_000)), ]) + // Tear down this process's live sandbox containers NOW — their debounced + // idle teardowns die with the process and would leave them running. + // Bounded like the drain above so a hung daemon can't block quit. + if (this.bootstrap.shutdownSandboxes) { + await Promise.race([ + this.bootstrap.shutdownSandboxes().catch((err) => { + serverLog.error( + `[builtin-agents] sandbox shutdown failed during shutdown:`, + err + ) + }), + new Promise((resolve) => setTimeout(resolve, 5_000)), + ]) + } this.bootstrap = null } From 8011831b21618edc636b610fc902af00de1e2c76 Mon Sep 17 00:00:00 2001 From: msfstef Date: Mon, 8 Jun 2026 17:09:16 +0300 Subject: [PATCH 2/6] docs(agents): reword zombie-sandbox changeset in plain language Rewrite the changeset as a user-facing release note: lead with the symptom (leftover containers piling up) and the three-part fix (create-only-when-used, clean-up-on-quit, reclaim-at-startup), dropping implementation jargon. Matches the rewritten PR description; no code change. Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/zombie-sandbox-containers-fix.md | 37 +++++++++++---------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/.changeset/zombie-sandbox-containers-fix.md b/.changeset/zombie-sandbox-containers-fix.md index d05322fd4b..6839149aaf 100644 --- a/.changeset/zombie-sandbox-containers-fix.md +++ b/.changeset/zombie-sandbox-containers-fix.md @@ -3,22 +3,23 @@ '@electric-ax/agents': patch --- -Fix Docker sandbox containers (`electric-sbx-*`) accumulating as zombies, and -stop creating containers for wakes that never use their sandbox: +Fix leftover Docker sandbox containers (`electric-sbx-*`) piling up. -- The boot sweep now reclaims RUNNING orphans whose owning process died - (owner-pid label + in-container adoption marker), instead of only exited - ephemeral leftovers — previously crash/quit leftovers were never cleaned up. -- Runtime shutdown flushes the debounced idle teardowns (stop persistent / - remove ephemeral) instead of letting the unref'd timers die with the - process, which leaked a running container on every quit. -- A failed post-start init no longer leaves a running, untracked container - behind, and a 409 from inside creation is no longer misread as a name - conflict (which "reattached" to a removed container). -- Sandbox creation is now lazy: the container is only created/started when a - wake actually uses its sandbox, so backlog bursts of trivial wakes (cron - ticks, bookkeeping) on runner reconnect no longer spin up containers. - Terminal reclaim and spawn-`inherit` still work for never-used sandboxes, - and concurrent container creations are capped to smooth real bursts. -- All sandbox containers carry `com.docker.compose.project=electric-sandboxes` - so Docker GUIs group them and they can be stopped/removed together. +Sandbox containers are meant to be short-lived, but several gaps let them +outlive the work they were created for — opening the desktop app could leave +15+ containers running that were never explicitly started. This closes those +gaps so a container only exists while something is actually using it: + +- **Created only when used.** A container now starts the first time an agent + actually uses its sandbox (runs a command, reads/writes a file), so trivial + wakes (scheduled ticks, bookkeeping) no longer spin one up. +- **Cleaned up on quit.** Shutdown now tears down idle containers immediately + instead of leaving their delayed-teardown timers to die with the process. +- **Leftovers reclaimed at startup.** Containers are tagged with the process + that created them; at startup, those whose owner is gone are reclaimed + (throwaway ones removed, reusable ones stopped so their files survive), while + containers a live process is still using are left untouched. + +Also: a failed container setup step no longer strands an untracked container, +and all sandboxes are grouped under one `electric-sandboxes` entry in Docker +Desktop so they can be stopped/removed together. From 2b6271554ad68431a8e385e3565c26c3826d46f2 Mon Sep 17 00:00:00 2001 From: msfstef Date: Mon, 8 Jun 2026 17:35:08 +0300 Subject: [PATCH 3/6] fix(agents): address review on zombie-sandbox PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the Claude bot review (all non-blocking nice-to-haves): - Boot sweep now probes + reclaims leftovers concurrently (Promise.all) instead of one awaited exec round-trip per orphan — keeps boot latency flat when many leftovers have accumulated (the sweep is awaited before profiles build). - Document that reclaimDockerSandboxByKey / shutdownAllDockerSandboxes are best-effort: an unreachable daemon is swallowed and left to the next boot sweep; note why the shutdown flush runs after the wake drain (it only reclaims idle, lease-free containers). - Add a lazySandbox unit test for dispose({reclaim}) racing an in-flight factory that then fails — the reclaim callback must still run. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/agents-runtime/src/sandbox/docker.ts | 82 +++++++++++-------- .../agents-runtime/test/sandbox-lazy.test.ts | 30 +++++++ packages/agents/src/server.ts | 9 +- 3 files changed, 83 insertions(+), 38 deletions(-) diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts index 336f9dbc27..74ad8732e1 100644 --- a/packages/agents-runtime/src/sandbox/docker.ts +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -745,42 +745,48 @@ export async function sweepOrphanedDockerSandboxes(opts?: { return [] } - const reclaimed: Array = [] - for (const c of listed) { - const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id - // Held by this very process right now — its lifecycle is already managed. - if (sandboxContainers.has(name)) continue - const persistent = c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true` - try { - if (c.State === `running`) { - const ownerPid = Number(c.Labels?.[SANDBOX_OWNER_PID_LABEL]) - // No (or unparsable) owner label, or the owner is alive ⇒ possibly a - // live peer's in-use sandbox — never touch it. - if (!Number.isInteger(ownerPid) || isPidAlive(ownerPid)) continue - // The creator is gone, but a live sibling process may have since - // ADOPTED the container (reattached by key) — labels are immutable, so - // adoption is recorded in an in-container marker. Probe it before - // reclaiming. - const adopter = await readOwnerMarker(docker.getContainer(c.Id)) - if (adopter !== null && isPidAlive(adopter)) continue - if (persistent) { - // `t: 0` → straight to SIGKILL (PID 1 ignores SIGTERM); the writable - // layer survives for a later reattach by key. - await docker.getContainer(c.Id).stop({ t: 0 }) - } else { - await docker.getContainer(c.Id).remove({ force: true, v: true }) + // Probe + reclaim every leftover concurrently. The running-orphan path runs an + // exec round-trip (readOwnerMarker) and the whole sweep is awaited at boot, so + // a sequential loop would serialize one exec per leftover — and the motivating + // case is "15+ leftovers". Each container is independent (distinct name, no + // shared lock), so fanning out keeps boot latency flat as leftovers pile up. + const reclaimed = await Promise.all( + listed.map(async (c): Promise => { + const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id + // Held by this very process right now — its lifecycle is already managed. + if (sandboxContainers.has(name)) return null + const persistent = c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true` + try { + if (c.State === `running`) { + const ownerPid = Number(c.Labels?.[SANDBOX_OWNER_PID_LABEL]) + // No (or unparsable) owner label, or the owner is alive ⇒ possibly a + // live peer's in-use sandbox — never touch it. + if (!Number.isInteger(ownerPid) || isPidAlive(ownerPid)) return null + // The creator is gone, but a live sibling process may have since + // ADOPTED the container (reattached by key) — labels are immutable, so + // adoption is recorded in an in-container marker. Probe it before + // reclaiming. + const adopter = await readOwnerMarker(docker.getContainer(c.Id)) + if (adopter !== null && isPidAlive(adopter)) return null + if (persistent) { + // `t: 0` → straight to SIGKILL (PID 1 ignores SIGTERM); the writable + // layer survives for a later reattach by key. + await docker.getContainer(c.Id).stop({ t: 0 }) + } else { + await docker.getContainer(c.Id).remove({ force: true, v: true }) + } + return name } - reclaimed.push(name) - continue + if (persistent) return null + await docker.getContainer(c.Id).remove({ force: true, v: true }) + return name + } catch { + /* already gone */ + return null } - if (persistent) continue - await docker.getContainer(c.Id).remove({ force: true, v: true }) - reclaimed.push(name) - } catch { - /* already gone */ - } - } - return reclaimed + }) + ) + return reclaimed.filter((n): n is string => n !== null) } /** @@ -791,7 +797,10 @@ export async function sweepOrphanedDockerSandboxes(opts?: { * hold the container, it is only MARKED reclaimed — the last lease draining * wipes it (matching an owner's `dispose({ reclaim: true })`). A container * that exists with no registry entry (left from a previous process) is removed - * directly; absent containers are a no-op. + * directly; absent containers are a no-op. Best-effort: an unreachable daemon + * (like any failed remove) is swallowed — a surviving container is then left to + * a later boot sweep (which preserves a persistent workspace; a manual labelled + * prune reclaims one whose entity is already terminal). */ export async function reclaimDockerSandboxByKey( sandboxKey: string, @@ -834,6 +843,9 @@ export async function reclaimDockerSandboxByKey( * down (or to a wake whose drain timed out). Their own dispose tears them * down; if the process exits first, the boot sweep reclaims them by dead * owner pid. + * + * Best-effort: if the daemon is unreachable each teardown throws and is + * swallowed, leaving those containers for the next process's boot sweep. */ export async function shutdownAllDockerSandboxes(): Promise { await Promise.all( diff --git a/packages/agents-runtime/test/sandbox-lazy.test.ts b/packages/agents-runtime/test/sandbox-lazy.test.ts index b529ba4744..f9c2ea832d 100644 --- a/packages/agents-runtime/test/sandbox-lazy.test.ts +++ b/packages/agents-runtime/test/sandbox-lazy.test.ts @@ -154,6 +154,36 @@ describe(`lazySandbox`, () => { expect(factoryCalls).toBe(0) }) + it(`dispose({reclaim}) during an in-flight factory that then fails runs the reclaim callback`, async () => { + // dispose is called while materialization is still pending; when that + // factory rejects there is nothing live to dispose, so a requested reclaim + // must still wipe the provider-side state via the callback. + let rejectFactory!: (err: unknown) => void + let factoryCalls = 0 + const reclaims: Array = [] + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: () => { + factoryCalls += 1 + return new Promise((_, rej) => { + rejectFactory = rej + }) + }, + reclaim: async () => { + reclaims.push(true) + }, + }) + // Kick off materialization (now in-flight), then dispose before it settles. + const pendingUse = sandbox.exec({ command: `x` }) + pendingUse.catch(() => {}) // the use itself rejects; we assert on dispose + const disposed = sandbox.dispose({ reclaim: true }) + rejectFactory(new Error(`daemon hiccup`)) + await disposed + await expect(pendingUse).rejects.toThrow(`daemon hiccup`) + expect(factoryCalls).toBe(1) + expect(reclaims).toEqual([true]) + }) + it(`dispose after use forwards to the inner sandbox (reclaim included)`, async () => { const h = harness() await h.sandbox.exec({ command: `x` }) diff --git a/packages/agents/src/server.ts b/packages/agents/src/server.ts index 9dc7bf66ea..a05c2fab94 100644 --- a/packages/agents/src/server.ts +++ b/packages/agents/src/server.ts @@ -365,9 +365,12 @@ export class BuiltinAgentsServer { }), new Promise((resolve) => setTimeout(resolve, 5_000)), ]) - // Tear down this process's live sandbox containers NOW — their debounced - // idle teardowns die with the process and would leave them running. - // Bounded like the drain above so a hung daemon can't block quit. + // Tear down this process's idle sandbox containers NOW — their debounced + // idle teardowns die with the process and would leave them running. Runs + // AFTER the drain above on purpose: this flush only reclaims idle, + // lease-free containers, so the drained wakes must release their leases + // first. Bounded by its own 5s so a hung daemon can't block quit — worst + // case shutdown is the two bounds back to back. if (this.bootstrap.shutdownSandboxes) { await Promise.race([ this.bootstrap.shutdownSandboxes().catch((err) => { From b57b6fc7d9fc959aa725c15c9e233a80146042bb Mon Sep 17 00:00:00 2001 From: msfstef Date: Tue, 9 Jun 2026 13:02:38 +0300 Subject: [PATCH 4/6] refactor(agents): compact writeOwnerMarker best-effort handling From kevin-dp's review: replace the try/catch wrapper around the best-effort owner-marker write with `.catch(() => {})`. Kept `await` (not a bare `return runOneOff(...).catch(...)`) so the non-void RunOneOffResult doesn't leak into the Promise return type. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/agents-runtime/src/sandbox/docker.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts index 74ad8732e1..ba52278b15 100644 --- a/packages/agents-runtime/src/sandbox/docker.ts +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -579,15 +579,13 @@ export async function dockerSandbox( * crash leftover. */ async function writeOwnerMarker(c: DockerodeContainer): Promise { - try { - await runOneOff(c, [ - `sh`, - `-c`, - `echo ${process.pid} > ${OWNER_MARKER_PATH}`, - ]) - } catch { + await runOneOff(c, [ + `sh`, + `-c`, + `echo ${process.pid} > ${OWNER_MARKER_PATH}`, + ]).catch(() => { /* best-effort */ - } + }) } /** From b0feb1f5efd5ea0db3c2842d6385919d1f53c681 Mon Sep 17 00:00:00 2001 From: msfstef Date: Tue, 9 Jun 2026 15:49:30 +0300 Subject: [PATCH 5/6] test(agents): reap docker sandbox test containers, fix conformance flakiness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docker sandbox test files spin up real containers, but only sandbox-docker.test.ts cleaned up after itself. The smoke and conformance suites relied solely on `sandbox.dispose()`, which doesn't remove a container synchronously — it schedules teardown ~2 minutes out via an unref'd timer that dies with the vitest process. So running those suites left RUNNING containers behind that accumulated across runs (3 smoke runs ⇒ 2 live zombies; one conformance run ⇒ 18). Lift the label-scoped sweep into a shared `installDockerSandboxTestCleanup()` helper (beforeAll + afterEach + afterAll) and call it from every describe that creates containers. The sweep filters only on the `electric-test-sandbox` test label, never the production `com.electric.sandbox` label, so a developer's real local sandboxes are untouched; it's self-gating and swallows daemon errors. The main file's duplicated sweep and six inline hooks collapse into one call. Also give the docker conformance provider a 60s timeout (matching the dedicated docker test files) — real container creation outgrows vitest's 5s default, which was flaking those tests independently of the leak. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test/helpers/docker-sandbox-cleanup.ts | 62 +++++++++++++++++++ .../test/sandbox-conformance.test.ts | 15 ++++- .../test/sandbox-docker-smoke.test.ts | 3 + .../test/sandbox-docker.test.ts | 50 ++------------- 4 files changed, 83 insertions(+), 47 deletions(-) create mode 100644 packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts diff --git a/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts b/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts new file mode 100644 index 0000000000..73daf6b110 --- /dev/null +++ b/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts @@ -0,0 +1,62 @@ +import { afterAll, afterEach, beforeAll } from 'vitest' +import { __resetPersistentRegistryForTests } from '../../src/sandbox/docker' +import { loadDockerode } from '../../src/sandbox/docker/loader' +import { dockerAvailable, TEST_LABEL } from './docker-probe' + +/** + * Force-remove every container this suite created, identified by the test-only + * label `${TEST_LABEL}=1`. This is the authoritative reaper: it asks the daemon + * by label, so it catches containers the in-process registry no longer tracks — + * ref-leaked by a thrown test, cleared by `__resetPersistentRegistryForTests`, + * or created out-of-band — which `shutdownAllDockerSandboxes()` would miss. + * + * It NEVER filters on the production label `com.electric.sandbox`, so a + * developer's real local sandboxes are untouched. Self-gating and fully + * swallowed so a missing or slow daemon can't redden a run. + */ +export async function sweepTestContainers(): Promise { + if (!dockerAvailable) return + try { + const Docker = await loadDockerode() + const docker = new Docker() + const containers = await docker.listContainers({ + all: true, + filters: { label: [`${TEST_LABEL}=1`] }, + }) + await Promise.all( + containers.map((c) => + docker + .getContainer(c.Id) + .remove({ force: true, v: true }) + .catch(() => {}) + ) + ) + } catch { + /* daemon gone / unreachable — cleanup is best-effort */ + } +} + +/** + * Install container cleanup for a `describe` block that spins up real + * containers. Call it as the first line of the block. + * + * - beforeAll: clear leftovers a previously killed run couldn't (its afterAll + * never ran). + * - afterEach: reset the in-process registry (and its debounced teardown + * timers), then sweep — this bounds leakage to a single container if the run + * is interrupted, and stops containers piling up across a long suite, since + * `dispose()` only *schedules* a removal ~2 minutes out (an unref'd timer + * that dies with the process), so without this every test's container lingers. + * - afterAll: a final sweep so the file leaves nothing behind. + */ +export function installDockerSandboxTestCleanup(): void { + beforeAll(() => sweepTestContainers(), 30_000) + afterEach(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) + afterAll(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) +} diff --git a/packages/agents-runtime/test/sandbox-conformance.test.ts b/packages/agents-runtime/test/sandbox-conformance.test.ts index 189c3e4d5a..eca7f7926c 100644 --- a/packages/agents-runtime/test/sandbox-conformance.test.ts +++ b/packages/agents-runtime/test/sandbox-conformance.test.ts @@ -1,7 +1,7 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { remoteSandbox } from '../src/sandbox/remote' import { unrestrictedSandbox } from '../src/sandbox/unrestricted' import { dockerSandbox } from '../src/sandbox/docker' @@ -10,6 +10,7 @@ import { KNOWN_ADAPTERS } from '../src/sandbox' import type { Sandbox } from '../src/sandbox/types' import type { RemoteSandboxClient } from '../src/sandbox/remote/types' import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' /** * Cross-provider conformance: a single set of scenarios exercised against @@ -217,6 +218,18 @@ describe(`sandbox conformance`, () => { for (const provider of providers) { const d = provider.enabled ? describe : describe.skip d(provider.name, () => { + // The docker provider creates real containers; reap them per test, and + // give them a real timeout — container creation outgrows the 5s default + // (the dedicated docker test files use 60s). The host-fs and in-memory + // providers create no containers and keep the snappy default. + if (provider.adapter === `docker`) { + installDockerSandboxTestCleanup() + // Bind a longer timeout at collection time (the docker provider runs + // last, so only its tests pick this up); a hook would be too late since + // each test's timeout is fixed when it's registered. + vi.setConfig({ testTimeout: 60_000 }) + } + let cwd: string beforeEach(async () => { diff --git a/packages/agents-runtime/test/sandbox-docker-smoke.test.ts b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts index 945b5cca80..7974e6c358 100644 --- a/packages/agents-runtime/test/sandbox-docker-smoke.test.ts +++ b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts @@ -1,10 +1,13 @@ import { describe, expect, it } from 'vitest' import { dockerSandbox } from '../src/sandbox/docker' import { dockerAvailable, TEST_IMAGE } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' const d = dockerAvailable ? describe : describe.skip d(`ad-hoc docker sandbox smoke`, () => { + installDockerSandboxTestCleanup() + it(`exec basic, inspect caps, inspect /etc/passwd vs host, attempt mount`, async () => { const sandbox = await dockerSandbox({ image: TEST_IMAGE, diff --git a/packages/agents-runtime/test/sandbox-docker.test.ts b/packages/agents-runtime/test/sandbox-docker.test.ts index 8f9ff4ceb9..6d1ef744e6 100644 --- a/packages/agents-runtime/test/sandbox-docker.test.ts +++ b/packages/agents-runtime/test/sandbox-docker.test.ts @@ -1,4 +1,4 @@ -import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest' +import { describe, expect, it } from 'vitest' import { mkdtemp, rm, symlink, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' import { join } from 'node:path' @@ -14,6 +14,7 @@ import { lazySandbox } from '../src/sandbox/lazy' import { loadDockerode } from '../src/sandbox/docker/loader' import { SandboxError } from '../src/sandbox/types' import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' /** * dockerSandbox integration tests. The whole describe block is gated on @@ -29,39 +30,8 @@ if (!dockerAvailable) { const d = dockerAvailable ? describe : describe.skip -async function sweepTestContainers(): Promise { - const Docker = await loadDockerode() - const docker = new Docker() - const containers = await docker.listContainers({ - all: true, - filters: { label: [`${TEST_LABEL}=1`] }, - }) - await Promise.all( - containers.map((c) => - docker - .getContainer(c.Id) - .remove({ force: true, v: true }) - .catch(() => {}) - ) - ) -} - d(`dockerSandbox`, () => { - beforeAll(async () => { - // Best-effort cleanup of leftover containers from previous runs. - await sweepTestContainers() - }, 30_000) - - afterAll(async () => { - await sweepTestContainers() - }, 30_000) - - afterEach(async () => { - // Every container now flows through the registry + debounced teardown; - // clear the in-process bookkeeping (and its timers) between tests. - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) + installDockerSandboxTestCleanup() it(`exec roundtrip with stdout / exitCode`, async () => { const sandbox = await dockerSandbox({ @@ -433,19 +403,7 @@ d(`dockerSandbox keyed lifecycle`, () => { // Unique keys per run so reattach targets a clean deterministic name. const KEY = `electric-test-${Date.now()}` - beforeAll(async () => { - await sweepTestContainers() - }, 30_000) - - afterEach(async () => { - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) - - afterAll(async () => { - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) + installDockerSandboxTestCleanup() const make = ( sandboxKey: string, From 9cc6805d1100081843323f6d4e92670d1bceff7b Mon Sep 17 00:00:00 2001 From: msfstef Date: Tue, 9 Jun 2026 15:49:51 +0300 Subject: [PATCH 6/6] fix(agents): only pull sandbox image when already missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `pullIfMissing` is documented as pulling "when it's not present locally", but `ensureImage` never checked presence — it called `docker.pull()` on every container create. `docker.pull` round-trips to the registry even for a fully cached digest-pinned image, so creation was needlessly slow and failed whenever the registry was briefly unreachable (an intermittent TLS-handshake timeout to registry-1.docker.io surfaced this). Inspect the image first (a local-only check) and pull only when it's actually absent, matching the documented semantics. Adds `getImage` to the minimal `Dockerode` interface and a unit test (fake daemon) covering present ⇒ skip, absent ⇒ pull, and `pullIfMissing: false` ⇒ neither. Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/sandbox-image-pull-if-missing.md | 9 ++++ packages/agents-runtime/src/sandbox/docker.ts | 16 ++++-- .../src/sandbox/docker/loader.ts | 1 + .../test/sandbox-docker-ensure-image.test.ts | 53 +++++++++++++++++++ 4 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 .changeset/sandbox-image-pull-if-missing.md create mode 100644 packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts diff --git a/.changeset/sandbox-image-pull-if-missing.md b/.changeset/sandbox-image-pull-if-missing.md new file mode 100644 index 0000000000..35fc748524 --- /dev/null +++ b/.changeset/sandbox-image-pull-if-missing.md @@ -0,0 +1,9 @@ +--- +'@electric-ax/agents-runtime': patch +--- + +Docker sandbox creation now pulls the image only when it isn't already present +locally, honoring the documented `pullIfMissing` semantics. Previously every +container create called `docker pull`, which round-trips to the registry even +for a fully cached digest-pinned image — making creation needlessly slow and +prone to failing whenever the registry was briefly unreachable. diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts index ba52278b15..5d25822c37 100644 --- a/packages/agents-runtime/src/sandbox/docker.ts +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -887,16 +887,22 @@ function resolveImage(opts: DockerSandboxOpts): string { return image } -async function ensureImage( +export async function ensureImage( docker: Dockerode, image: string, opts: DockerSandboxOpts ): Promise { - // Best-effort: try the daemon's inspection by relying on createContainer - // to surface the missing image as a 404. To keep the first-run experience - // smooth on dev machines, we proactively pull when allowed. if (opts.pullIfMissing === false) return - // dockerode's `pull` is idempotent; the daemon dedupes by digest. + // Pull only when the image isn't already on the daemon. `docker.pull` + // always round-trips to the registry — even for a digest that's fully + // cached — so pulling unconditionally made every container create hostage + // to registry reachability (and slower). `inspect` is a local-only check. + try { + await docker.getImage(image).inspect() + return + } catch { + // Absent (or inspect failed) — fall through and pull it. + } const stream = await docker.pull(image) await new Promise((resolve, reject) => { docker.modem.followProgress( diff --git a/packages/agents-runtime/src/sandbox/docker/loader.ts b/packages/agents-runtime/src/sandbox/docker/loader.ts index 9245ff4eac..b56a3c2485 100644 --- a/packages/agents-runtime/src/sandbox/docker/loader.ts +++ b/packages/agents-runtime/src/sandbox/docker/loader.ts @@ -15,6 +15,7 @@ export interface Dockerode { filters?: Record> }): Promise }>> pull(image: string, opts?: unknown): Promise + getImage(name: string): { inspect(): Promise } modem: { followProgress( stream: NodeJS.ReadableStream, diff --git a/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts b/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts new file mode 100644 index 0000000000..fa51d90ae4 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it, vi } from 'vitest' +import { ensureImage } from '../src/sandbox/docker' + +/** + * Unit test (no daemon) for the image-pull policy. `pullIfMissing` is + * documented as "pulls the image when it's not present locally", and + * `docker.pull` always round-trips to the registry — even for a fully cached + * digest — so pulling unconditionally made every container create hostage to + * registry reachability. These assert the documented presence-based contract. + */ + +function fakeDocker(present: boolean) { + const inspect = vi.fn(async () => { + if (present) return {} as unknown + const err = new Error(`no such image`) as Error & { statusCode?: number } + err.statusCode = 404 + throw err + }) + const pull = vi.fn(async () => ({}) as unknown) + const docker = { + getImage: vi.fn(() => ({ inspect })), + pull, + modem: { + followProgress: (_stream: unknown, onDone: (err: unknown) => void) => + onDone(null), + }, + } + return { docker, inspect, pull } +} + +describe(`ensureImage`, () => { + it(`skips the pull when the image is already present (no registry round-trip)`, async () => { + const { docker, inspect, pull } = fakeDocker(true) + await ensureImage(docker as never, `img@sha256:abc`, {}) + expect(inspect).toHaveBeenCalledOnce() + expect(pull).not.toHaveBeenCalled() + }) + + it(`pulls when the image is absent`, async () => { + const { docker, pull } = fakeDocker(false) + await ensureImage(docker as never, `img@sha256:abc`, {}) + expect(pull).toHaveBeenCalledOnce() + }) + + it(`does nothing when pullIfMissing is false (fail-fast / pre-pulled)`, async () => { + const { docker, inspect, pull } = fakeDocker(true) + await ensureImage(docker as never, `img@sha256:abc`, { + pullIfMissing: false, + }) + expect(inspect).not.toHaveBeenCalled() + expect(pull).not.toHaveBeenCalled() + }) +})