diff --git a/.changeset/sandbox-image-pull-if-missing.md b/.changeset/sandbox-image-pull-if-missing.md new file mode 100644 index 0000000000..35fc748524 --- /dev/null +++ b/.changeset/sandbox-image-pull-if-missing.md @@ -0,0 +1,9 @@ +--- +'@electric-ax/agents-runtime': patch +--- + +Docker sandbox creation now pulls the image only when it isn't already present +locally, honoring the documented `pullIfMissing` semantics. Previously every +container create called `docker pull`, which round-trips to the registry even +for a fully cached digest-pinned image — making creation needlessly slow and +prone to failing whenever the registry was briefly unreachable. diff --git a/.changeset/zombie-sandbox-containers-fix.md b/.changeset/zombie-sandbox-containers-fix.md new file mode 100644 index 0000000000..6839149aaf --- /dev/null +++ b/.changeset/zombie-sandbox-containers-fix.md @@ -0,0 +1,25 @@ +--- +'@electric-ax/agents-runtime': patch +'@electric-ax/agents': patch +--- + +Fix leftover Docker sandbox containers (`electric-sbx-*`) piling up. + +Sandbox containers are meant to be short-lived, but several gaps let them +outlive the work they were created for — opening the desktop app could leave +15+ containers running that were never explicitly started. This closes those +gaps so a container only exists while something is actually using it: + +- **Created only when used.** A container now starts the first time an agent + actually uses its sandbox (runs a command, reads/writes a file), so trivial + wakes (scheduled ticks, bookkeeping) no longer spin one up. +- **Cleaned up on quit.** Shutdown now tears down idle containers immediately + instead of leaving their delayed-teardown timers to die with the process. +- **Leftovers reclaimed at startup.** Containers are tagged with the process + that created them; at startup, those whose owner is gone are reclaimed + (throwaway ones removed, reusable ones stopped so their files survive), while + containers a live process is still using are left untouched. + +Also: a failed container setup step no longer strands an untracked container, +and all sandboxes are grouped under one `electric-sandboxes` entry in Docker +Desktop so they can be stopped/removed together. diff --git a/packages/agents-runtime/src/process-wake.ts b/packages/agents-runtime/src/process-wake.ts index b7cb4b3d17..623f914b20 100644 --- a/packages/agents-runtime/src/process-wake.ts +++ b/packages/agents-runtime/src/process-wake.ts @@ -12,6 +12,7 @@ import { createEntityLogPrefix, runtimeLog } from './log' import { createRuntimeServerClient } from './runtime-server-client' import type { RuntimeServerClient } from './runtime-server-client' import { unrestrictedSandbox } from './sandbox/unrestricted' +import { ensureSandboxMaterialized } from './sandbox/lazy' import { resolveSandboxIdentity } from './sandbox/identity' import { appendPathToUrl } from './url' import { manifestChildKey } from './manifest-helpers' @@ -1322,7 +1323,7 @@ export async function processWake( const requestedInherit = opts?.sandbox === `inherit` || (typeof opts?.sandbox === `object` && opts.sandbox.inherit === true) - const sandbox = requestedInherit + const childSandbox = requestedInherit ? resolvedSandboxSelection ? { profile: resolvedSandboxSelection.profile, @@ -1336,6 +1337,12 @@ export async function processWake( : opts?.sandbox === `inherit` ? undefined : opts?.sandbox + // An inheriting child only ever ATTACHES by key — make sure the + // owner's (lazily-created) container/workspace actually exists before + // the child can wake, even if this wake never ran a tool itself. + if (requestedInherit && resolvedSandboxSelection && sandbox) { + await ensureSandboxMaterialized(sandbox) + } return serverClient.spawnEntity({ type: childType, id: childId, @@ -1344,7 +1351,7 @@ export async function processWake( initialMessage: opts?.initialMessage, initialMessageType: opts?.initialMessageType, tags: opts?.tags, - sandbox, + sandbox: childSandbox, wake: wakeOpt, }) }, diff --git a/packages/agents-runtime/src/sandbox-docker.ts b/packages/agents-runtime/src/sandbox-docker.ts index c1c39dc6cd..ffb8e7c4bb 100644 --- a/packages/agents-runtime/src/sandbox-docker.ts +++ b/packages/agents-runtime/src/sandbox-docker.ts @@ -9,6 +9,8 @@ export { dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, sweepOrphanedDockerSandboxes, __resetPersistentRegistryForTests, } from './sandbox/docker' diff --git a/packages/agents-runtime/src/sandbox.ts b/packages/agents-runtime/src/sandbox.ts index 9e1bc7ffe4..29b5839d2a 100644 --- a/packages/agents-runtime/src/sandbox.ts +++ b/packages/agents-runtime/src/sandbox.ts @@ -8,6 +8,8 @@ export type { RemoteProvider, RemoteSandboxOpts } from './sandbox/remote' export type { RemoteSandboxClient } from './sandbox/remote/types' export { isE2BAvailable } from './sandbox/remote/e2b' export { chooseDefaultSandbox } from './sandbox/default' +export { ensureSandboxMaterialized, lazySandbox } from './sandbox/lazy' +export type { LazySandboxOpts } from './sandbox/lazy' export { SandboxError } from './sandbox/types' export type { Sandbox, diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts index f30bb1c53f..5d25822c37 100644 --- a/packages/agents-runtime/src/sandbox/docker.ts +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -156,10 +156,35 @@ const SANDBOX_ENTITY_LABEL = `com.electric.sandbox.entity` * key) and only reclaims ephemeral leftovers. */ const SANDBOX_PERSISTENT_LABEL = `com.electric.sandbox.persistent` +/** + * PID of the process that created the container. The boot sweep probes it + * (same host as the daemon's clients) to tell a live sibling's container from + * a crash/quit leftover — the keepalive loop never exits on its own, so a dead + * owner's container stays RUNNING forever without this. + */ +const SANDBOX_OWNER_PID_LABEL = `com.electric.sandbox.owner-pid` +/** + * Compose-style grouping labels. Docker Desktop (and other GUI clients) group + * containers by compose project, so every sandbox shows up under one + * collapsible `electric-sandboxes` entry and can be stopped / deleted together + * — also via `docker compose -p electric-sandboxes down`. + */ +const COMPOSE_PROJECT_LABEL = `com.docker.compose.project` +const COMPOSE_SERVICE_LABEL = `com.docker.compose.service` +const COMPOSE_PROJECT_NAME = `electric-sandboxes` /** Common prefix for every container name this module assigns. */ const NAME_PREFIX = `electric-sbx` +/** + * In-container record of the process that most recently ADOPTED the container + * (reattached by key after its creator exited). Labels are immutable, so the + * creation-time owner pid goes stale on reattach; the boot sweep probes this + * marker before reclaiming a running container whose labelled owner is dead. + * Lives on the /tmp tmpfs, so a stop wipes it along with any stale adoption. + */ +const OWNER_MARKER_PATH = `/tmp/.electric-sbx-owner-pid` + /** Default warm window before an idle container is torn down (stop/remove). */ const DEFAULT_IDLE_GRACE_MS = 2 * 60 * 1000 @@ -209,6 +234,33 @@ function withKeyLock(key: string, fn: () => Promise): Promise { return run } +/** + * Bound on concurrent container CREATIONS (image resolve + create + start + + * init), so a burst of wakes materializing sandboxes at once — e.g. a backlog + * replay when a runner reconnects — queues against the daemon instead of + * stampeding it. Reattaches and execs are not limited; total creations are + * unbounded, only their concurrency. + */ +const MAX_CONCURRENT_CREATIONS = 4 +let creationSlots = MAX_CONCURRENT_CREATIONS +const creationQueue: Array<() => void> = [] +async function withCreationSlot(fn: () => Promise): Promise { + if (creationSlots > 0) { + creationSlots -= 1 + } else { + await new Promise((release) => creationQueue.push(release)) + } + try { + return await fn() + } finally { + // Hand the slot directly to the next waiter (no decrement on their side), + // so a new caller can't race in between release and re-acquire. + const next = creationQueue.shift() + if (next) next() + else creationSlots += 1 + } +} + /** Lowercase, DNS-safe slug from an arbitrary identity string (≤24 chars). */ function slugify(s: string): string { const slug = s @@ -421,6 +473,9 @@ export async function dockerSandbox( [SANDBOX_LABEL]: `1`, [SANDBOX_KEY_LABEL]: sandboxKey, [SANDBOX_PERSISTENT_LABEL]: persistent ? `true` : `false`, + [SANDBOX_OWNER_PID_LABEL]: String(process.pid), + [COMPOSE_PROJECT_LABEL]: COMPOSE_PROJECT_NAME, + [COMPOSE_SERVICE_LABEL]: opts.entityType ?? `sandbox`, ...(opts.entityType ? { [SANDBOX_ENTITY_TYPE_LABEL]: opts.entityType } : {}), @@ -429,39 +484,59 @@ export async function dockerSandbox( } // Create-and-start a fresh container. Image is pulled here only (skipped - // entirely when reattaching to an existing container). - const createStarted = async (): Promise => { - const image = resolveImage(opts) - await ensureImage(docker, image, opts) - const c = await docker.createContainer({ - // Spread (rather than a literal `name:`) so the `name` query param — - // which dockerode accepts but doesn't declare on its create-opts type — - // doesn't trip excess-property checking. - ...{ name: containerName }, - Image: image, - Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], - WorkingDir: containerCwd, - Env: Object.entries(baseEnv).map(([k, v]) => `${k}=${v}`), - Labels: labels, - ExposedPorts: exposedPorts, - HostConfig, + // entirely when reattaching to an existing container). Creation concurrency + // is bounded process-wide (see withCreationSlot). + const createStarted = (): Promise => + withCreationSlot(async (): Promise => { + const image = resolveImage(opts) + await ensureImage(docker, image, opts) + const c = await docker.createContainer({ + // Spread (rather than a literal `name:`) so the `name` query param — + // which dockerode accepts but doesn't declare on its create-opts type — + // doesn't trip excess-property checking. + ...{ name: containerName }, + Image: image, + Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], + WorkingDir: containerCwd, + Env: Object.entries(baseEnv).map(([k, v]) => `${k}=${v}`), + Labels: labels, + ExposedPorts: exposedPorts, + HostConfig, + }) + try { + await c.start() + } catch (err) { + await c.remove({ force: true, v: true }).catch(() => {}) + throw new SandboxError( + `runtime`, + `dockerSandbox: container start failed: ${ + err instanceof Error ? err.message : String(err) + }` + ) + } + // Tmpfs on `/work` is empty at start; ensure caller-supplied workingDir + // exists with sensible perms. If this post-start init fails — the exec + // call throws, or the mkdir can't run / exits non-zero (a null exit code + // means the exec never produced one, e.g. it couldn't spawn at all) — + // remove the already-started container before rethrowing: it isn't + // registered yet, so no dispose (and, while running, no sweep) would + // ever reclaim it. + try { + const init = await runOneOff(c, [`mkdir`, `-p`, containerCwd]) + if (init.exitCode !== 0) { + throw new SandboxError( + `runtime`, + `dockerSandbox: post-start init failed: \`mkdir -p ${containerCwd}\` exited ${init.exitCode ?? `without a code`}${ + init.stderr.length > 0 ? `: ${init.stderr.toString().trim()}` : `` + }` + ) + } + } catch (err) { + await c.remove({ force: true, v: true }).catch(() => {}) + throw err + } + return c }) - try { - await c.start() - } catch (err) { - await c.remove({ force: true, v: true }).catch(() => {}) - throw new SandboxError( - `runtime`, - `dockerSandbox: container start failed: ${ - err instanceof Error ? err.message : String(err) - }` - ) - } - // Tmpfs on `/work` is empty at start; ensure caller-supplied workingDir - // exists with sensible perms. - await runOneOff(c, [`mkdir`, `-p`, containerCwd]) - return c - } // Serialize reattach + registration against the debounced idle teardown // (which holds the same per-key lock), so a re-acquire can't race a teardown @@ -496,6 +571,23 @@ export async function dockerSandbox( }) } +/** + * Best-effort: record this process as the container's current owner in the + * in-container marker (see {@link OWNER_MARKER_PATH}). Called on the reattach + * paths — where the immutable creation-time owner-pid label may belong to an + * exited process — so the boot sweep can tell an adopted container from a + * crash leftover. + */ +async function writeOwnerMarker(c: DockerodeContainer): Promise { + await runOneOff(c, [ + `sh`, + `-c`, + `echo ${process.pid} > ${OWNER_MARKER_PATH}`, + ]).catch(() => { + /* best-effort */ + }) +} + /** * Resolve a container by name: reattach to an existing one (starting it if a * persistent container had been stopped) or create it fresh. Handles the race @@ -520,6 +612,7 @@ async function reattachOrCreate( // and exec/fs round-trips work again. await existing.start().catch(() => {}) } + await writeOwnerMarker(existing) return existing } try { @@ -560,34 +653,72 @@ async function reattachExisting( ) } if (!running) await existing.start().catch(() => {}) + await writeOwnerMarker(existing) return existing } function isNameConflict(err: unknown): boolean { - const status = (err as { statusCode?: number; status?: number })?.statusCode - if (status === 409) return true - return /already in use|Conflict/i.test( + // Match the daemon's name-conflict message, not bare HTTP 409 — the daemon + // also answers 409 for unrelated conflicts that can surface from inside + // `createStarted` (e.g. exec on a container that just exited), and treating + // those as a lost create race would "reattach" to a container that is gone. + return /already in use/i.test( err instanceof Error ? err.message : String(err) ) } /** - * One-shot startup cleanup of *ephemeral* sandbox leftovers from a previous - * process (a crash or restart before disposes ran). Call once at runner boot. + * Same-host liveness probe for the pid recorded on a container at creation. + * `kill(pid, 0)` delivers no signal; EPERM means the pid exists but belongs to + * another user — still alive. A reused pid reads as alive (the orphan is then + * left for a later sweep), which errs on the safe side. + */ +function isPidAlive(pid: number): boolean { + try { + process.kill(pid, 0) + return true + } catch (err) { + return (err as NodeJS.ErrnoException).code === `EPERM` + } +} + +/** + * Read the adoption marker (see {@link OWNER_MARKER_PATH}) from a RUNNING + * container. `null` when absent or unreadable — the sweep then falls back to + * the creation-time owner-pid label alone. + */ +async function readOwnerMarker(c: DockerodeContainer): Promise { + try { + const r = await runOneOff(c, [`cat`, OWNER_MARKER_PATH]) + if (r.exitCode !== 0) return null + const pid = Number(r.stdout.toString().trim()) + return Number.isInteger(pid) && pid > 0 ? pid : null + } catch { + return null + } +} + +/** + * One-shot startup cleanup of sandbox leftovers from a previous process (a + * crash, force-quit, or shutdown before disposes ran). Call once at runner + * boot. + * + * The keepalive loop never exits on its own, so a leftover is usually still + * RUNNING — but so is a live sibling runner's container on the same daemon. + * The owner-pid label disambiguates (the sweep runs on the same host as the + * daemon's clients): owner alive ⇒ leave it; owner dead ⇒ check the + * in-container adoption marker (a live process that reattached after the + * creator exited) and, if no adopter is alive either, reclaim — REMOVE an + * ephemeral orphan, STOP a persistent one (its writable layer must survive so + * the next wake can reattach by key). A running container without the label + * (created before it existed) can't be probed and is left for a manual + * labelled prune. * - * Two containers are deliberately left untouched: - * - RUNNING containers — they may belong to a live sibling runner (or a - * concurrent test run) sharing this Docker daemon; force-removing those - * would wipe another process's in-use sandbox. Reboot/crash leftovers are - * `Exited` once the daemon restarts, so the common case is still reclaimed; - * a still-running ephemeral orphan is left for a manual labelled prune - * rather than risk a live peer. - * - PERSISTENT containers — `persistent: true` exists precisely so a restarted - * process can reattach to the warm workspace by key, so they must survive a - * boot (only ephemeral leftovers are reclaimed here; a manual labelled prune - * reclaims truly-abandoned persistent ones). + * Non-running containers: ephemeral leftovers are removed; persistent ones are + * preserved — `persistent: true` exists precisely so a restarted process can + * reattach to the warm workspace by key. * - * Returns the names removed. + * Returns the names reclaimed (removed or stopped). */ export async function sweepOrphanedDockerSandboxes(opts?: { dockerSocket?: string @@ -612,21 +743,128 @@ export async function sweepOrphanedDockerSandboxes(opts?: { return [] } - const removed: Array = [] - for (const c of listed) { - const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id - // Never touch a running container (possibly a live peer) or a persistent - // one (meant to be reattached by key). See the doc comment above. - if (c.State === `running`) continue - if (c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true`) continue + // Probe + reclaim every leftover concurrently. The running-orphan path runs an + // exec round-trip (readOwnerMarker) and the whole sweep is awaited at boot, so + // a sequential loop would serialize one exec per leftover — and the motivating + // case is "15+ leftovers". Each container is independent (distinct name, no + // shared lock), so fanning out keeps boot latency flat as leftovers pile up. + const reclaimed = await Promise.all( + listed.map(async (c): Promise => { + const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id + // Held by this very process right now — its lifecycle is already managed. + if (sandboxContainers.has(name)) return null + const persistent = c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true` + try { + if (c.State === `running`) { + const ownerPid = Number(c.Labels?.[SANDBOX_OWNER_PID_LABEL]) + // No (or unparsable) owner label, or the owner is alive ⇒ possibly a + // live peer's in-use sandbox — never touch it. + if (!Number.isInteger(ownerPid) || isPidAlive(ownerPid)) return null + // The creator is gone, but a live sibling process may have since + // ADOPTED the container (reattached by key) — labels are immutable, so + // adoption is recorded in an in-container marker. Probe it before + // reclaiming. + const adopter = await readOwnerMarker(docker.getContainer(c.Id)) + if (adopter !== null && isPidAlive(adopter)) return null + if (persistent) { + // `t: 0` → straight to SIGKILL (PID 1 ignores SIGTERM); the writable + // layer survives for a later reattach by key. + await docker.getContainer(c.Id).stop({ t: 0 }) + } else { + await docker.getContainer(c.Id).remove({ force: true, v: true }) + } + return name + } + if (persistent) return null + await docker.getContainer(c.Id).remove({ force: true, v: true }) + return name + } catch { + /* already gone */ + return null + } + }) + ) + return reclaimed.filter((n): n is string => n !== null) +} + +/** + * Wipe the container for a resolved sandbox key WITHOUT creating one — the + * reclaim path for a lazy sandbox that was never materialized: a terminal + * entity's persistent workspace from an EARLIER wake must not survive just + * because the final wake never used its sandbox. If sibling leases currently + * hold the container, it is only MARKED reclaimed — the last lease draining + * wipes it (matching an owner's `dispose({ reclaim: true })`). A container + * that exists with no registry entry (left from a previous process) is removed + * directly; absent containers are a no-op. Best-effort: an unreachable daemon + * (like any failed remove) is swallowed — a surviving container is then left to + * a later boot sweep (which preserves a persistent workspace; a manual labelled + * prune reclaims one whose entity is already terminal). + */ +export async function reclaimDockerSandboxByKey( + sandboxKey: string, + opts?: { dockerSocket?: string } +): Promise { + const name = containerNameForKey(sandboxKey) + const Docker = await loadDockerode() + const docker: Dockerode = opts?.dockerSocket + ? new Docker({ socketPath: opts.dockerSocket }) + : new Docker() + await withKeyLock(name, async () => { + const entry = sandboxContainers.get(name) + if (entry && entry.refs > 0) { + entry.reclaim = true + return + } + if (entry?.idleTimer) clearTimeout(entry.idleTimer) + sandboxContainers.delete(name) try { - await docker.getContainer(c.Id).remove({ force: true, v: true }) - removed.push(name) + await docker.getContainer(name).remove({ force: true, v: true }) } catch { - /* already gone */ + /* nothing to reclaim */ } - } - return removed + }) +} + +/** + * Immediate, lock-serialized teardown of every IDLE container this process + * tracks: pending debounced timers are cancelled and each entry's teardown + * action runs NOW — REMOVE for ephemeral/reclaimed containers, STOP for + * persistent ones (the workspace survives for the next process to reattach by + * key). + * + * Call on runtime shutdown. The debounced idle timers are unref'd and die with + * the process; without this flush every recently-used container is left + * RUNNING for the boot sweep of some future process to find. + * + * Entries with live leases (refs > 0) are left alone: the registry is shared + * process-wide, so they may belong to a sibling runtime that is not shutting + * down (or to a wake whose drain timed out). Their own dispose tears them + * down; if the process exits first, the boot sweep reclaims them by dead + * owner pid. + * + * Best-effort: if the daemon is unreachable each teardown throws and is + * swallowed, leaving those containers for the next process's boot sweep. + */ +export async function shutdownAllDockerSandboxes(): Promise { + await Promise.all( + [...sandboxContainers.keys()].map((name) => + withKeyLock(name, async () => { + const entry = sandboxContainers.get(name) + if (!entry || entry.refs > 0) return + if (entry.idleTimer) clearTimeout(entry.idleTimer) + try { + if (sandboxWipesOnDispose(entry.reclaim ?? false, entry.persistent)) { + await entry.container.remove({ force: true, v: true }) + } else { + await entry.container.stop({ t: 0 }) + } + } catch { + /* already stopped / removed / gone */ + } + sandboxContainers.delete(name) + }) + ) + ) } /** Test-only: drop the in-process container registry bookkeeping. */ @@ -649,16 +887,22 @@ function resolveImage(opts: DockerSandboxOpts): string { return image } -async function ensureImage( +export async function ensureImage( docker: Dockerode, image: string, opts: DockerSandboxOpts ): Promise { - // Best-effort: try the daemon's inspection by relying on createContainer - // to surface the missing image as a 404. To keep the first-run experience - // smooth on dev machines, we proactively pull when allowed. if (opts.pullIfMissing === false) return - // dockerode's `pull` is idempotent; the daemon dedupes by digest. + // Pull only when the image isn't already on the daemon. `docker.pull` + // always round-trips to the registry — even for a digest that's fully + // cached — so pulling unconditionally made every container create hostage + // to registry reachability (and slower). `inspect` is a local-only check. + try { + await docker.getImage(image).inspect() + return + } catch { + // Absent (or inspect failed) — fall through and pull it. + } const stream = await docker.pull(image) await new Promise((resolve, reject) => { docker.modem.followProgress( diff --git a/packages/agents-runtime/src/sandbox/docker/loader.ts b/packages/agents-runtime/src/sandbox/docker/loader.ts index 9245ff4eac..b56a3c2485 100644 --- a/packages/agents-runtime/src/sandbox/docker/loader.ts +++ b/packages/agents-runtime/src/sandbox/docker/loader.ts @@ -15,6 +15,7 @@ export interface Dockerode { filters?: Record> }): Promise }>> pull(image: string, opts?: unknown): Promise + getImage(name: string): { inspect(): Promise } modem: { followProgress( stream: NodeJS.ReadableStream, diff --git a/packages/agents-runtime/src/sandbox/lazy.ts b/packages/agents-runtime/src/sandbox/lazy.ts new file mode 100644 index 0000000000..06b6d7c561 --- /dev/null +++ b/packages/agents-runtime/src/sandbox/lazy.ts @@ -0,0 +1,195 @@ +/** + * Lazy sandbox wrapper — defers the (potentially expensive) provider factory + * until the sandbox is actually USED. A wake that never runs a tool — a cron + * tick deciding there is nothing to do, child-status bookkeeping, a drained + * inbox — then never pays for, or leaks, a provider-side sandbox (e.g. a + * Docker container). Profile authors wrap their factory: + * + * factory: async (params) => + * lazySandbox({ + * workingDirectory: `/work`, + * factory: () => dockerSandbox({ ... }), + * reclaim: params.owner + * ? () => reclaimDockerSandboxByKey(params.sandboxKey) + * : undefined, + * }) + * + * Materialization is single-flight and retried on failure. Disposing a + * never-used wrapper is free — except `dispose({ reclaim: true })`, which runs + * the provider-supplied `reclaim` callback so a terminal entity's persistent + * workspace from an EARLIER wake is still wiped without creating a fresh + * sandbox just to remove it. + */ + +import { SandboxError } from './types' +import type { + DirEntry, + FileStat, + Sandbox, + SandboxExecOpts, + SandboxExecResult, +} from './types' + +/** + * Cross-module-instance brand for {@link ensureSandboxMaterialized}: a global + * symbol survives duplicate copies of this module in a bundle, where an + * `instanceof` check would not. + */ +const MATERIALIZE = Symbol.for(`electric.sandbox.lazy.materialize`) + +export interface LazySandboxOpts { + /** Reported as `Sandbox.name` until the inner sandbox materializes. */ + name?: string + /** + * The inner sandbox's primary writable root. MUST match what `factory` + * produces: tools resolve relative paths against it synchronously, before + * the first (materializing) sandbox call could run. + */ + workingDirectory: string + /** Builds the inner sandbox on first use. */ + factory: () => Promise + /** + * Wipe the provider-side sandbox WITHOUT materializing. Invoked when this + * lease is disposed with `reclaim: true` before the sandbox was ever used — + * an earlier wake's persistent workspace may still exist under the same key + * and must not outlive its terminal entity. Only pass this for OWNER leases: + * the wrapper itself cannot tell an owner from an attacher. + */ + reclaim?: () => Promise +} + +class LazySandbox implements Sandbox { + private readonly opts: LazySandboxOpts + private inner: Sandbox | null = null + private materializing: Promise | null = null + private disposed = false + + constructor(opts: LazySandboxOpts) { + this.opts = opts + } + + get name(): string { + return this.inner?.name ?? this.opts.name ?? `lazy` + } + + get workingDirectory(): string { + return this.opts.workingDirectory + } + + /** Single-flight; a failed factory clears so the next use retries. */ + private materialize(): Promise { + if (this.inner) return Promise.resolve(this.inner) + this.materializing ??= this.opts.factory().then( + (sandbox) => { + this.inner = sandbox + this.materializing = null + return sandbox + }, + (err: unknown) => { + this.materializing = null + throw err + } + ) + return this.materializing + } + + /** @internal see {@link ensureSandboxMaterialized} */ + [MATERIALIZE](): Promise { + this.assertLive() + return this.materialize() + } + + private assertLive(): void { + if (this.disposed) { + throw new SandboxError( + `runtime`, + `lazySandbox: this sandbox lease has been disposed.` + ) + } + } + + async exec(opts: SandboxExecOpts): Promise { + this.assertLive() + return (await this.materialize()).exec(opts) + } + + async readFile(path: string): Promise { + this.assertLive() + return (await this.materialize()).readFile(path) + } + + async writeFile(path: string, content: Buffer | string): Promise { + this.assertLive() + return (await this.materialize()).writeFile(path, content) + } + + async mkdir(path: string, opts?: { recursive?: boolean }): Promise { + this.assertLive() + return (await this.materialize()).mkdir(path, opts) + } + + async readdir(path: string): Promise> { + this.assertLive() + return (await this.materialize()).readdir(path) + } + + async exists(path: string): Promise { + this.assertLive() + return (await this.materialize()).exists(path) + } + + async remove(path: string, opts?: { recursive?: boolean }): Promise { + this.assertLive() + return (await this.materialize()).remove(path, opts) + } + + async stat(path: string): Promise { + this.assertLive() + return (await this.materialize()).stat(path) + } + + async fetch(input: string | URL, init?: RequestInit): Promise { + this.assertLive() + return (await this.materialize()).fetch(input, init) + } + + async dispose(opts?: { reclaim?: boolean }): Promise { + if (this.disposed) return + this.disposed = true + if (this.inner || this.materializing) { + let inner: Sandbox | null = null + try { + inner = await this.materialize() + } catch { + // The in-flight factory failed — nothing live to dispose. Fall + // through: a requested reclaim still wipes the provider-side state. + } + if (inner) { + // The inner dispose owns the reclaim semantics from here. + await inner.dispose(opts) + return + } + } + if (opts?.reclaim && this.opts.reclaim) await this.opts.reclaim() + } +} + +/** Wrap a provider factory so the sandbox materializes on first use. */ +export function lazySandbox(opts: LazySandboxOpts): Sandbox { + return new LazySandbox(opts) +} + +/** + * Force a lazy sandbox to materialize (no-op for any other sandbox). Used by + * the spawn-`inherit` path: a child ATTACHES to this wake's sandbox by key and + * never creates, so the owner's container/workspace must actually exist before + * the child can wake — even if the owner itself never ran a tool. + */ +export async function ensureSandboxMaterialized( + sandbox: Sandbox +): Promise { + const materialize = ( + sandbox as Sandbox & { [MATERIALIZE]?: () => Promise } + )[MATERIALIZE] + if (typeof materialize === `function`) await materialize.call(sandbox) +} diff --git a/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts b/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts new file mode 100644 index 0000000000..73daf6b110 --- /dev/null +++ b/packages/agents-runtime/test/helpers/docker-sandbox-cleanup.ts @@ -0,0 +1,62 @@ +import { afterAll, afterEach, beforeAll } from 'vitest' +import { __resetPersistentRegistryForTests } from '../../src/sandbox/docker' +import { loadDockerode } from '../../src/sandbox/docker/loader' +import { dockerAvailable, TEST_LABEL } from './docker-probe' + +/** + * Force-remove every container this suite created, identified by the test-only + * label `${TEST_LABEL}=1`. This is the authoritative reaper: it asks the daemon + * by label, so it catches containers the in-process registry no longer tracks — + * ref-leaked by a thrown test, cleared by `__resetPersistentRegistryForTests`, + * or created out-of-band — which `shutdownAllDockerSandboxes()` would miss. + * + * It NEVER filters on the production label `com.electric.sandbox`, so a + * developer's real local sandboxes are untouched. Self-gating and fully + * swallowed so a missing or slow daemon can't redden a run. + */ +export async function sweepTestContainers(): Promise { + if (!dockerAvailable) return + try { + const Docker = await loadDockerode() + const docker = new Docker() + const containers = await docker.listContainers({ + all: true, + filters: { label: [`${TEST_LABEL}=1`] }, + }) + await Promise.all( + containers.map((c) => + docker + .getContainer(c.Id) + .remove({ force: true, v: true }) + .catch(() => {}) + ) + ) + } catch { + /* daemon gone / unreachable — cleanup is best-effort */ + } +} + +/** + * Install container cleanup for a `describe` block that spins up real + * containers. Call it as the first line of the block. + * + * - beforeAll: clear leftovers a previously killed run couldn't (its afterAll + * never ran). + * - afterEach: reset the in-process registry (and its debounced teardown + * timers), then sweep — this bounds leakage to a single container if the run + * is interrupted, and stops containers piling up across a long suite, since + * `dispose()` only *schedules* a removal ~2 minutes out (an unref'd timer + * that dies with the process), so without this every test's container lingers. + * - afterAll: a final sweep so the file leaves nothing behind. + */ +export function installDockerSandboxTestCleanup(): void { + beforeAll(() => sweepTestContainers(), 30_000) + afterEach(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) + afterAll(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) +} diff --git a/packages/agents-runtime/test/sandbox-conformance.test.ts b/packages/agents-runtime/test/sandbox-conformance.test.ts index 189c3e4d5a..eca7f7926c 100644 --- a/packages/agents-runtime/test/sandbox-conformance.test.ts +++ b/packages/agents-runtime/test/sandbox-conformance.test.ts @@ -1,7 +1,7 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { remoteSandbox } from '../src/sandbox/remote' import { unrestrictedSandbox } from '../src/sandbox/unrestricted' import { dockerSandbox } from '../src/sandbox/docker' @@ -10,6 +10,7 @@ import { KNOWN_ADAPTERS } from '../src/sandbox' import type { Sandbox } from '../src/sandbox/types' import type { RemoteSandboxClient } from '../src/sandbox/remote/types' import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' /** * Cross-provider conformance: a single set of scenarios exercised against @@ -217,6 +218,18 @@ describe(`sandbox conformance`, () => { for (const provider of providers) { const d = provider.enabled ? describe : describe.skip d(provider.name, () => { + // The docker provider creates real containers; reap them per test, and + // give them a real timeout — container creation outgrows the 5s default + // (the dedicated docker test files use 60s). The host-fs and in-memory + // providers create no containers and keep the snappy default. + if (provider.adapter === `docker`) { + installDockerSandboxTestCleanup() + // Bind a longer timeout at collection time (the docker provider runs + // last, so only its tests pick this up); a hook would be too late since + // each test's timeout is fixed when it's registered. + vi.setConfig({ testTimeout: 60_000 }) + } + let cwd: string beforeEach(async () => { diff --git a/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts b/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts new file mode 100644 index 0000000000..fa51d90ae4 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-docker-ensure-image.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it, vi } from 'vitest' +import { ensureImage } from '../src/sandbox/docker' + +/** + * Unit test (no daemon) for the image-pull policy. `pullIfMissing` is + * documented as "pulls the image when it's not present locally", and + * `docker.pull` always round-trips to the registry — even for a fully cached + * digest — so pulling unconditionally made every container create hostage to + * registry reachability. These assert the documented presence-based contract. + */ + +function fakeDocker(present: boolean) { + const inspect = vi.fn(async () => { + if (present) return {} as unknown + const err = new Error(`no such image`) as Error & { statusCode?: number } + err.statusCode = 404 + throw err + }) + const pull = vi.fn(async () => ({}) as unknown) + const docker = { + getImage: vi.fn(() => ({ inspect })), + pull, + modem: { + followProgress: (_stream: unknown, onDone: (err: unknown) => void) => + onDone(null), + }, + } + return { docker, inspect, pull } +} + +describe(`ensureImage`, () => { + it(`skips the pull when the image is already present (no registry round-trip)`, async () => { + const { docker, inspect, pull } = fakeDocker(true) + await ensureImage(docker as never, `img@sha256:abc`, {}) + expect(inspect).toHaveBeenCalledOnce() + expect(pull).not.toHaveBeenCalled() + }) + + it(`pulls when the image is absent`, async () => { + const { docker, pull } = fakeDocker(false) + await ensureImage(docker as never, `img@sha256:abc`, {}) + expect(pull).toHaveBeenCalledOnce() + }) + + it(`does nothing when pullIfMissing is false (fail-fast / pre-pulled)`, async () => { + const { docker, inspect, pull } = fakeDocker(true) + await ensureImage(docker as never, `img@sha256:abc`, { + pullIfMissing: false, + }) + expect(inspect).not.toHaveBeenCalled() + expect(pull).not.toHaveBeenCalled() + }) +}) diff --git a/packages/agents-runtime/test/sandbox-docker-smoke.test.ts b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts index 945b5cca80..7974e6c358 100644 --- a/packages/agents-runtime/test/sandbox-docker-smoke.test.ts +++ b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts @@ -1,10 +1,13 @@ import { describe, expect, it } from 'vitest' import { dockerSandbox } from '../src/sandbox/docker' import { dockerAvailable, TEST_IMAGE } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' const d = dockerAvailable ? describe : describe.skip d(`ad-hoc docker sandbox smoke`, () => { + installDockerSandboxTestCleanup() + it(`exec basic, inspect caps, inspect /etc/passwd vs host, attempt mount`, async () => { const sandbox = await dockerSandbox({ image: TEST_IMAGE, diff --git a/packages/agents-runtime/test/sandbox-docker.test.ts b/packages/agents-runtime/test/sandbox-docker.test.ts index e62df96825..6d1ef744e6 100644 --- a/packages/agents-runtime/test/sandbox-docker.test.ts +++ b/packages/agents-runtime/test/sandbox-docker.test.ts @@ -1,15 +1,20 @@ -import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest' +import { describe, expect, it } from 'vitest' import { mkdtemp, rm, symlink, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' import { join } from 'node:path' +import { spawnSync } from 'node:child_process' import { __resetPersistentRegistryForTests, dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, sweepOrphanedDockerSandboxes, } from '../src/sandbox/docker' +import { lazySandbox } from '../src/sandbox/lazy' import { loadDockerode } from '../src/sandbox/docker/loader' import { SandboxError } from '../src/sandbox/types' import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' +import { installDockerSandboxTestCleanup } from './helpers/docker-sandbox-cleanup' /** * dockerSandbox integration tests. The whole describe block is gated on @@ -25,39 +30,8 @@ if (!dockerAvailable) { const d = dockerAvailable ? describe : describe.skip -async function sweepTestContainers(): Promise { - const Docker = await loadDockerode() - const docker = new Docker() - const containers = await docker.listContainers({ - all: true, - filters: { label: [`${TEST_LABEL}=1`] }, - }) - await Promise.all( - containers.map((c) => - docker - .getContainer(c.Id) - .remove({ force: true, v: true }) - .catch(() => {}) - ) - ) -} - d(`dockerSandbox`, () => { - beforeAll(async () => { - // Best-effort cleanup of leftover containers from previous runs. - await sweepTestContainers() - }, 30_000) - - afterAll(async () => { - await sweepTestContainers() - }, 30_000) - - afterEach(async () => { - // Every container now flows through the registry + debounced teardown; - // clear the in-process bookkeeping (and its timers) between tests. - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) + installDockerSandboxTestCleanup() it(`exec roundtrip with stdout / exitCode`, async () => { const sandbox = await dockerSandbox({ @@ -429,19 +403,7 @@ d(`dockerSandbox keyed lifecycle`, () => { // Unique keys per run so reattach targets a clean deterministic name. const KEY = `electric-test-${Date.now()}` - beforeAll(async () => { - await sweepTestContainers() - }, 30_000) - - afterEach(async () => { - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) - - afterAll(async () => { - __resetPersistentRegistryForTests() - await sweepTestContainers() - }, 30_000) + installDockerSandboxTestCleanup() const make = ( sandboxKey: string, @@ -473,6 +435,7 @@ d(`dockerSandbox keyed lifecycle`, () => { const info = await docker.getContainer(list[0].Id).inspect() return info.State.Running ? `running` : `stopped` } catch (error) { + // Removed between list and inspect (e.g. a reclaim in flight). if ( typeof error === `object` && error !== null && @@ -542,6 +505,12 @@ d(`dockerSandbox keyed lifecycle`, () => { // Per-entity identity lives in labels, since the shared name can't. expect(labels[`com.electric.sandbox.entity-type`]).toBe(`horton`) expect(labels[`com.electric.sandbox.entity`]).toBe(`/horton/abc123/main`) + // Owning process, for the boot sweep's same-host liveness probe. + expect(labels[`com.electric.sandbox.owner-pid`]).toBe(String(process.pid)) + // Compose-style grouping: docker GUIs collapse all sandboxes under one + // project, and `docker compose -p electric-sandboxes down` clears them. + expect(labels[`com.docker.compose.project`]).toBe(`electric-sandboxes`) + expect(labels[`com.docker.compose.service`]).toBe(`horton`) } finally { await sandbox.dispose() } @@ -755,9 +724,10 @@ d(`dockerSandbox keyed lifecycle`, () => { expect(await keyState(key)).toBe(`absent`) }, 90_000) - it(`sweepOrphanedDockerSandboxes leaves a *running* container untouched`, async () => { - // A running container may belong to a live sibling process sharing this - // daemon — force-removing it would wipe a peer's in-use sandbox. + it(`sweepOrphanedDockerSandboxes leaves a *running* container with a live owner untouched`, async () => { + // A running container whose owner-pid process (this test process) is alive + // belongs to a live sibling — force-removing it would wipe an in-use + // sandbox. const key = `${KEY}-orphan-running` await make(key, 60_000, /* persistent */ false) __resetPersistentRegistryForTests() @@ -779,4 +749,189 @@ d(`dockerSandbox keyed lifecycle`, () => { await sweepOrphanedDockerSandboxes() expect(await keyState(key)).toBe(`stopped`) }, 90_000) + + // A pid that is guaranteed dead: spawn a no-op child and reuse its pid after + // it has exited (synchronously, so it is reaped before we return). + const deadPid = (): number => { + const child = spawnSync(process.execPath, [`-e`, ``]) + if (child.pid === undefined) throw new Error(`spawnSync returned no pid`) + return child.pid + } + + // Create a container whose owner-pid label points at a dead process — the + // shape a crashed/quit runner leaves behind (its keepalive loop never exits + // on its own, so the container is still RUNNING). + const makeCrashOrphan = (sandboxKey: string, persistent: boolean) => + dockerSandbox({ + image: TEST_IMAGE, + labels: { + [TEST_LABEL]: `1`, + 'com.electric.sandbox.owner-pid': String(deadPid()), + }, + sandboxKey, + persistent, + sharedIdleGraceMs: 600_000, + }) + + it(`sweepOrphanedDockerSandboxes REMOVES a running *ephemeral* orphan whose owner died`, async () => { + const key = `${KEY}-crash-eph` + await makeCrashOrphan(key, /* persistent */ false) + __resetPersistentRegistryForTests() // the owning process is gone + expect(await keyState(key)).toBe(`running`) + + const reclaimed = await sweepOrphanedDockerSandboxes() + expect(reclaimed.length).toBeGreaterThanOrEqual(1) + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes STOPS a running *persistent* orphan whose owner died`, async () => { + // Persistent workspaces survive (the next wake reattaches by key), but a + // dead owner can't stop the keepalive — the sweep does it instead. + const key = `${KEY}-crash-persist` + await makeCrashOrphan(key, /* persistent */ true) + __resetPersistentRegistryForTests() + expect(await keyState(key)).toBe(`running`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`stopped`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes spares a running orphan ADOPTED by a live process`, async () => { + // The owner-pid label is immutable, so it goes stale when a live process + // reattaches a dead creator's container — adoption is recorded in an + // in-container marker that the sweep probes before reclaiming. + const key = `${KEY}-adopted` + await makeCrashOrphan(key, /* persistent */ true) + __resetPersistentRegistryForTests() // the creator is gone… + await make(key, 600_000, /* persistent */ true) // …a live process adopts + __resetPersistentRegistryForTests() // bypass the in-registry skip + expect(await keyState(key)).toBe(`running`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`running`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes leaves a running container with no owner-pid label untouched`, async () => { + // Containers created before the owner-pid label existed can't be liveness- + // probed — fail safe and leave them for a manual labelled prune. + const Docker = await loadDockerode() + const docker = new Docker() + const c = await docker.createContainer({ + ...{ name: `electric-sbx-legacy-nolabel-test` }, + Image: TEST_IMAGE, + Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`], + Labels: { + 'com.electric.sandbox': `1`, + 'com.electric.sandbox.persistent': `false`, + [TEST_LABEL]: `1`, + }, + HostConfig: {}, + }) + await c.start() + await sweepOrphanedDockerSandboxes() + const info = await c.inspect() + expect(info.State.Running).toBe(true) + }, 90_000) + + it(`removes the container when post-start init fails (no unregistered zombie)`, async () => { + // pidsLimit:1 — PID 1's keepalive holds the only slot, so the post-start + // `mkdir -p` exec cannot run and creation fails AFTER the container + // started. The failed create must remove it, not leave it behind running + // and unregistered (invisible to dispose and, while running, to the sweep). + const key = `${KEY}-init-fail` + await expect( + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey: key, + resources: { pidsLimit: 1 }, + }) + ).rejects.toThrow() + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`shutdownAllDockerSandboxes flushes pending debounced teardowns immediately`, async () => { + // The debounced idle timers are unref'd and die with the process — on + // shutdown the pending action must run NOW: STOP persistent, REMOVE + // ephemeral. + const ephKey = `${KEY}-shutdown-eph` + const perKey = `${KEY}-shutdown-per` + const eph = await make(ephKey, 600_000, /* persistent */ false) + const per = await make(perKey, 600_000, /* persistent */ true) + await eph.dispose() + await per.dispose() + // Teardowns are debounced 10 minutes out — both still running. + expect(await keyState(ephKey)).toBe(`running`) + expect(await keyState(perKey)).toBe(`running`) + + await shutdownAllDockerSandboxes() + expect(await keyState(ephKey)).toBe(`absent`) + expect(await keyState(perKey)).toBe(`stopped`) + }, 90_000) + + it(`lazy docker sandbox creates no container until first use`, async () => { + // The shape bootstrap wires: profile.factory returns a lazy wrapper, so a + // trivial wake (no tool use) never starts a container. + const key = `${KEY}-lazy` + const sb = lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => make(key, 60_000), + }) + expect(await keyState(key)).toBe(`absent`) + const r = await sb.exec({ command: `echo materialized` }) + expect(r.stdout.toString().trim()).toBe(`materialized`) + expect(await keyState(key)).toBe(`running`) + await sb.dispose() + }, 90_000) + + it(`lazy docker sandbox disposed without use leaves nothing behind`, async () => { + const key = `${KEY}-lazy-unused` + const sb = lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => make(key, 60_000), + }) + await sb.dispose() + expect(await keyState(key)).toBe(`absent`) + }, 60_000) + + it(`reclaimDockerSandboxByKey removes an earlier wake's persistent container without creating one`, async () => { + // The lazy wrapper's reclaim path: a terminal wake that never used its + // sandbox must still wipe the persistent workspace earlier wakes created. + const key = `${KEY}-reclaim-by-key` + const earlier = await make(key, 600_000, /* persistent */ true) + await earlier.dispose() // idle: container stays (debounced stop pending) + expect(await keyState(key)).toBe(`running`) + + await reclaimDockerSandboxByKey(key) + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`reclaimDockerSandboxByKey is a no-op when nothing exists under the key`, async () => { + await reclaimDockerSandboxByKey(`${KEY}-reclaim-absent`) + expect(await keyState(`${KEY}-reclaim-absent`)).toBe(`absent`) + }, 60_000) + + it(`reclaimDockerSandboxByKey defers to live leases — wiped once the last drains`, async () => { + const key = `${KEY}-reclaim-live` + const lease = await make(key, 600_000, /* persistent */ true) + await reclaimDockerSandboxByKey(key) + // The sibling lease still owns it — not torn down underneath it. + expect(await keyState(key)).toBe(`running`) + await lease.dispose() + // Marked reclaimed: the last lease draining wipes it immediately. + await waitForKeyState(key, `absent`, 5_000) + }, 90_000) + + it(`shutdownAllDockerSandboxes leaves containers with live leases alone`, async () => { + // The registry is process-wide: a still-open lease may belong to a sibling + // runtime that is NOT shutting down. Its own dispose tears it down — and if + // the process exits first, the boot sweep reclaims it by dead owner pid. + const key = `${KEY}-shutdown-live` + const live = await make(key, 600_000, /* persistent */ false) + await shutdownAllDockerSandboxes() + expect(await keyState(key)).toBe(`running`) + await live.dispose() + }, 90_000) }) diff --git a/packages/agents-runtime/test/sandbox-lazy.test.ts b/packages/agents-runtime/test/sandbox-lazy.test.ts new file mode 100644 index 0000000000..f9c2ea832d --- /dev/null +++ b/packages/agents-runtime/test/sandbox-lazy.test.ts @@ -0,0 +1,227 @@ +import { describe, expect, it } from 'vitest' +import { ensureSandboxMaterialized, lazySandbox } from '../src/sandbox/lazy' +import type { Sandbox } from '../src/sandbox/types' + +/** + * lazySandbox defers the provider factory until first use, so trivial wakes + * (cron ticks, bookkeeping) never pay for — or leak — a provider-side sandbox + * (e.g. a Docker container). + */ + +interface FakeCalls { + exec: number + dispose: Array<{ reclaim?: boolean } | undefined> +} + +const makeFake = (calls: FakeCalls): Sandbox => ({ + name: `fake:inner`, + workingDirectory: `/work`, + exec: async () => { + calls.exec += 1 + return { + exitCode: 0, + signal: null, + stdout: Buffer.from(`ok`), + stderr: Buffer.alloc(0), + timedOut: false, + aborted: false, + outputTruncated: false, + } + }, + readFile: async () => Buffer.from(`content`), + writeFile: async () => {}, + mkdir: async () => {}, + readdir: async () => [], + exists: async () => true, + remove: async () => {}, + stat: async () => ({ type: `file`, size: 0, mtimeMs: 0 }), + fetch: async () => new Response(`ok`), + dispose: async (opts) => { + calls.dispose.push(opts) + }, +}) + +const harness = () => { + const calls: FakeCalls = { exec: 0, dispose: [] } + let factoryCalls = 0 + const reclaims: Array = [] + const sandbox = lazySandbox({ + name: `fake`, + workingDirectory: `/work`, + factory: async () => { + factoryCalls += 1 + return makeFake(calls) + }, + reclaim: async () => { + reclaims.push(true) + }, + }) + return { + sandbox, + calls, + reclaims, + factoryCalls: () => factoryCalls, + } +} + +describe(`lazySandbox`, () => { + it(`does not invoke the factory on construction or property access`, () => { + const h = harness() + expect(h.sandbox.name).toBe(`fake`) + expect(h.sandbox.workingDirectory).toBe(`/work`) + expect(h.factoryCalls()).toBe(0) + }) + + it(`materializes on first use and reuses the inner sandbox after`, async () => { + const h = harness() + await h.sandbox.exec({ command: `echo hi` }) + await h.sandbox.exec({ command: `echo again` }) + await h.sandbox.readFile(`/work/x`) + expect(h.factoryCalls()).toBe(1) + expect(h.calls.exec).toBe(2) + // After materialization the inner provider's name shows through. + expect(h.sandbox.name).toBe(`fake:inner`) + }) + + it(`single-flights concurrent first calls`, async () => { + let resolveFactory!: (sb: Sandbox) => void + const calls: FakeCalls = { exec: 0, dispose: [] } + let factoryCalls = 0 + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: () => { + factoryCalls += 1 + return new Promise((r) => { + resolveFactory = r + }) + }, + }) + const a = sandbox.exec({ command: `a` }) + const b = sandbox.exec({ command: `b` }) + resolveFactory(makeFake(calls)) + await Promise.all([a, b]) + expect(factoryCalls).toBe(1) + expect(calls.exec).toBe(2) + }) + + it(`a failed factory rejects the call and is retried on the next use`, async () => { + let attempts = 0 + const calls: FakeCalls = { exec: 0, dispose: [] } + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: async () => { + attempts += 1 + if (attempts === 1) throw new Error(`daemon hiccup`) + return makeFake(calls) + }, + }) + await expect(sandbox.exec({ command: `x` })).rejects.toThrow( + `daemon hiccup` + ) + await sandbox.exec({ command: `x` }) + expect(attempts).toBe(2) + expect(calls.exec).toBe(1) + }) + + it(`dispose without use never invokes the factory`, async () => { + const h = harness() + await h.sandbox.dispose() + expect(h.factoryCalls()).toBe(0) + expect(h.calls.dispose).toEqual([]) + expect(h.reclaims).toEqual([]) + }) + + it(`dispose({reclaim}) without use runs the reclaim callback instead`, async () => { + // A terminal entity's persistent workspace from an EARLIER wake must not + // survive just because the final wake never touched the sandbox. + const h = harness() + await h.sandbox.dispose({ reclaim: true }) + expect(h.factoryCalls()).toBe(0) + expect(h.reclaims).toEqual([true]) + }) + + it(`dispose({reclaim}) without use is a no-op when no reclaim callback exists`, async () => { + let factoryCalls = 0 + const calls: FakeCalls = { exec: 0, dispose: [] } + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: async () => { + factoryCalls += 1 + return makeFake(calls) + }, + }) + await sandbox.dispose({ reclaim: true }) + expect(factoryCalls).toBe(0) + }) + + it(`dispose({reclaim}) during an in-flight factory that then fails runs the reclaim callback`, async () => { + // dispose is called while materialization is still pending; when that + // factory rejects there is nothing live to dispose, so a requested reclaim + // must still wipe the provider-side state via the callback. + let rejectFactory!: (err: unknown) => void + let factoryCalls = 0 + const reclaims: Array = [] + const sandbox = lazySandbox({ + workingDirectory: `/work`, + factory: () => { + factoryCalls += 1 + return new Promise((_, rej) => { + rejectFactory = rej + }) + }, + reclaim: async () => { + reclaims.push(true) + }, + }) + // Kick off materialization (now in-flight), then dispose before it settles. + const pendingUse = sandbox.exec({ command: `x` }) + pendingUse.catch(() => {}) // the use itself rejects; we assert on dispose + const disposed = sandbox.dispose({ reclaim: true }) + rejectFactory(new Error(`daemon hiccup`)) + await disposed + await expect(pendingUse).rejects.toThrow(`daemon hiccup`) + expect(factoryCalls).toBe(1) + expect(reclaims).toEqual([true]) + }) + + it(`dispose after use forwards to the inner sandbox (reclaim included)`, async () => { + const h = harness() + await h.sandbox.exec({ command: `x` }) + await h.sandbox.dispose({ reclaim: true }) + expect(h.calls.dispose).toEqual([{ reclaim: true }]) + // The inner dispose owns reclaim semantics — the callback must not ALSO run. + expect(h.reclaims).toEqual([]) + }) + + it(`operations after dispose are rejected with a runtime SandboxError`, async () => { + const h = harness() + await h.sandbox.dispose() + await expect(h.sandbox.exec({ command: `x` })).rejects.toMatchObject({ + kind: `runtime`, + }) + expect(h.factoryCalls()).toBe(0) + }) + + it(`repeated dispose is a no-op`, async () => { + const h = harness() + await h.sandbox.exec({ command: `x` }) + await h.sandbox.dispose() + await h.sandbox.dispose() + expect(h.calls.dispose).toHaveLength(1) + }) +}) + +describe(`ensureSandboxMaterialized`, () => { + it(`materializes a lazy sandbox without running any operation`, async () => { + const h = harness() + await ensureSandboxMaterialized(h.sandbox) + expect(h.factoryCalls()).toBe(1) + expect(h.calls.exec).toBe(0) + }) + + it(`is a no-op for a non-lazy sandbox`, async () => { + const calls: FakeCalls = { exec: 0, dispose: [] } + await ensureSandboxMaterialized(makeFake(calls)) + expect(calls.exec).toBe(0) + }) +}) diff --git a/packages/agents/src/bootstrap.ts b/packages/agents/src/bootstrap.ts index f4c98eb18b..236224b542 100644 --- a/packages/agents/src/bootstrap.ts +++ b/packages/agents/src/bootstrap.ts @@ -15,6 +15,7 @@ import { import { chooseDefaultSandbox, isE2BAvailable, + lazySandbox, remoteSandbox, type SandboxProfile, } from '@electric-ax/agents-runtime/sandbox' @@ -43,6 +44,13 @@ export interface AgentHandlerResult { registry: EntityRegistry typeNames: Array skillsRegistry: SkillsRegistry | null + /** + * Immediately tears down the idle sandboxes this process created (set when + * a provider with host-local state — docker — is registered). MUST be called + * on shutdown, after wakes drain: the providers' debounced idle teardowns + * die with the process, which would leave containers running. + */ + shutdownSandboxes: (() => Promise) | null } export type BuiltinElectricToolsFactory = NonNullable< @@ -160,7 +168,8 @@ export async function createBuiltinAgentHandler( registerWorker(registry, { workingDirectory: cwd, streamFn, modelCatalog }) typeNames.push(`worker`) - const sandboxProfiles = await buildBuiltinSandboxProfiles(cwd) + const { profiles: sandboxProfiles, shutdownSandboxes } = + await buildBuiltinSandboxProfiles(cwd) const runtime = createRuntimeHandler({ baseUrl: agentServerUrl, @@ -182,6 +191,7 @@ export async function createBuiltinAgentHandler( registry, typeNames, skillsRegistry, + shutdownSandboxes, } } @@ -217,7 +227,7 @@ export const registerAgentTypes = registerBuiltinAgentTypes * Guard so repeated `buildBuiltinSandboxProfiles` calls in one process don't * re-run the boot sweep. */ -let dockerSweptOnBoot = false +let dockerBootSweep: Promise | null = null type SweepOrphanedDockerSandboxes = // eslint-disable-next-line quotes -- type-position import() requires a string literal @@ -225,17 +235,15 @@ type SweepOrphanedDockerSandboxes = function sweepOrphanedDockerSandboxesOnce( sweep: SweepOrphanedDockerSandboxes -): void { - if (dockerSweptOnBoot) return - dockerSweptOnBoot = true - // One-shot, at boot: reclaim any sandbox containers a previous (crashed) - // process left behind. Fire-and-forget — nothing is acquired yet, and within - // a live process shared containers are stopped (not removed) when idle. - sweep() - .then((removed) => { - if (removed.length > 0) { +): Promise { + // One-shot, at boot: reclaim any sandbox containers a previous (crashed or + // quit) process left behind. Awaited by the caller — the sweep can stop + // running orphans, so it must finish before any wake reattaches by key. + dockerBootSweep ??= sweep() + .then((reclaimed) => { + if (reclaimed.length > 0) { serverLog.info( - `[builtin-agents] docker sandbox boot sweep removed ${removed.length} leftover container(s)` + `[builtin-agents] docker sandbox boot sweep reclaimed ${reclaimed.length} leftover container(s)` ) } }) @@ -244,16 +252,23 @@ function sweepOrphanedDockerSandboxesOnce( `[builtin-agents] docker sandbox boot sweep error: ${err instanceof Error ? err.message : String(err)}` ) ) + return dockerBootSweep } /** * Built-in sandbox profiles. `local` is always available. `docker` is * gated on Docker being reachable so a user without Docker installed * sees only what works — the UI never offers a non-functional choice. + * + * Also returns `shutdownSandboxes` when a host-local provider registered: an + * immediate teardown of this process's live containers that the embedding + * server must run on shutdown (the providers' debounced idle teardowns die + * with the process). */ -async function buildBuiltinSandboxProfiles( - workingDirectory: string -): Promise> { +async function buildBuiltinSandboxProfiles(workingDirectory: string): Promise<{ + profiles: Array + shutdownSandboxes: (() => Promise) | null +}> { const profiles: Array = [ { name: `local`, @@ -263,25 +278,32 @@ async function buildBuiltinSandboxProfiles( chooseDefaultSandbox(resolveCwd(args, workingDirectory)), }, ] + let shutdownSandboxes: (() => Promise) | null = null try { const { isDockerAvailable } = await import( `@electric-ax/agents-runtime/sandbox/docker` ) if (await isDockerAvailable()) { - const { dockerSandbox, sweepOrphanedDockerSandboxes } = await import( - `@electric-ax/agents-runtime/sandbox/docker` - ) - // Reclaim containers a previous crashed process may have left running. - // No periodic reaper: within a live process, shared containers stop - // themselves a short while after their last lease disposes (debounced - // idle-stop), and ephemeral ones are killed on dispose. - sweepOrphanedDockerSandboxesOnce(sweepOrphanedDockerSandboxes) + const { + dockerSandbox, + reclaimDockerSandboxByKey, + shutdownAllDockerSandboxes, + sweepOrphanedDockerSandboxes, + } = await import(`@electric-ax/agents-runtime/sandbox/docker`) + // Reclaim containers a previous process (crash, force-quit, or a + // shutdown that raced the debounced teardowns) left behind. No periodic + // reaper: within a live process, shared containers stop themselves a + // short while after their last lease disposes (debounced idle-stop), + // ephemeral ones are killed on dispose, and the rest are flushed by + // `shutdownSandboxes` on clean shutdown. + await sweepOrphanedDockerSandboxesOnce(sweepOrphanedDockerSandboxes) + shutdownSandboxes = shutdownAllDockerSandboxes profiles.push({ name: `docker`, label: `Docker`, description: `Runs in a hardened Docker container: dropped capabilities, no privilege escalation, and CPU/memory/process limits. The chosen working directory is mounted read-write and, by default, network egress is unrestricted (allow-all).`, - factory: ({ + factory: async ({ args, sandboxKey, persistent, @@ -290,25 +312,41 @@ async function buildBuiltinSandboxProfiles( entityUrl, }) => { const cwd = readWorkingDirectoryArg(args) - return dockerSandbox({ - // Default to open egress for local development. Network policy is a - // capability of this profile, not a separate profile: like the - // working directory above, it can be made a per-spawn arg later so a - // single `docker` profile spans permissive → fully-isolated rather - // than splitting into `docker-permissive` / `docker-isolated`. - initialNetworkPolicy: { mode: `allow-all` }, - extraMounts: cwd - ? [{ hostPath: cwd, containerPath: `/work`, readOnly: false }] + // Lazy: the container is only created/started when the wake actually + // USES the sandbox, so trivial wakes (cron ticks, bookkeeping) don't + // spin one up. `/work` is the container cwd dockerSandbox defaults to. + return lazySandbox({ + name: `docker`, + workingDirectory: `/work`, + factory: () => + dockerSandbox({ + // Default to open egress for local development. Network policy + // is a capability of this profile, not a separate profile: like + // the working directory above, it can be made a per-spawn arg + // later so a single `docker` profile spans permissive → + // fully-isolated rather than splitting into `docker-permissive` + // / `docker-isolated`. + initialNetworkPolicy: { mode: `allow-all` }, + extraMounts: cwd + ? [{ hostPath: cwd, containerPath: `/work`, readOnly: false }] + : undefined, + // The container is always named-by-key and reattachable; + // `persistent` chooses idle teardown (stop vs remove) and + // `owner` gates creation (an attacher reattaches only). All + // resolved upstream from config. + sandboxKey, + persistent, + owner, + // Observability: tag the container/labels with who spawned it. + entityType, + entityUrl, + }), + // A terminal entity whose final wake never used the sandbox must + // still wipe the persistent workspace earlier wakes created. Owner + // leases only — an attacher can never reclaim the owner's sandbox. + reclaim: owner + ? () => reclaimDockerSandboxByKey(sandboxKey) : undefined, - // The container is always named-by-key and reattachable; `persistent` - // chooses idle teardown (stop vs remove) and `owner` gates creation - // (an attacher reattaches only). All resolved upstream from config. - sandboxKey, - persistent, - owner, - // Observability: tag the container/labels with who spawned it. - entityType, - entityUrl, }) }, }) @@ -360,7 +398,7 @@ async function buildBuiltinSandboxProfiles( console.log( `[builtin-agents] sandbox profiles advertised: ${profiles.map((p) => p.name).join(`, `)}` ) - return profiles + return { profiles, shutdownSandboxes } } function readWorkingDirectoryArg( diff --git a/packages/agents/src/server.ts b/packages/agents/src/server.ts index 8523b81a58..a05c2fab94 100644 --- a/packages/agents/src/server.ts +++ b/packages/agents/src/server.ts @@ -365,6 +365,23 @@ export class BuiltinAgentsServer { }), new Promise((resolve) => setTimeout(resolve, 5_000)), ]) + // Tear down this process's idle sandbox containers NOW — their debounced + // idle teardowns die with the process and would leave them running. Runs + // AFTER the drain above on purpose: this flush only reclaims idle, + // lease-free containers, so the drained wakes must release their leases + // first. Bounded by its own 5s so a hung daemon can't block quit — worst + // case shutdown is the two bounds back to back. + if (this.bootstrap.shutdownSandboxes) { + await Promise.race([ + this.bootstrap.shutdownSandboxes().catch((err) => { + serverLog.error( + `[builtin-agents] sandbox shutdown failed during shutdown:`, + err + ) + }), + new Promise((resolve) => setTimeout(resolve, 5_000)), + ]) + } this.bootstrap = null }