diff --git a/packages/engine/src/services/parallelCoordinator.test.ts b/packages/engine/src/services/parallelCoordinator.test.ts index 696343618..3cf266c1f 100644 --- a/packages/engine/src/services/parallelCoordinator.test.ts +++ b/packages/engine/src/services/parallelCoordinator.test.ts @@ -1,5 +1,10 @@ import { describe, it, expect } from "vitest"; -import { calculateOptimalWorkers, distributeFrames } from "./parallelCoordinator.js"; +import { + calculateOptimalWorkers, + distributeFrames, + shouldVerifyWorkerGpu, +} from "./parallelCoordinator.js"; +import type { EngineConfig } from "../config.js"; describe("distributeFrames", () => { it("distributes frames evenly across workers", () => { @@ -68,3 +73,29 @@ describe("calculateOptimalWorkers", () => { expect(workers).toBe(4); }); }); + +describe("shouldVerifyWorkerGpu", () => { + const softwareConfig: Partial<EngineConfig> = { browserGpuMode: "software" }; + + it("returns true for worker 0 when GPU mode is software", () => { + expect(shouldVerifyWorkerGpu(0, softwareConfig)).toBe(true); + }); + + it("returns false for non-zero workers when GPU mode is software", () => { + expect(shouldVerifyWorkerGpu(1, softwareConfig)).toBe(false); + expect(shouldVerifyWorkerGpu(5, softwareConfig)).toBe(false); + expect(shouldVerifyWorkerGpu(17, softwareConfig)).toBe(false); + }); + + it("returns false for any worker when GPU mode is not software", () => { + expect(shouldVerifyWorkerGpu(0, { browserGpuMode: "hardware" } as Partial<EngineConfig>)).toBe( + false, + ); + expect(shouldVerifyWorkerGpu(0, {})).toBe(false); + }); + + it("returns false when config is undefined", () => { + expect(shouldVerifyWorkerGpu(0, undefined)).toBe(false); + expect(shouldVerifyWorkerGpu(3, undefined)).toBe(false); + }); +}); diff --git a/packages/engine/src/services/parallelCoordinator.ts b/packages/engine/src/services/parallelCoordinator.ts index 6800ff918..236b8f147 100644 --- a/packages/engine/src/services/parallelCoordinator.ts +++ 
b/packages/engine/src/services/parallelCoordinator.ts @@ -181,6 +181,16 @@ export function distributeFrames( return tasks; } +/** + * Decide whether a parallel worker should run the per-worker SwiftShader + * assertion. Gated to worker 0 only: workers within a chunk share the same + * Chrome binary, flags, and OS/driver state, so one verification per chunk + * is sufficient. See `heygen-com/hyperframes#955`. + */ +export function shouldVerifyWorkerGpu(workerId: number, config?: Partial<EngineConfig>): boolean { + return config?.browserGpuMode === "software" && workerId === 0; +} + async function executeWorkerTask( task: WorkerTask, serverUrl: string, @@ -207,17 +217,22 @@ async function executeWorkerTask( createBeforeCaptureHook(), config, ); - // Per-worker SwiftShader assertion: when the caller declares - // `browserGpuMode: "software"`, every worker session must verify Chrome's - // WebGL backend is actually SwiftShader before the first frame. Hosts - // that fall back to a hardware GL backend (or silently fail to load + // Per-worker SwiftShader assertion, gated to worker 0 only. + // When `browserGpuMode: "software"` is declared, the chunk's GL backend + // must be verified as SwiftShader before the first frame — a host that + // falls back to a hardware GL backend (or silently fails to load // SwiftShader) would otherwise produce non-deterministic pixels and - // break the distributed byte-identical-retry contract — the parallel - // branch wouldn't catch it via the pre-warmup probe (renderChunk now - // skips that when chunkWorkerCount > 1). The canvas-based reader works - // on both regular Chrome and chrome-headless-shell (which serves - // `chrome://gpu` as an empty document). - if (config?.browserGpuMode === "software") { + // break the distributed byte-identical-retry contract. 
Running this + // probe on every worker means N concurrent navigations to a WebGL + // probe page per chunk; with `chunkWorkerCount=6` × 3 chunks, that's + // 18 simultaneous CDP page-loads, which inflated c=3 worst-case wall + // by ~24s vs c=6/c=8 on the texture-launch bench. Workers in the same + // chunk share the same Chrome binary, flags, and OS/driver state, so + // worker 0's success is representative — gate it there and skip the + // rest. See `heygen-com/hyperframes#955` for the bench data and the + // pre-warmup probe interaction (which `renderChunk` already skips + // when `chunkWorkerCount > 1`). + if (shouldVerifyWorkerGpu(task.workerId, config)) { await assertSwiftShader(session.page, readWebGlVendorInfoFromCanvas); } await initializeSession(session); diff --git a/packages/producer/src/services/distributed/renderChunk.ts b/packages/producer/src/services/distributed/renderChunk.ts index 84fd9ac9d..5309cf52f 100644 --- a/packages/producer/src/services/distributed/renderChunk.ts +++ b/packages/producer/src/services/distributed/renderChunk.ts @@ -469,10 +469,12 @@ export async function renderChunk( // Resolve worker count up-front so we can decide whether to bother // pre-warming a probe session at all. The parallel branch // (chunkWorkerCount > 1) closes the probe immediately and creates fresh - // per-worker sessions; `executeWorkerTask` now runs its own - // `assertSwiftShader` against each worker session (gated on - // `cfg.browserGpuMode === "software"`), so the safety contract holds - // without the eager pre-probe. + // per-worker sessions; `executeWorkerTask` runs `assertSwiftShader` + // on worker 0 only (gated on `cfg.browserGpuMode === "software"`), so + // the safety contract holds without the eager pre-probe and without + // every worker concurrently navigating to the GL probe page. See + // `heygen-com/hyperframes#955` for the worst-case wall regression that + // motivated gating the probe to worker 0. 
// // Capture-cost calibration based on shader transitions / renderModeHints // is not threaded through to chunks yet; the in-process renderer's