heygen-com · jrusso1020 · May 19, 2026 · May 19, 2026
diff --git a/packages/engine/src/services/parallelCoordinator.test.ts b/packages/engine/src/services/parallelCoordinator.test.ts
@@ -1,5 +1,10 @@
 import { describe, it, expect } from "vitest";
-import { calculateOptimalWorkers, distributeFrames } from "./parallelCoordinator.js";
+import {
+  calculateOptimalWorkers,
+  distributeFrames,
+  shouldVerifyWorkerGpu,
+} from "./parallelCoordinator.js";
+import type { EngineConfig } from "../config.js";
 
 describe("distributeFrames", () => {
   it("distributes frames evenly across workers", () => {
@@ -68,3 +73,29 @@ describe("calculateOptimalWorkers", () => {
     expect(workers).toBe(4);
   });
 });
+
+describe("shouldVerifyWorkerGpu", () => {
+  const softwareConfig: Partial<EngineConfig> = { browserGpuMode: "software" };
+
+  it("returns true for worker 0 when GPU mode is software", () => {
+    expect(shouldVerifyWorkerGpu(0, softwareConfig)).toBe(true);
+  });
+
+  it("returns false for non-zero workers when GPU mode is software", () => {
+    expect(shouldVerifyWorkerGpu(1, softwareConfig)).toBe(false);
+    expect(shouldVerifyWorkerGpu(5, softwareConfig)).toBe(false);
+    expect(shouldVerifyWorkerGpu(17, softwareConfig)).toBe(false);
+  });
+
+  it("returns false for any worker when GPU mode is not software", () => {
+    expect(shouldVerifyWorkerGpu(0, { browserGpuMode: "hardware" } as Partial<EngineConfig>)).toBe(
+      false,
+    );
+    expect(shouldVerifyWorkerGpu(0, {})).toBe(false);
+  });
+
+  it("returns false when config is undefined", () => {
+    expect(shouldVerifyWorkerGpu(0, undefined)).toBe(false);
+    expect(shouldVerifyWorkerGpu(3, undefined)).toBe(false);
+  });
+});
diff --git a/packages/engine/src/services/parallelCoordinator.ts b/packages/engine/src/services/parallelCoordinator.ts
@@ -181,6 +181,16 @@ export function distributeFrames(
   return tasks;
 }
 
+/**
+ * Returns true only for worker 0 when `config.browserGpuMode` is "software"
+ * (false if `config` is undefined), i.e. when the SwiftShader assertion should
+ * run. Workers in a chunk share the same Chrome binary, flags, and OS/driver
+ * state, so one check per chunk suffices. See `heygen-com/hyperframes#955`.
+ */
+export function shouldVerifyWorkerGpu(workerId: number, config?: Partial<EngineConfig>): boolean {
+  return config?.browserGpuMode === "software" && workerId === 0;
+}
+
 async function executeWorkerTask(
   task: WorkerTask,
   serverUrl: string,
@@ -207,17 +217,22 @@ async function executeWorkerTask(
       createBeforeCaptureHook(),
       config,
     );
-    // Per-worker SwiftShader assertion: when the caller declares
-    // `browserGpuMode: "software"`, every worker session must verify Chrome's
-    // WebGL backend is actually SwiftShader before the first frame. Hosts
-    // that fall back to a hardware GL backend (or silently fail to load
+    // SwiftShader assertion, run once per chunk on worker 0 only.
+    // When `browserGpuMode: "software"` is declared, the chunk's GL backend
+    // must be verified as SwiftShader before the first frame — a host that
+    // falls back to a hardware GL backend (or silently fails to load
     // SwiftShader) would otherwise produce non-deterministic pixels and
-    // break the distributed byte-identical-retry contract — the parallel
-    // branch wouldn't catch it via the pre-warmup probe (renderChunk now
-    // skips that when chunkWorkerCount > 1). The canvas-based reader works
-    // on both regular Chrome and chrome-headless-shell (which serves
-    // `chrome://gpu` as an empty document).
-    if (config?.browserGpuMode === "software") {
+    // break the distributed byte-identical-retry contract. Running this
+    // probe on every worker means N concurrent navigations to a WebGL
+    // probe page per chunk; with `chunkWorkerCount=6` × 3 chunks, that's
+    // 18 simultaneous CDP page-loads, which inflated the c=3 worst-case
+    // wall-clock time by ~24s vs c=6/c=8 on the texture-launch bench.
+    // Workers in a chunk share the same Chrome binary, flags, and OS/driver
+    // state, so worker 0's result is representative — check there and skip the
+    // rest. See `heygen-com/hyperframes#955` for the bench data and the
+    // pre-warmup probe interaction (which `renderChunk` already skips
+    // when `chunkWorkerCount > 1`).
+    if (shouldVerifyWorkerGpu(task.workerId, config)) {
       await assertSwiftShader(session.page, readWebGlVendorInfoFromCanvas);
     }
     await initializeSession(session);

diff --git a/packages/producer/src/services/distributed/renderChunk.ts b/packages/producer/src/services/distributed/renderChunk.ts
@@ -469,10 +469,12 @@ export async function renderChunk(
     // Resolve worker count up-front so we can decide whether to bother
     // pre-warming a probe session at all. The parallel branch
     // (chunkWorkerCount > 1) closes the probe immediately and creates fresh
-    // per-worker sessions; `executeWorkerTask` now runs its own
-    // `assertSwiftShader` against each worker session (gated on
-    // `cfg.browserGpuMode === "software"`), so the safety contract holds
-    // without the eager pre-probe.
+    // per-worker sessions; `executeWorkerTask` runs `assertSwiftShader`
+    // on worker 0 only (gated on `cfg.browserGpuMode === "software"`), so
+    // the safety contract holds without the eager pre-probe and without
+    // every worker concurrently navigating to the GL probe page. See
+    // `heygen-com/hyperframes#955` for the worst-case wall-clock
+    // regression that motivated gating the probe to worker 0.
     //
     // Capture-cost calibration based on shader transitions / renderModeHints
     // is not threaded through to chunks yet; the in-process renderer's