Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion packages/engine/src/services/parallelCoordinator.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { describe, it, expect } from "vitest";
import { calculateOptimalWorkers, distributeFrames } from "./parallelCoordinator.js";
import {
calculateOptimalWorkers,
distributeFrames,
shouldVerifyWorkerGpu,
} from "./parallelCoordinator.js";
import type { EngineConfig } from "../config.js";

describe("distributeFrames", () => {
it("distributes frames evenly across workers", () => {
Expand Down Expand Up @@ -68,3 +73,29 @@ describe("calculateOptimalWorkers", () => {
expect(workers).toBe(4);
});
});

describe("shouldVerifyWorkerGpu", () => {
  // Shared fixture: a config that declares software GPU mode.
  const softwareConfig: Partial<EngineConfig> = { browserGpuMode: "software" };

  it("returns true for worker 0 when GPU mode is software", () => {
    const verdict = shouldVerifyWorkerGpu(0, softwareConfig);
    expect(verdict).toBe(true);
  });

  it("returns false for non-zero workers when GPU mode is software", () => {
    // Any worker id other than 0 must be skipped, however large.
    for (const workerId of [1, 5, 17]) {
      expect(shouldVerifyWorkerGpu(workerId, softwareConfig)).toBe(false);
    }
  });

  it("returns false for any worker when GPU mode is not software", () => {
    // Worker 0 is only eligible when the mode is exactly "software"; both an
    // explicit non-software mode and a config with no mode must opt out.
    const hardwareConfig = { browserGpuMode: "hardware" } as Partial<EngineConfig>;
    const emptyConfig: Partial<EngineConfig> = {};

    expect(shouldVerifyWorkerGpu(0, hardwareConfig)).toBe(false);
    expect(shouldVerifyWorkerGpu(0, emptyConfig)).toBe(false);
  });

  it("returns false when config is undefined", () => {
    // A missing config never triggers verification, worker 0 included.
    for (const workerId of [0, 3]) {
      expect(shouldVerifyWorkerGpu(workerId, undefined)).toBe(false);
    }
  });
});
35 changes: 25 additions & 10 deletions packages/engine/src/services/parallelCoordinator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,16 @@ export function distributeFrames(
return tasks;
}

/**
 * Tells a parallel worker whether it is responsible for running the
 * per-worker SwiftShader assertion.
 *
 * Only worker 0 is ever selected, and only when the config declares
 * `browserGpuMode: "software"`. Workers within a chunk share the same
 * Chrome binary, flags, and OS/driver state, so a single verification per
 * chunk is sufficient. See `heygen-com/hyperframes#955`.
 *
 * @param workerId - Identifier of the worker asking; only `0` can qualify.
 * @param config - Optional engine config; when omitted the result is `false`.
 * @returns `true` only for worker 0 under software GPU mode, else `false`.
 */
export function shouldVerifyWorkerGpu(workerId: number, config?: Partial<EngineConfig>): boolean {
  // Read the mode first: a missing config or any non-"software" mode opts out.
  const softwareModeDeclared = config?.browserGpuMode === "software";
  if (!softwareModeDeclared) {
    return false;
  }

  // Under software mode, worker 0 stands in for the whole chunk.
  return workerId === 0;
}

async function executeWorkerTask(
task: WorkerTask,
serverUrl: string,
Expand All @@ -207,17 +217,22 @@ async function executeWorkerTask(
createBeforeCaptureHook(),
config,
);
// Per-worker SwiftShader assertion: when the caller declares
// `browserGpuMode: "software"`, every worker session must verify Chrome's
// WebGL backend is actually SwiftShader before the first frame. Hosts
// that fall back to a hardware GL backend (or silently fail to load
// Per-worker SwiftShader assertion, gated to worker 0 only.
// When `browserGpuMode: "software"` is declared, the chunk's GL backend
// must be verified as SwiftShader before the first frame — a host that
// falls back to a hardware GL backend (or silently fails to load
// SwiftShader) would otherwise produce non-deterministic pixels and
// break the distributed byte-identical-retry contract — the parallel
// branch wouldn't catch it via the pre-warmup probe (renderChunk now
// skips that when chunkWorkerCount > 1). The canvas-based reader works
// on both regular Chrome and chrome-headless-shell (which serves
// `chrome://gpu` as an empty document).
if (config?.browserGpuMode === "software") {
// break the distributed byte-identical-retry contract. Running this
// probe on every worker means N concurrent navigations to a WebGL
// probe page per chunk; with `chunkWorkerCount=6` × 3 chunks, that's
// 18 simultaneous CDP page-loads, which inflated c=3 worst-case wall
// by ~24s vs c=6/c=8 on the texture-launch bench. Workers in the same
// chunk share the same Chrome binary, flags, and OS/driver state, so
// worker 0's success is representative — gate it there and skip the
// rest. See `heygen-com/hyperframes#955` for the bench data and the
// pre-warmup probe interaction (which `renderChunk` already skips
// when `chunkWorkerCount > 1`).
if (shouldVerifyWorkerGpu(task.workerId, config)) {
await assertSwiftShader(session.page, readWebGlVendorInfoFromCanvas);
}
await initializeSession(session);
Expand Down
10 changes: 6 additions & 4 deletions packages/producer/src/services/distributed/renderChunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -469,10 +469,12 @@ export async function renderChunk(
// Resolve worker count up-front so we can decide whether to bother
// pre-warming a probe session at all. The parallel branch
// (chunkWorkerCount > 1) closes the probe immediately and creates fresh
// per-worker sessions; `executeWorkerTask` now runs its own
// `assertSwiftShader` against each worker session (gated on
// `cfg.browserGpuMode === "software"`), so the safety contract holds
// without the eager pre-probe.
// per-worker sessions; `executeWorkerTask` runs `assertSwiftShader`
// on worker 0 only (gated on `cfg.browserGpuMode === "software"`), so
// the safety contract holds without the eager pre-probe and without
// every worker concurrently navigating to the GL probe page. See
// `heygen-com/hyperframes#955` for the worst-case wall regression that
// motivated gating the probe to worker 0.
//
// Capture-cost calibration based on shader transitions / renderModeHints
// is not threaded through to chunks yet; the in-process renderer's
Expand Down
Loading