From 6c6add88a76f604c3d43513c24b057299f968cf5 Mon Sep 17 00:00:00 2001 From: Chris Lorenzo Date: Thu, 4 Jun 2026 13:09:12 -0400 Subject: [PATCH] feat(webgl): detect GPU out-of-memory and emit an outOfMemory event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices log a flood of "Could not create WebGL Texture" on image-heavy pages: a texImage2D OOM escalates to a lost context, after which every gl.createTexture() returns null. The renderer had no way to detect the originating GPU out-of-memory — getError() is only checked in dev builds because it forces a CPU/GPU sync. Add a once-per-loop OOM probe and surface it as an application event so the app can recover (e.g. reload with a lower criticalThreshold): - WebGlRenderer.checkForOutOfMemory() drains the GL error queue (bounded) and reports whether GL_OUT_OF_MEMORY was seen. checkForOutOfMemory is now a required CoreRenderer method; CanvasRenderer returns false. - The probe runs at the idle transition (end of a render burst), not every frame. GL errors accumulate and persist until drained, so a single check still catches any OOM raised during the active frames without paying the getError() sync per frame. - TextureMemoryManager.handleOutOfMemory() queues an `outOfMemory` frame event ({ memUsed, criticalThreshold }) and requests a best-effort cleanup. Persistence/threshold-lowering/reload is left to the app; RendererMainOutOfMemoryEvent documents the recommended integration (read calibrated threshold from localStorage, namespaced per app for file:// deployments; lower to 90% of the measured ceiling with a floor; reload). Tests: TextureMemoryManager event behavior and the idle-path probe (fires at idle, not on active frames). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/core/TextureMemoryManager.test.ts | 98 ++++++++++++++++ src/core/TextureMemoryManager.ts | 30 ++++- .../web/WebPlatform.outOfMemory.test.ts | 78 +++++++++++++ src/core/platforms/web/WebPlatform.ts | 8 ++ src/core/renderers/CoreRenderer.ts | 12 ++ src/core/renderers/canvas/CanvasRenderer.ts | 5 + src/core/renderers/webgl/WebGlRenderer.ts | 36 ++++++ src/main-api/Renderer.ts | 110 ++++++++++++++++-- 8 files changed, 364 insertions(+), 13 deletions(-) create mode 100644 src/core/TextureMemoryManager.test.ts create mode 100644 src/core/platforms/web/WebPlatform.outOfMemory.test.ts diff --git a/src/core/TextureMemoryManager.test.ts b/src/core/TextureMemoryManager.test.ts new file mode 100644 index 0000000..5c685bc --- /dev/null +++ b/src/core/TextureMemoryManager.test.ts @@ -0,0 +1,98 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { + TextureMemoryManager, + type TextureMemoryManagerSettings, +} from './TextureMemoryManager.js'; +import type { Stage } from './Stage.js'; +import type { Texture } from './textures/Texture.js'; + +function makeSettings( + overrides: Partial<TextureMemoryManagerSettings> = {}, +): TextureMemoryManagerSettings { + return { + criticalThreshold: 200e6, + targetThresholdLevel: 0.5, + cleanupInterval: 5000, + debugLogging: false, + baselineMemoryAllocation: 26e6, + doNotExceedCriticalThreshold: false, + ...overrides, + }; +} + +// The only Stage method the OOM path touches is queueFrameEvent. 
+function makeStage(): { + stage: Stage; + queueFrameEvent: ReturnType<typeof vi.fn>; +} { + const queueFrameEvent = vi.fn(); + const stage = { queueFrameEvent } as unknown as Stage; + return { stage, queueFrameEvent }; +} + +function makeManager(overrides: Partial<TextureMemoryManagerSettings> = {}): { + mgr: TextureMemoryManager; + queueFrameEvent: ReturnType<typeof vi.fn>; +} { + const { stage, queueFrameEvent } = makeStage(); + const mgr = new TextureMemoryManager(stage, makeSettings(overrides)); + return { mgr, queueFrameEvent }; +} + +// setTextureMemUse expects a Texture with a mutable memUsed field; nothing else +// is read on the OOM path. +function fakeTexture(): Texture { + return { memUsed: 0 } as unknown as Texture; +} + +describe('TextureMemoryManager — out-of-memory event', () => { + it('queues an outOfMemory frame event with the estimate and threshold', () => { + const { mgr, queueFrameEvent } = makeManager({ criticalThreshold: 200e6 }); + // memUsed = baseline (26e6) + texture (100e6) = 126e6 + mgr.setTextureMemUse(fakeTexture(), 100e6); + + mgr.handleOutOfMemory(); + + expect(queueFrameEvent).toHaveBeenCalledTimes(1); + expect(queueFrameEvent).toHaveBeenCalledWith('outOfMemory', { + memUsed: 126e6, + criticalThreshold: 200e6, + }); + }); + + it('requests an immediate cleanup as a best-effort mitigation', () => { + const { mgr } = makeManager(); + expect(mgr.criticalCleanupRequested).toBe(false); + + mgr.handleOutOfMemory(); + + expect(mgr.criticalCleanupRequested).toBe(true); + }); + + it('does not change the critical threshold itself', () => { + const { mgr } = makeManager({ criticalThreshold: 200e6 }); + const before = mgr.getMemoryInfo().criticalThreshold; + + mgr.handleOutOfMemory(); + + expect(mgr.getMemoryInfo().criticalThreshold).toBe(before); + }); + + it('reports the current estimate each time it fires', () => { + const { mgr, queueFrameEvent } = makeManager({ criticalThreshold: 200e6 }); + + mgr.setTextureMemUse(fakeTexture(), 50e6); + mgr.handleOutOfMemory(); + 
mgr.setTextureMemUse(fakeTexture(), 80e6); + mgr.handleOutOfMemory(); + + expect(queueFrameEvent.mock.calls[0]![1]).toEqual({ + memUsed: 76e6, // 26e6 baseline + 50e6 + criticalThreshold: 200e6, + }); + expect(queueFrameEvent.mock.calls[1]![1]).toEqual({ + memUsed: 156e6, // 26e6 baseline + 50e6 + 80e6 + criticalThreshold: 200e6, + }); + }); +}); diff --git a/src/core/TextureMemoryManager.ts b/src/core/TextureMemoryManager.ts index 361db79..4487e6a 100644 --- a/src/core/TextureMemoryManager.ts +++ b/src/core/TextureMemoryManager.ts @@ -342,10 +342,36 @@ export class TextureMemoryManager { }, 1000); } - // If the threshold is 0, we disable the memory manager by replacing the - // setTextureMemUse method with a no-op function. + // If the threshold is 0, we disable memory tracking/cleanup by replacing the + // setTextureMemUse method with a no-op function. Note this only disables LRU + // tracking — GPU out-of-memory detection still runs (see handleOutOfMemory). if (criticalThreshold === 0) { this.setTextureMemUse = () => {}; } } + + /** + * React to a real GPU out-of-memory reported by the renderer. + * + * @remarks + * WebGL never exposes the VRAM budget up front, so the only certain signal is + * a `GL_OUT_OF_MEMORY` after the fact. When it fires we queue an `outOfMemory` + * frame event carrying the estimated memory in use and the critical threshold + * in effect — the estimate is a *measured ceiling* (the real budget is at or + * below it). What to do about it (lower the threshold, persist, reload) is + * application policy, not the renderer's; see the `outOfMemory` event docs on + * the public Renderer for the recommended integration. + * + * The engine also requests an immediate cleanup as a best-effort mitigation + * to free non-renderable textures before the app reacts. 
+ */ + handleOutOfMemory(): void { + this.stage.queueFrameEvent('outOfMemory', { + memUsed: this.memUsed, + criticalThreshold: this.criticalThreshold, + }); + + // Request a critical cleanup; this only sets the flag, nothing is freed here. + this.criticalCleanupRequested = true; + } } diff --git a/src/core/platforms/web/WebPlatform.outOfMemory.test.ts b/src/core/platforms/web/WebPlatform.outOfMemory.test.ts new file mode 100644 index 0000000..ae544c2 --- /dev/null +++ b/src/core/platforms/web/WebPlatform.outOfMemory.test.ts @@ -0,0 +1,78 @@ +/** + * Tests that the GPU out-of-memory probe runs at the idle transition (end of a + * render burst), not on every active frame. + */ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { WebPlatform } from './WebPlatform.js'; +import type { Stage } from '../../Stage.js'; + +function makeIdleStage(outOfMemory: boolean) { + const checkForOutOfMemory = vi.fn(() => outOfMemory); + const handleOutOfMemory = vi.fn(); + const stage = { + isContextLost: false, + targetFrameTime: 0, + updateFrameTime: vi.fn(), + updateAnimations: vi.fn(() => false), + hasSceneUpdates: vi.fn(() => false), // idle + calculateFps: vi.fn(), + drawFrame: vi.fn(), + flushFrameEvents: vi.fn(), + shManager: { cleanup: vi.fn() }, + eventBus: { emit: vi.fn() }, + txMemManager: { + checkCleanup: vi.fn(() => false), + cleanup: vi.fn(), + handleOutOfMemory, + }, + renderer: { checkForOutOfMemory }, + } as unknown as Stage; + return { stage, checkForOutOfMemory, handleOutOfMemory }; +} + +describe('WebPlatform render loop — out-of-memory probe at idle', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + function runOneIdleFrame(stage: Stage) { + let capturedLoop = null as ((t?: number) => void) | null; + const raf = vi.fn((cb: (t?: number) => void) => { + capturedLoop = cb; + return 1; + }); + vi.stubGlobal('requestAnimationFrame', raf); + vi.stubGlobal( + 'setTimeout', + vi.fn(() => 1 as unknown as ReturnType<typeof setTimeout>), + ); + + new 
WebPlatform().startLoop(stage); + capturedLoop!(0); + } + + it('probes the renderer once when the scene goes idle', () => { + const { stage, checkForOutOfMemory } = makeIdleStage(false); + runOneIdleFrame(stage); + expect(checkForOutOfMemory).toHaveBeenCalledTimes(1); + }); + + it('handles OOM when the probe reports it at idle', () => { + const { stage, handleOutOfMemory } = makeIdleStage(true); + runOneIdleFrame(stage); + expect(handleOutOfMemory).toHaveBeenCalledTimes(1); + }); + + it('does not handle OOM when the probe reports none', () => { + const { stage, handleOutOfMemory } = makeIdleStage(false); + runOneIdleFrame(stage); + expect(handleOutOfMemory).not.toHaveBeenCalled(); + }); + + it('does not probe on an active (non-idle) frame', () => { + const { stage, checkForOutOfMemory } = makeIdleStage(false); + (stage.hasSceneUpdates as ReturnType<typeof vi.fn>).mockReturnValue(true); + runOneIdleFrame(stage); + expect(checkForOutOfMemory).not.toHaveBeenCalled(); + }); +}); diff --git a/src/core/platforms/web/WebPlatform.ts b/src/core/platforms/web/WebPlatform.ts index 8c2d527..80a7390 100644 --- a/src/core/platforms/web/WebPlatform.ts +++ b/src/core/platforms/web/WebPlatform.ts @@ -77,6 +77,14 @@ export class WebPlatform extends Platform { setTimeout(requestLoop, Math.max(targetFrameTime, 15)); if (isIdle === false) { + // The render burst has settled. Probe for a GPU out-of-memory now + // rather than every frame: GL errors accumulate and persist until + // drained, so a single check here still catches any OOM raised during + // the active frames, without paying the getError() CPU/GPU sync on + // every frame. Queues the `outOfMemory` event, flushed below. 
+ if (stage.renderer.checkForOutOfMemory() === true) { + stage.txMemManager.handleOutOfMemory(); + } stage.shManager.cleanup(); stage.eventBus.emit('idle'); isIdle = true; diff --git a/src/core/renderers/CoreRenderer.ts b/src/core/renderers/CoreRenderer.ts index 2249c14..f5e0077 100644 --- a/src/core/renderers/CoreRenderer.ts +++ b/src/core/renderers/CoreRenderer.ts @@ -67,4 +67,16 @@ export abstract class CoreRenderer { * on the next render call. */ invalidateQuadBuffer?(): void; + + /** + * Probe the backend for a GPU out-of-memory condition since the last call. + * Returns `true` when an out-of-memory was seen. Backends that cannot detect + * this (e.g. Canvas2D) return `false`. + * + * @remarks + * Called by the render loop (see WebPlatform) when the scene goes idle, not + * on every frame. Backends where the probe is expensive (a CPU/GPU sync, e.g. + * WebGL `gl.getError()`) rely on this cadence rather than per-draw checks. + */ + abstract checkForOutOfMemory(): boolean; } diff --git a/src/core/renderers/canvas/CanvasRenderer.ts b/src/core/renderers/canvas/CanvasRenderer.ts index cf02b1d..8ef5613 100644 --- a/src/core/renderers/canvas/CanvasRenderer.ts +++ b/src/core/renderers/canvas/CanvasRenderer.ts @@ -264,6 +264,11 @@ export class CanvasRenderer extends CoreRenderer { return null; } + // Canvas2D has no GPU out-of-memory signal to probe. + checkForOutOfMemory(): boolean { + return false; + } + /** * Updates the clear color of the canvas renderer. * diff --git a/src/core/renderers/webgl/WebGlRenderer.ts b/src/core/renderers/webgl/WebGlRenderer.ts index b0d838e..f4aa9c0 100644 --- a/src/core/renderers/webgl/WebGlRenderer.ts +++ b/src/core/renderers/webgl/WebGlRenderer.ts @@ -42,6 +42,15 @@ import type { Dimensions } from '../../../common/CommonTypes.js'; export type WebGlRendererOptions = CoreRendererOptions; +const GL_OUT_OF_MEMORY = 0x0505; + +/** + * Upper bound on how many queued GL errors we drain per probe in + * {@link WebGlRenderer.checkForOutOfMemory}. 
Keeps the per-probe `getError()` + * sync cost bounded even if the error queue is unexpectedly deep. + */ +const MAX_DRAINED_GL_ERRORS = 8; + interface CoreWebGlSystem { parameters: CoreWebGlParameters; extensions: CoreWebGlExtensions; @@ -1283,6 +1292,33 @@ export class WebGlRenderer extends CoreRenderer { return bufferInfo; } + /** + * Drain the GL error queue once and report whether a GL_OUT_OF_MEMORY was + * seen since the last call. + * + * @remarks + * `gl.getError()` forces a CPU↔GPU sync, so this is deliberately invoked only + * at the render loop's idle transition, not after each texture upload. + * `getError()` returns one error at a time, so we drain a bounded number of + * queued errors to ensure a non-OOM error ahead of the OOM doesn't mask it + * for this probe. Non-OOM errors are ignored here (the renderer otherwise + * only inspects them in development builds). + */ + override checkForOutOfMemory(): boolean { + const glw = this.glw; + let outOfMemory = false; + for (let i = 0; i < MAX_DRAINED_GL_ERRORS; i++) { + const error = glw.getError(); + if (error === 0) { + break; + } + if (error === GL_OUT_OF_MEMORY) { + outOfMemory = true; + } + } + return outOfMemory; + } + getDefaultShaderNode(): WebGlShaderNode { if (this.defaultShaderNode !== null) { return this.defaultShaderNode as WebGlShaderNode; diff --git a/src/main-api/Renderer.ts b/src/main-api/Renderer.ts index 940ebcc..e8e130f 100644 --- a/src/main-api/Renderer.ts +++ b/src/main-api/Renderer.ts @@ -28,7 +28,7 @@ import { Platform } from '../core/platforms/Platform.js'; * @category Events * @example * ```typescript - * renderer.on('fpsUpdate', (data) => { + * renderer.on('fpsUpdate', (_target, data) => { * console.log(`Current FPS: ${data.fps}`); * if (data.contextSpyData) { * console.log('WebGL calls:', data.contextSpyData); @@ -49,7 +49,7 @@ export interface RendererMainFpsUpdateEvent { * @category Events * @example * ```typescript - * 
renderer.on('frameTick', (_target, data) => { * console.log(`Frame time: ${data.time}ms, delta: ${data.delta}ms`); * }); * ``` @@ -67,7 +67,7 @@ export interface RendererMainFrameTickEvent { * @category Events * @example * ```typescript - * renderer.on('renderUpdate', (data) => { + * renderer.on('renderUpdate', (_target, data) => { * console.log(`Rendered quads: ${data.quads}, renderOps: ${data.renderOps}`); * }); * ``` @@ -110,7 +110,7 @@ export interface RendererMainIdleEvent { * @category Events * @example * ```typescript - * renderer.on('criticalCleanup', (data) => { + * renderer.on('criticalCleanup', (_target, data) => { * console.log(`Memory cleanup triggered!`); * console.log(`Memory used: ${data.memUsed} bytes`); * console.log(`Critical threshold: ${data.criticalThreshold} bytes`); @@ -130,7 +130,7 @@ export interface RendererMainCriticalCleanupEvent { * @category Events * @example * ```typescript - * renderer.on('criticalCleanupFailed', (data) => { + * renderer.on('criticalCleanupFailed', (_target, data) => { * console.warn(`Memory cleanup failed!`); * console.log(`Memory still used: ${data.memUsed} bytes`); * console.log(`Critical threshold: ${data.criticalThreshold} bytes`); @@ -167,6 +167,90 @@ export interface RendererMainContextLostEvent { readonly __eventHasNoPayload?: never; } +/** + * GPU Out Of Memory Event Data + * + * @remarks + * Fired when the renderer detects a real `GL_OUT_OF_MEMORY` from the GPU (probed + * when the scene goes idle). This is the only certain signal that the texture + * memory estimate has overshot the device's real VRAM budget. At this point a + * texture upload has already failed and the driver may soon drop the WebGL + * context, so the supported recovery is for the application to reload with a + * lower `criticalThreshold`. + * + * `memUsed` is the estimated texture memory in use when the failure is detected. 
+ * Because the upload failed, the real GPU budget is at or below this value — so + * it is a good basis for the next `criticalThreshold`. + * + * The renderer deliberately does NOT persist, reload, or change the threshold + * itself — that is application policy. The recommended integration is to lower + * the threshold, persist it, and reload: + * + * @category Events + * @example + * ```typescript + * // --- on startup: read the calibrated threshold before creating the renderer + * // + * // Namespace the storage key per app. On TV devices that run from the + * // filesystem (file://) the origin is null/opaque, so a bare key can collide + * // across apps — including the path keeps each app's calibration separate. + * const STORAGE_KEY = `myapp:criticalThreshold:${location.pathname}`; + * + * // Never let calibration drive the threshold so low the UX breaks. Pick a + * // floor that matches your app (here, 70% of the default budget). + * const DEFAULT_CRITICAL = 200e6; + * const MIN_THRESHOLD = Math.round(DEFAULT_CRITICAL * 0.7); + * + * function readCriticalThreshold(): number { + * const raw = + * typeof localStorage !== 'undefined' + * ? localStorage.getItem(STORAGE_KEY) + * : null; + * const stored = raw !== null ? parseInt(raw, 10) : NaN; + * if (Number.isNaN(stored) === false && stored > 0) { + * return Math.max(stored, MIN_THRESHOLD); + * } + * return DEFAULT_CRITICAL; + * } + * + * const renderer = new RendererMain( + * { textureMemory: { criticalThreshold: readCriticalThreshold() } }, + * 'app', + * ); + * + * // --- at runtime: react to a real GPU out-of-memory + * let handlingOOM = false; + * renderer.on('outOfMemory', (_target, { memUsed, criticalThreshold }) => { + * if (handlingOOM === true) { + * return; // debounce — several uploads can fail in the same burst + * } + * handlingOOM = true; + * + * // The OOM proves the real budget is <= memUsed. Drop to 90% of the lower + * // of (estimate, current threshold), but never below the floor. 
+ * const ceiling = Math.min(memUsed, criticalThreshold); + * const next = Math.max(Math.round(ceiling * 0.9), MIN_THRESHOLD); + * + * try { + * localStorage.setItem(STORAGE_KEY, String(next)); + * } catch (e) { + * // storage may be blocked (e.g. file:// with storage disabled); the new + * // value just won't survive the reload. + * } + * + * // Reload so the renderer reinitializes with the lower budget. The engine + * // does not rebuild GPU resources in place, so reload is the clean recovery. + * location.reload(); + * }); + * ``` + */ +export interface RendererMainOutOfMemoryEvent { + /** Estimated texture memory in use at the time of the failure (bytes) */ + memUsed: number; + /** Critical threshold in effect at the time of the failure (bytes) */ + criticalThreshold: number; +} + /** * Settings for the Renderer that can be updated during runtime. */ @@ -532,11 +616,11 @@ export type RendererMainSettings = RendererRuntimeSettings & { * * Listen to events using the standard EventEmitter API: * ```typescript - * renderer.on('fpsUpdate', (data: RendererMainFpsUpdateEvent) => { + * renderer.on('fpsUpdate', (_target, data: RendererMainFpsUpdateEvent) => { * console.log(`FPS: ${data.fps}`); * }); * - * renderer.on('idle', (data: RendererMainIdleEvent) => { + * renderer.on('idle', () => { * // Renderer is idle - no scene changes * }); * ``` @@ -548,6 +632,7 @@ export type RendererMainSettings = RendererRuntimeSettings & { * @see {@link RendererMainCriticalCleanupEvent} * @see {@link RendererMainCriticalCleanupFailedEvent} * @see {@link RendererMainContextLostEvent} + * @see {@link RendererMainOutOfMemoryEvent} * * @fires RendererMain#fpsUpdate * @fires RendererMain#frameTick @@ -556,6 +641,7 @@ export type RendererMainSettings = RendererRuntimeSettings & { * @fires RendererMain#criticalCleanup * @fires RendererMain#criticalCleanupFailed * @fires RendererMain#contextLost + * @fires RendererMain#outOfMemory */ export class RendererMain extends EventEmitter { readonly 
root: INode; @@ -717,11 +803,13 @@ export class RendererMain extends EventEmitter { const currentTxSettings = (this.stage && this.stage.options.textureMemory) || {}; + const criticalThreshold = + textureMemory?.criticalThreshold ?? + currentTxSettings?.criticalThreshold ?? + 200e6; + return { - criticalThreshold: - textureMemory?.criticalThreshold ?? - currentTxSettings?.criticalThreshold ?? - 200e6, + criticalThreshold, targetThresholdLevel: textureMemory?.targetThresholdLevel ?? currentTxSettings?.targetThresholdLevel ??