From cc580bf470763ccd6232e6a76ad59011819187e9 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 27 Apr 2026 12:39:32 -0700 Subject: [PATCH 01/10] feat(unofficial): support 950dt + huawei-mindie for unofficial runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds Huawei Ascend 950DT GPU and Huawei MindIE framework so the Mock-ascend GitHub Actions run (25014782858) can be loaded via the unofficial-run viewer. TDP/power/cost are placeholder 9.99 — Huawei specs are not yet finalized. - HW_REGISTRY: new 950dt entry, vendor=Huawei, arch=Ascend, sort=9 - VENDOR_OKLCH_ZONES / VENDOR_HSL_ZONES: amber/yellow zone for Huawei (steals 30-60° HSL from AMD's lower band; AMD keeps 300-360 + 0-30; NVIDIA's HSL band also narrows from 60-195 to 90-195, leaving 60-90 unassigned) - FW_REGISTRY: huawei-mindie → "Huawei MindIE" - dynamic-colors Vendor type extended with 'huawei' - chart-utils BANNED_HUE_TEST/PREFERRED_ZONE entries for huawei (avoid AMD-red and NVIDIA-green clashes; preferred amber/yellow zone) Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/src/lib/chart-utils.ts | 6 ++++- packages/app/src/lib/dynamic-colors.ts | 3 ++- packages/constants/src/framework-aliases.ts | 1 + packages/constants/src/gpu-keys.ts | 28 ++++++++++++++++----- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts index c7c51a7c..5d522116 100644 --- a/packages/app/src/lib/chart-utils.ts +++ b/packages/app/src/lib/chart-utils.ts @@ -20,16 +20,19 @@ import { getVendor, type Vendor } from '@/lib/dynamic-colors'; * In Lab space: 0° = red, 90° = yellow, 180° = green, 270° = blue. * NVIDIA must not be red/rose/pink (wraps around 0°: 320–40°). * AMD must not be green (roughly 120–195°). + * Huawei must not be red (AMD zone) or green (NVIDIA zone). 
*/ const BANNED_HUE_TEST: Record boolean) | null> = { nvidia: (hue) => hue >= 320 || hue <= 40, // red/rose/pink zone amd: (hue) => hue >= 120 && hue <= 195, // green zone + huawei: (hue) => hue >= 320 || hue <= 40 || (hue >= 120 && hue <= 195), // avoid red + green unknown: null, }; /** * Preferred hue ranges (CIELab) — used when a vendor has few items so they - * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges. + * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges, + * Huawei = amber/yellow. */ const PREFERRED_ZONE: Record< Vendor, @@ -37,6 +40,7 @@ const PREFERRED_ZONE: Record< > = { nvidia: { hmin: 100, hmax: 195 }, // greens/teals amd: { hmin: 20, hmax: 50, cmin: 70, lmin: 50 }, // vivid reds/oranges + huawei: { hmin: 50, hmax: 95, cmin: 60 }, // amber/yellow unknown: null, }; diff --git a/packages/app/src/lib/dynamic-colors.ts b/packages/app/src/lib/dynamic-colors.ts index 38b9e10e..410701c3 100644 --- a/packages/app/src/lib/dynamic-colors.ts +++ b/packages/app/src/lib/dynamic-colors.ts @@ -14,7 +14,7 @@ import { getModelSortIndex } from '@/lib/constants'; // Vendor detection // --------------------------------------------------------------------------- -export type Vendor = 'nvidia' | 'amd' | 'unknown'; +export type Vendor = 'nvidia' | 'amd' | 'huawei' | 'unknown'; /** Determine vendor from a hardware key by looking up GPU_VENDORS. 
*/ export function getVendor(hwKey: string): Vendor { @@ -23,6 +23,7 @@ export function getVendor(hwKey: string): Vendor { const vendor = GPU_VENDORS[base]; if (vendor === 'NVIDIA') return 'nvidia'; if (vendor === 'AMD') return 'amd'; + if (vendor === 'Huawei') return 'huawei'; return 'unknown'; } diff --git a/packages/constants/src/framework-aliases.ts b/packages/constants/src/framework-aliases.ts index cc5eb6b4..6d34e543 100644 --- a/packages/constants/src/framework-aliases.ts +++ b/packages/constants/src/framework-aliases.ts @@ -9,6 +9,7 @@ export const FW_REGISTRY: Record = { 'dynamo-sglang': { label: 'Dynamo SGLang' }, 'dynamo-trt': { label: 'Dynamo TRT' }, 'dynamo-vllm': { label: 'Dynamo vLLM' }, + 'huawei-mindie': { label: 'Huawei MindIE' }, 'mori-sglang': { label: 'MoRI SGLang' }, sglang: { label: 'SGLang' }, trt: { label: 'TRT' }, diff --git a/packages/constants/src/gpu-keys.ts b/packages/constants/src/gpu-keys.ts index ec0ba96e..95590588 100644 --- a/packages/constants/src/gpu-keys.ts +++ b/packages/constants/src/gpu-keys.ts @@ -122,6 +122,17 @@ export const HW_REGISTRY: Record = { costn: 1.9, costr: 2.1, }, + '950dt': { + vendor: 'Huawei', + arch: 'Ascend', + label: 'Ascend 950DT', + sort: 9, + tdp: 9.99, + power: 9.99, + costh: 9.99, + costn: 9.99, + costr: 9.99, + }, }; /** Canonical set of GPU key strings used across all packages. 
*/ @@ -146,7 +157,9 @@ export const GPU_VENDORS: Record = Object.fromEntries( * Layout (approximate): * 0-12 (gap) * 12-42 AMD reds/oranges - * 42-120 (gap) + * 42-60 (gap) + * 60-90 Huawei amber/yellow + * 90-120 (gap) * 120-170 NVIDIA greens * 170-275 (gap) * 275-330 unknown / fallback (purples) @@ -157,6 +170,7 @@ export const VENDOR_OKLCH_ZONES: Record< { start: number; end: number; chroma: { light: number; dark: number } } > = { amd: { start: 12, end: 42, chroma: { light: 0.18, dark: 0.22 } }, + huawei: { start: 60, end: 90, chroma: { light: 0.16, dark: 0.18 } }, nvidia: { start: 120, end: 170, chroma: { light: 0.15, dark: 0.15 } }, unknown: { start: 275, end: 330, chroma: { light: 0.14, dark: 0.16 } }, }; @@ -165,23 +179,25 @@ export const VENDOR_OKLCH_ZONES: Record< * Preferred HSL hue zones for high-contrast mode. * Each vendor gets a non-overlapping slice of the 360° hue wheel so items * from different vendors are visually distinct and vendor-appropriate - * (NVIDIA = greens, AMD = reds/oranges, unknown = blues/purples). + * (NVIDIA = greens, AMD = reds/oranges, Huawei = amber/yellow, unknown = blues/purples). * When a vendor has too many items to fit with sufficient spacing, the zone * expands symmetrically — these are preferred zones, not hard constraints. * * Layout (360° wheel): - * NVIDIA: 60–195 (135°) — greens through cyans - * AMD: 300–360 + 0–60 (120°, wraps) — magentas through oranges + * NVIDIA: 90–195 (105°) — greens through cyans + * Huawei: 30–60 (30°) — amber/yellow + * AMD: 300–360 + 0–30 (90°, wraps) — magentas through reds * unknown: 195–300 (105°) — blues/purples * * Each entry is an array of linear {start, span} segments (wrapping bands * are split into two segments). 
*/ export const VENDOR_HSL_ZONES: Record = { - nvidia: [{ start: 60, span: 135 }], + nvidia: [{ start: 90, span: 105 }], + huawei: [{ start: 30, span: 30 }], amd: [ { start: 300, span: 60 }, - { start: 0, span: 60 }, + { start: 0, span: 30 }, ], unknown: [{ start: 195, span: 105 }], }; From c01af64859608cbba867c01d6d4f89c57d5a4316 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 27 Apr 2026 13:15:35 -0700 Subject: [PATCH 02/10] feat(unofficial-run): fall back to per-config bmk_* artifacts The Mock-ascend workflow uploads one bmk_*_conc_* artifact per concurrency instead of a single aggregated results_bmk. The unofficial-run API was filtering strictly on name === 'results_bmk', returning 404 even though the per-config artifacts contained valid rows. When results_bmk is absent, gather every artifact whose name starts with 'bmk_', download each, and concat the rows before normalization. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../app/src/app/api/unofficial-run/route.ts | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index 4e5b5265..1a84a4ba 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -217,15 +217,22 @@ async function processSingleRun( const bmkArtifact = artifacts .filter((a) => a.name === 'results_bmk') .toSorted((a, b) => b.id - a.id)[0]; + // Fallback: some workflows (e.g. the Mock-ascend uploader) emit one + // `bmk_*_conc_*` artifact per concurrency instead of a + // single aggregated `results_bmk`. When the canonical artifact is absent, + // gather everything matching `bmk_*` and concatenate the rows. + const perConfigBmkArtifacts = bmkArtifact + ? 
[] + : artifacts.filter((a) => a.name.startsWith('bmk_')); const evalArtifact = artifacts .filter((a) => a.name === 'eval_results_all') .toSorted((a, b) => b.id - a.id)[0]; - if (!bmkArtifact && !evalArtifact) { + if (!bmkArtifact && perConfigBmkArtifacts.length === 0 && !evalArtifact) { return { errorResponse: NextResponse.json( { - error: `No results_bmk or eval_results_all artifact found for runId ${runId}`, + error: `No results_bmk, bmk_*, or eval_results_all artifact found for runId ${runId}`, }, { status: 404 }, ), @@ -246,6 +253,17 @@ async function processSingleRun( ); if (errorResponse) return { errorResponse }; benchmarks = normalizeArtifactRows(rows, date, runUrl || null); + } else if (perConfigBmkArtifacts.length > 0) { + const allRows: Record[] = []; + for (const artifact of perConfigBmkArtifacts) { + const { rows, errorResponse } = await downloadArtifactRows( + artifact.archive_download_url, + githubToken, + ); + if (errorResponse) return { errorResponse }; + allRows.push(...rows); + } + benchmarks = normalizeArtifactRows(allRows, date, runUrl || null); } if (evalArtifact) { From f1549edcb8a88d46f32265956a5fb4204581f1ca Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 6 May 2026 21:01:00 +0000 Subject: [PATCH 03/10] feat(unofficial): support 8K/256 sequence (8192/256) Registers '8k/256' (isl=8192, osl=256) so unofficial-run overlays from sweeps like InferenceX run 25457884421 render instead of being silently dropped by islOslToSequence returning null. - packages/constants/src/models.ts: add 8k/256 to both conversion maps. - packages/app/src/lib/data-mappings.ts: add Sequence.EightK_256 with SEQUENCE_CONFIG entry (label "8K / 256"). Replace the binary 1k-vs-8k ternary in getModelAndSequenceFromArtifact with islOslToSequence so future ISL/OSL pairs registered in constants are picked up automatically rather than mis-mapped. 
- Tests added for round-trip + label coverage in models.test.ts / models-mapping.test.ts and a dsv4 + 8192/256 case in data-mappings.test.ts. Co-authored-by: Bryan Shan --- packages/app/src/lib/data-mappings.test.ts | 10 ++++++++++ packages/app/src/lib/data-mappings.ts | 13 +++++++------ packages/app/src/lib/models-mapping.test.ts | 10 +++++++++- packages/constants/src/models.test.ts | 10 +++++++++- packages/constants/src/models.ts | 2 ++ 5 files changed, 37 insertions(+), 8 deletions(-) diff --git a/packages/app/src/lib/data-mappings.test.ts b/packages/app/src/lib/data-mappings.test.ts index d96d28a6..c79c52f2 100644 --- a/packages/app/src/lib/data-mappings.test.ts +++ b/packages/app/src/lib/data-mappings.test.ts @@ -124,6 +124,15 @@ describe('getModelAndSequenceFromArtifact', () => { expect(result).toEqual({ model: Model.Kimi_K2_5, sequence: Sequence.EightK_OneK }); }); + it('parses structured artifact with dsv4 prefix and 8k/256 ISL/OSL', () => { + const result = getModelAndSequenceFromArtifact({ + infmax_model_prefix: 'dsv4', + isl: 8192, + osl: 256, + }); + expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_256 }); + }); + it('returns undefined for unknown model prefix', () => { const result = getModelAndSequenceFromArtifact({ infmax_model_prefix: 'unknown', @@ -206,6 +215,7 @@ describe('getSequenceLabel', () => { expect(getSequenceLabel(Sequence.OneK_OneK)).toBe('1K / 1K'); expect(getSequenceLabel(Sequence.OneK_EightK)).toBe('1K / 8K'); expect(getSequenceLabel(Sequence.EightK_OneK)).toBe('8K / 1K'); + expect(getSequenceLabel(Sequence.EightK_256)).toBe('8K / 256'); }); it('falls back to the sequence value for unknown sequence', () => { diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index ad7d4ad7..2dcfd0f7 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -1,3 +1,5 @@ +import { islOslToSequence } from '@semianalysisai/inferencex-constants'; 
+ export enum Model { Llama3_3_70B = 'Llama-3.3-70B-Instruct-FP8', Llama3_1_70B = 'Llama-3.1-70B-Instruct-FP8-KV', @@ -118,6 +120,7 @@ export enum Sequence { OneK_OneK = '1k/1k', OneK_EightK = '1k/8k', EightK_OneK = '8k/1k', + EightK_256 = '8k/256', } const SEQUENCE_CONFIG: Record = @@ -125,6 +128,7 @@ const SEQUENCE_CONFIG: Record { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('converts 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('4k/4k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -67,13 +71,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(8192, 1024)).toBe('8k/1k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + it('returns null for unknown ISL/OSL combos', () => { expect(islOslToSequence(4096, 4096)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { const islOsl = sequenceToIslOsl(seq)!; expect(islOslToSequence(islOsl.isl, islOsl.osl)).toBe(seq); } diff --git a/packages/constants/src/models.test.ts b/packages/constants/src/models.test.ts index 308c9c2a..515fcf65 100644 --- a/packages/constants/src/models.test.ts +++ b/packages/constants/src/models.test.ts @@ -39,6 +39,10 @@ describe('sequenceToIslOsl', () => { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('parses 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('2k/2k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -55,13 +59,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(1024, 
8192)).toBe('1k/8k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + it('returns null for unmapped ISL/OSL pairs', () => { expect(islOslToSequence(2048, 2048)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl for all known sequences', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { const parsed = sequenceToIslOsl(seq)!; expect(islOslToSequence(parsed.isl, parsed.osl)).toBe(seq); } diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts index c75034c7..83b6540c 100644 --- a/packages/constants/src/models.ts +++ b/packages/constants/src/models.ts @@ -41,6 +41,7 @@ export function sequenceToIslOsl(seq: string): { isl: number; osl: number } | nu '1k/1k': { isl: 1024, osl: 1024 }, '1k/8k': { isl: 1024, osl: 8192 }, '8k/1k': { isl: 8192, osl: 1024 }, + '8k/256': { isl: 8192, osl: 256 }, }; return map[seq] ?? null; } @@ -51,6 +52,7 @@ export function islOslToSequence(isl: number, osl: number): string | null { '1024_1024': '1k/1k', '1024_8192': '1k/8k', '8192_1024': '8k/1k', + '8192_256': '8k/256', }; return map[`${isl}_${osl}`] ?? null; } From 5e327a74346815689b8710a60b441bd7d8c2af32 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 04:26:56 +0000 Subject: [PATCH 04/10] feat(unofficial): toggle to render unofficial-run rows as ingested series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a "Show as ingested" switch in the inference scatter legend (visible when an unofficial run is loaded) that promotes overlay rows to first-class points so they participate in the same filter pipeline as DB-ingested data: hardware toggles, Optimal-only, precision filtering, and per-(hwKey, run) roofline grouping. 
Each (run, original hwKey) pair is rewritten to a synthesized hwKey of the form `${origHwKey}__uorun${runId}` — preserving the GPU base prefix so `getModelSortIndex` and `isKnownGpu` keep working — with a synthesized HardwareEntry whose label embeds the run branch and a per-run color override drawn from the existing overlay palette. Multiple runs and per-run multi-GPU configs each surface as their own legend entry. URL-synced via `i_uoff_ingested=1` so the state survives reloads and shares. The X-shape overlay layer is suppressed and stale DOM is cleaned up when the toggle flips on (the layer system has no built-in teardown for layers that drop out of the array). Unit tests cover synth-key encoding, no-op behavior when no overlay matches, and multi-run separation. Co-authored-by: Bryan Shan --- packages/app/cypress/support/mock-data.ts | 3 + .../components/inference/InferenceContext.tsx | 62 +++- .../app/src/components/inference/types.ts | 6 + .../components/inference/ui/ChartDisplay.tsx | 17 +- .../components/inference/ui/ScatterGraph.tsx | 42 ++- .../components/unofficial-run-provider.tsx | 37 ++ packages/app/src/lib/unofficial-merge.test.ts | 320 ++++++++++++++++++ packages/app/src/lib/unofficial-merge.ts | 220 ++++++++++++ packages/app/src/lib/url-state.ts | 2 + 9 files changed, 702 insertions(+), 7 deletions(-) create mode 100644 packages/app/src/lib/unofficial-merge.test.ts create mode 100644 packages/app/src/lib/unofficial-merge.ts diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 467cbd59..0b9769c2 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -246,6 +246,7 @@ export function createMockInferenceContext( activePresetId: null, setActivePresetId: namedStub('setActivePresetId'), presetGuardRef: { current: false } as React.RefObject, + hwColorOverrides: {}, ...overrides, }; } @@ -435,6 +436,8 @@ export function createMockUnofficialRunContext( ): 
UnofficialRunContextType { return { isUnofficialRun: false, + mergeAsIngested: false, + setMergeAsIngested: namedStub('setMergeAsIngested'), unofficialRunInfo: null, unofficialRunInfos: [], runIndexByUrl: {}, diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 633a6269..8f0c23c9 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -17,10 +17,14 @@ import { FAVORITE_PRESETS, type FavoritePreset } from '@/components/favorites/fa import { useGlobalFilters } from '@/components/GlobalFilterContext'; import type { + ChartDefinition, InferenceChartContextType, InferenceData, TrackedConfig, } from '@/components/inference/types'; +import { useUnofficialRun } from '@/components/unofficial-run-provider'; +import chartDefinitions from '@/components/inference/inference-chart-config.json'; +import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -144,10 +148,10 @@ export function InferenceProvider({ const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined; const { - graphs, + graphs: officialGraphs, loading: chartDataLoading, error: chartDataError, - hardwareConfig, + hardwareConfig: officialHardwareConfig, } = useChartData( selectedModel, effectiveSequence, @@ -165,6 +169,58 @@ export function InferenceProvider({ latestDate, ); + // ── Promote unofficial rows to first-class series when toggled ──────────── + // When `mergeAsIngested` is on, overlay points are re-keyed with per-run + // synth hwKeys and merged into `graphs` so they participate in the same + // filter/optimal-only/legend pipeline as official data. The resulting + // `hwColorOverrides` map is consumed by ScatterGraph's color resolver. 
+ const { mergeAsIngested, unofficialChartData, unofficialRunInfos, runIndexByUrl } = + useUnofficialRun(); + + const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => { + if (!mergeAsIngested) { + return { + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + hwColorOverrides: {} as Record, + }; + } + const merged = mergeUnofficialIntoOfficial({ + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + unofficialChartData, + selectedModel, + selectedSequence: effectiveSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos: unofficialRunInfos.map((r) => ({ + id: r.id, + branch: r.branch, + url: r.url, + })), + chartDefinitions: chartDefinitions as ChartDefinition[], + }); + return { + graphs: merged.graphs, + hardwareConfig: merged.hardwareConfig, + hwColorOverrides: merged.colorOverrides, + }; + }, [ + mergeAsIngested, + officialGraphs, + officialHardwareConfig, + unofficialChartData, + selectedModel, + effectiveSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos, + ]); + // For GPU comparison date picker — use shared availability data from global filters const dbModelKeys = useMemo( () => DISPLAY_MODEL_TO_DB[selectedModel] ?? 
[selectedModel], @@ -833,6 +889,7 @@ export function InferenceProvider({ activePresetId, setActivePresetId, presetGuardRef, + hwColorOverrides, }), [ activeHwTypes, @@ -884,6 +941,7 @@ export function InferenceProvider({ removeTrackedConfig, clearTrackedConfigs, activePresetId, + hwColorOverrides, ], ); diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 365923da..db67b2e8 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -527,6 +527,12 @@ export interface InferenceChartContextType { activePresetId: string | null; setActivePresetId: (id: string | null) => void; presetGuardRef: React.RefObject; + /** + * Per-hwKey CSS color overrides. Populated when unofficial-as-ingested + * merging is on so each synthesized run series gets the same per-run color + * the overlay legend would have shown. Empty when nothing is merged. + */ + hwColorOverrides: Record; } export interface CalculateUserCostsRequest { model: string; diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index 9b563bfc..5f8c8f68 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -165,12 +165,20 @@ export default function ChartDisplay() { track('inference_view_changed', { view: value, chartIndex: index }); }; - const { unofficialRunInfo, unofficialRunInfos, runIndexByUrl, getOverlayData, isUnofficialRun } = - useUnofficialRun(); + const { + unofficialRunInfo, + unofficialRunInfos, + runIndexByUrl, + getOverlayData, + isUnofficialRun, + mergeAsIngested, + } = useUnofficialRun(); - // Compute overlay data for each chart type — must match useChartData processing + // Compute overlay data for each chart type — must match useChartData processing. 
+ // When `mergeAsIngested` is on, the unofficial rows are already promoted to + // official series via InferenceContext, so suppress the X-shape overlay layer. const overlayDataByChartType = useMemo(() => { - if (!unofficialRunInfo || !getOverlayData) { + if (mergeAsIngested || !unofficialRunInfo || !getOverlayData) { return { e2e: null, interactivity: null }; } @@ -224,6 +232,7 @@ export default function ChartDisplay() { interactivity: processData(interactivityRaw, 'interactivity'), }; }, [ + mergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index ba8b6a42..35cf8129 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -144,10 +144,13 @@ const ScatterGraph = React.memo( trackedConfigs, addTrackedConfig, removeTrackedConfig, + hwColorOverrides, } = useInference(); const { isUnofficialRun, + mergeAsIngested, + setMergeAsIngested, activeOverlayHwTypes, setActiveOverlayHwTypes, allOverlayHwTypes, @@ -213,12 +216,23 @@ const ScatterGraph = React.memo( () => [...effectiveOfficialHwTypes], [effectiveOfficialHwTypes], ); - const { resolveColor, getCssColor } = useThemeColors({ + const { resolveColor: baseResolveColor, getCssColor } = useThemeColors({ highContrast, identifiers: activeHwKeys, activeKeys: activeOfficialKeys, }); + // Wrap resolveColor so synthesized unofficial-as-ingested hwKeys (provided + // by InferenceContext via `hwColorOverrides`) get their per-run palette + // color even when the vendor system would otherwise pick a GPU-derived hue. 
+ const resolveColor = useCallback( + (identifier: string, hardwareKey?: string): string => { + if (identifier in hwColorOverrides) return hwColorOverrides[identifier]; + return baseResolveColor(identifier, hardwareKey); + }, + [baseResolveColor, hwColorOverrides], + ); + // --- Changelog --- const changelog = availableRuns ? availableRuns[selectedRunId]?.changelog || null : null; const highlightConfigSuffixes = useMemo(() => { @@ -1610,6 +1624,19 @@ const ScatterGraph = React.memo( chartRef.current?.dismissTooltip(); }, [selectedPrecisions, selectedYAxisMetric, hideNonOptimal, overlayData, chartId]); + // Clean up overlay DOM elements when overlayData is removed (e.g. when + // unofficial-as-ingested is toggled on). The layer system has no built-in + // teardown for layers that drop out of the array, so the previous render's + // X-shape points / dashed rooflines would otherwise stick around. + useEffect(() => { + if (overlayData) return; + const svg = chartRef.current?.getSvgElement?.(); + if (!svg) return; + const root = d3.select(svg); + root.selectAll('.unofficial-overlay-pt').remove(); + root.selectAll('.overlay-roofline-path').remove(); + }, [overlayData]); + // Dismiss when pinned point's hardware becomes hidden useEffect(() => { const pp = chartRef.current?.getPinnedPoint() as InferenceData | null; @@ -1771,6 +1798,19 @@ const ScatterGraph = React.memo( track('latency_legend_expanded', { expanded }); }} switches={[ + ...(isUnofficialRun + ? [ + { + id: 'scatter-uoff-as-ingested', + label: 'Show as ingested', + checked: mergeAsIngested, + onCheckedChange: (checked: boolean) => { + setMergeAsIngested(checked); + track('latency_unofficial_as_ingested_toggled', { enabled: checked }); + }, + }, + ] + : []), ...(selectedYAxisMetric !== 'y_inputTputPerGpu' ? 
[ { diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx index 9de84519..c56a2ed9 100644 --- a/packages/app/src/components/unofficial-run-provider.tsx +++ b/packages/app/src/components/unofficial-run-provider.tsx @@ -51,6 +51,15 @@ interface AvailableModelSequence { export interface UnofficialRunContextType { isUnofficialRun: boolean; + /** + * When true, unofficial-run rows are promoted to first-class series in the + * inference scatter — each (run, GPU config) pair becomes its own legend + * entry with the run's branch name, and the rows participate in the same + * filter pipeline as ingested data (Optimal-only, hardware toggles, etc.) + * instead of rendering as a separate X-shape overlay. + */ + mergeAsIngested: boolean; + setMergeAsIngested: (v: boolean) => void; /** First run in the loaded set — kept as a convenience alias for overlay labels. */ unofficialRunInfo: UnofficialRunInfo | null; /** All runs loaded from the `unofficialrun(s)` URL param (comma-separated). */ @@ -176,6 +185,32 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { AvailableModelSequence[] >([]); + // Promote unofficial rows to ingested-style series. Initial value seeded + // synchronously when running in the browser so the toggle starts checked + // when the user shares a URL like `?unofficialrun=…&i_uoff_ingested=1`. + // Under SSR the value is false; we sync from the URL again after mount via + // the one-shot mount effect declared just below. + const [mergeAsIngested, setMergeAsIngestedRaw] = useState(() => { + if (typeof window === 'undefined') return false; + const sp = new URLSearchParams(window.location.search); + return sp.get('i_uoff_ingested') === '1'; + }); + // Re-sync after hydration in case the server rendered with the SSR default. 
+ useEffect(() => { + if (typeof window === 'undefined') return; + const sp = new URLSearchParams(window.location.search); + const fromUrl = sp.get('i_uoff_ingested') === '1'; + setMergeAsIngestedRaw((prev) => (prev !== fromUrl ? fromUrl : prev)); + }, []); + const setMergeAsIngested = useCallback((v: boolean) => { + setMergeAsIngestedRaw(v); + if (typeof window === 'undefined') return; + const url = new URL(window.location.href); + if (v) url.searchParams.set('i_uoff_ingested', '1'); + else url.searchParams.delete('i_uoff_ingested'); + window.history.replaceState({}, '', url); + }, []); + // --- Shared overlay toggle state (unified across both charts) --- const [activeOverlayHwTypes, setActiveOverlayHwTypes] = useState>(new Set()); const [localOfficialOverride, setLocalOfficialOverrideRaw] = useState | null>(null); @@ -398,6 +433,8 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { 0, + mergeAsIngested, + setMergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts new file mode 100644 index 00000000..971953b6 --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -0,0 +1,320 @@ +import { describe, expect, it } from 'vitest'; + +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; + +import { + isSynthHwKey, + makeSynthHwKey, + mergeUnofficialIntoOfficial, + parseSynthHwKey, + type UnofficialChartDataMap, +} from './unofficial-merge'; + +const E2E_DEF: ChartDefinition = { + chartType: 'e2e', + x: 'median_e2el', + y: 'tput_per_gpu', + x_label: 'End-to-end Latency (s)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Latency', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const INTERACTIVITY_DEF: ChartDefinition = { + chartType: 'interactivity', + x: 
'median_intvty', + y: 'tput_per_gpu', + x_label: 'Interactivity (tok/s/user)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Interactivity', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const CHART_DEFS: ChartDefinition[] = [E2E_DEF, INTERACTIVITY_DEF]; + +function makeOverlayPoint(overrides: Partial = {}): InferenceData { + return { + hwKey: 'h100_vllm', + precision: 'fp8', + tp: 8, + conc: 64, + x: 0, + y: 0, + median_e2el: 2.3, + median_intvty: 12.5, + p99_ttft: 0.35, + median_ttft: 0.15, + tpPerGpu: { y: 450.5, roof: false }, + date: '2026-04-01', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + ...overrides, + } as InferenceData; +} + +function makeOverlayChartData(): UnofficialChartDataMap { + const e2eData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const interactivityData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const gpus: HardwareConfig = { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + a100_sglang: { name: 'a100_sglang', label: 'A100', suffix: '(SGLANG)', gpu: 'NVIDIA A100' }, + }; + return { + 'DeepSeek-R1-0528_1k/1k': { + e2e: { data: e2eData, gpus }, + interactivity: { data: interactivityData, gpus }, + }, + }; +} + +function emptyOfficial(): { graphs: RenderableGraph[]; hardwareConfig: HardwareConfig } { + return { + graphs: [ + { model: 'DeepSeek-R1-0528', sequence: '1k/1k', chartDefinition: E2E_DEF, data: [] }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [], + }, + ], + hardwareConfig: {}, + }; +} + +const RUN_INDEX = { + 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100': 0, + '100': 0, + 
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200': 1, + '200': 1, +}; + +const RUN_INFOS = [ + { + id: 100, + branch: 'feature-branch-a', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + }, + { + id: 200, + branch: 'feature-branch-b', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + }, +]; + +describe('synth hwKey helpers', () => { + it('encodes runId into hwKey while preserving the GPU base prefix', () => { + const synth = makeSynthHwKey('h100_vllm', 100); + expect(synth).toBe('h100_vllm__uorun100'); + // Critical: the base GPU is still recoverable via split('_')[0] so + // getModelSortIndex / isKnownGpu keep working. + expect(synth.split('_')[0]).toBe('h100'); + }); + + it('round-trips through parseSynthHwKey', () => { + const synth = makeSynthHwKey('a100_sglang', 200); + expect(parseSynthHwKey(synth)).toEqual({ origHwKey: 'a100_sglang', runId: 200 }); + }); + + it('parseSynthHwKey returns null for non-synth keys', () => { + expect(parseSynthHwKey('h100_vllm')).toBeNull(); + expect(parseSynthHwKey('mi300x')).toBeNull(); + }); + + it('isSynthHwKey detects synthesized keys', () => { + expect(isSynthHwKey(makeSynthHwKey('h100', 100))).toBe(true); + expect(isSynthHwKey('h100_vllm')).toBe(false); + }); +}); + +describe('mergeUnofficialIntoOfficial', () => { + it('is a no-op when unofficialChartData is null', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: null, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: {}, + unofficialRunInfos: [], + }); + expect(result.graphs).toBe(graphs); + expect(result.hardwareConfig).toBe(hardwareConfig); + expect(result.colorOverrides).toEqual({}); + }); + + it('is a no-op when no overlay group matches the selected (model, sequence)', 
() => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'gpt-oss-120b', // not present in overlay map + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + expect(result.graphs).toBe(graphs); + expect(result.colorOverrides).toEqual({}); + }); + + it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig + colorOverrides', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Each chart graph received both overlay rows (different GPUs, both run 100). + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data).toHaveLength(2); + const synthKeys = e2eGraph.data.map((d) => d.hwKey); + expect(synthKeys).toContain('h100_vllm__uorun100'); + expect(synthKeys).toContain('a100_sglang__uorun100'); + + // The synth keys are present in hardwareConfig with branch-bearing labels. + const h100Synth = result.hardwareConfig['h100_vllm__uorun100']; + expect(h100Synth.label).toBe('H100 • feature-branch-a'); + expect(h100Synth.gpu).toContain('UNOFFICIAL: feature-branch-a'); + + // Color overrides are populated for each synth key (palette-based, not GPU-vendor). 
+ expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); + expect(result.colorOverrides['a100_sglang__uorun100']).toBe('var(--overlay-run-0)'); + }); + + it('keeps multiple runs separate so each (run, GPU) becomes its own legend entry', () => { + const data = makeOverlayChartData(); + // Inject a second run's row alongside the first. + const secondRunPoint = makeOverlayPoint({ + hwKey: 'h100_vllm', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + tpPerGpu: { y: 460, roof: false }, + }); + data['DeepSeek-R1-0528_1k/1k'].e2e.data.push(secondRunPoint); + data['DeepSeek-R1-0528_1k/1k'].interactivity.data.push(secondRunPoint); + + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: data, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Same physical GPU (h100_vllm) appears twice — once per run — with distinct + // synth keys so they form separate roofline groups in the scatter chart. 
+ const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + const h100Keys = e2eGraph.data + .map((d) => d.hwKey) + .filter((k) => String(k).startsWith('h100_vllm__uorun')); + expect(h100Keys).toContain('h100_vllm__uorun100'); + expect(h100Keys).toContain('h100_vllm__uorun200'); + + expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100 • feature-branch-b'); + expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); + expect(result.colorOverrides['h100_vllm__uorun200']).toBe('var(--overlay-run-1)'); + }); + + it('preserves official rows alongside merged overlay rows', () => { + const { hardwareConfig } = emptyOfficial(); + const officialPoint = { + hwKey: 'b200_trt', + precision: 'fp4', + tp: 4, + conc: 8, + x: 1.5, + y: 800, + date: '2026-03-01', + } as InferenceData; + const graphs: RenderableGraph[] = [ + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: E2E_DEF, + data: [officialPoint], + }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [officialPoint], + }, + ]; + + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data.some((d) => d.hwKey === 'b200_trt')).toBe(true); + expect(e2eGraph.data.some((d) => String(d.hwKey).startsWith('h100_vllm__uorun'))).toBe(true); + }); + + it('synthesizes stub graphs from chartDefinitions when official graphs is empty', () => { + const result = mergeUnofficialIntoOfficial({ + graphs: [], + hardwareConfig: {}, + unofficialChartData: makeOverlayChartData(), + selectedModel: 
'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + chartDefinitions: CHART_DEFS, + }); + + // Two stub graphs synthesized (e2e + interactivity), each carrying merged overlay rows. + expect(result.graphs).toHaveLength(2); + expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); + }); +}); diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts new file mode 100644 index 00000000..e29db2e8 --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.ts @@ -0,0 +1,220 @@ +/** + * Helpers for promoting unofficial-run benchmark rows to first-class + * "ingested-style" series so they participate in the regular scatter + * filter pipeline (Optimal-only, hardware toggles, precision filter, etc.) + * instead of being rendered as a separate overlay layer. + * + * Each (run, original hwKey) pair gets a synthesized hardware key of the form + * `${origHwKey}__uorun${runId}` + * — preserving the base GPU as `hwKey.split('_')[0]` so `getModelSortIndex` + * and `isKnownGpu` keep working — while still being unique per run so a single + * job with multiple GPUs surfaces as separate legend entries, and multiple + * runs don't collapse onto each other. 
+ */ +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; +import { processOverlayChartData } from '@/components/inference/utils'; +import type { HardwareEntry } from '@/lib/constants'; +import { overlayRunColor, overlayRunIndex } from '@/lib/overlay-run-style'; + +const SYNTH_KEY_DELIM = '__uorun'; + +export interface UnofficialRunInfoLite { + id: number; + branch: string; + url: string; +} + +export interface OverlayChartGroup { + e2e: { data: InferenceData[]; gpus: HardwareConfig }; + interactivity: { data: InferenceData[]; gpus: HardwareConfig }; +} + +export type UnofficialChartDataMap = Record; + +/** Build a unique per-run hwKey while keeping the original GPU base prefix. */ +export function makeSynthHwKey(origHwKey: string, runId: number): string { + return `${origHwKey}${SYNTH_KEY_DELIM}${runId}`; +} + +/** Reverse the encoding produced by {@link makeSynthHwKey}. */ +export function parseSynthHwKey(hwKey: string): { origHwKey: string; runId: number } | null { + const idx = hwKey.indexOf(SYNTH_KEY_DELIM); + if (idx === -1) return null; + const origHwKey = hwKey.slice(0, idx); + const runId = Number(hwKey.slice(idx + SYNTH_KEY_DELIM.length)); + if (!Number.isFinite(runId)) return null; + return { origHwKey, runId }; +} + +export function isSynthHwKey(hwKey: string): boolean { + return hwKey.includes(SYNTH_KEY_DELIM); +} + +function makeSynthHardwareEntry( + origEntry: HardwareEntry | undefined, + origHwKey: string, + run: UnofficialRunInfoLite, + synthHwKey: string, +): HardwareEntry { + const branch = run.branch || `run ${run.id}`; + const baseLabel = origEntry?.label ?? origHwKey; + return { + name: synthHwKey.replaceAll('_', '-'), + label: `${baseLabel} • ${branch}`, + suffix: origEntry?.suffix ?? '', + gpu: origEntry?.gpu ? 
`${origEntry.gpu} (UNOFFICIAL: ${branch})` : `UNOFFICIAL: ${branch}`, + framework: origEntry?.framework, + }; +} + +interface MergeArgs { + graphs: RenderableGraph[]; + hardwareConfig: HardwareConfig; + /** + * Per-(model_sequence) overlay chart data, indexed exactly as produced by + * {@link unofficial-run-provider#buildChartData}. We look up the entry for + * the currently-selected `${model}_${sequence}` key. + */ + unofficialChartData: UnofficialChartDataMap | null; + selectedModel: string; + selectedSequence: string; + selectedYAxisMetric: string; + selectedXAxisMetric: string | null; + selectedE2eXAxisMetric: string | null; + runIndexByUrl: Record; + unofficialRunInfos: UnofficialRunInfoLite[]; + /** + * Chart definitions to fall back on when `graphs` is empty. Lets the merger + * synthesize stub graphs so unofficial-only data (e.g. a model with no DB + * coverage but an unofficial sweep) still renders when the toggle is on. + * Optional — when omitted and `graphs` is empty, the merge is a no-op. + */ + chartDefinitions?: ChartDefinition[]; +} + +export interface MergeResult { + graphs: RenderableGraph[]; + hardwareConfig: HardwareConfig; + /** Map from synth hwKey → CSS color. ScatterGraph consults this before falling back to vendor colors. */ + colorOverrides: Record; +} + +/** + * Inject overlay rows into the official `graphs` as first-class points with + * synthesized per-run hwKeys, returning extended `hardwareConfig` and a + * color-override map for ScatterGraph's `resolveColor`. + * + * If `unofficialChartData` is null or has no rows for the selected + * (model, sequence), the result mirrors the input verbatim — the merge is a + * no-op and downstream behavior is unchanged. 
+ */ +export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { + const { + graphs: inputGraphs, + hardwareConfig, + unofficialChartData, + selectedModel, + selectedSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos, + chartDefinitions, + } = args; + + const dataKey = `${selectedModel}_${selectedSequence}`; + const overlayGroup = unofficialChartData?.[dataKey]; + if (!overlayGroup) { + return { graphs: inputGraphs, hardwareConfig, colorOverrides: {} }; + } + + // When there are no official graphs but caller supplied chartDefinitions, + // synthesize empty stubs so the merge still has a place to inject points. + const graphs: RenderableGraph[] = + inputGraphs.length === 0 && chartDefinitions + ? buildStubGraphsForMerge(selectedModel, selectedSequence, chartDefinitions) + : inputGraphs; + + const mergedHardwareConfig: HardwareConfig = { ...hardwareConfig }; + const colorOverrides: Record = {}; + + /** + * Process overlay rows for one chart type: re-key by (run, origHwKey), + * synthesize HardwareEntry/colorOverride entries on first encounter, and + * apply the same metric/x-axis pipeline that `useChartData` runs on + * official rows so the resulting points sit in the same coordinate space. + */ + const processForChart = ( + chartType: 'e2e' | 'interactivity', + rawRows: InferenceData[], + overlayHwConfig: HardwareConfig, + ): InferenceData[] => { + if (rawRows.length === 0) return []; + const effectiveXMetric = chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric; + const processed = processOverlayChartData( + rawRows, + chartType, + selectedYAxisMetric, + effectiveXMetric, + ); + return processed.map((row) => { + const runIdx = overlayRunIndex(row.run_url ?? null, runIndexByUrl); + const run = unofficialRunInfos[runIdx] ?? unofficialRunInfos[0]; + // No runs known (defensive — provider always populates one when overlay + // data exists). 
Fall back to the original hwKey untouched. + if (!run) return row; + const origHwKey = String(row.hwKey); + const synthHwKey = makeSynthHwKey(origHwKey, run.id); + if (!(synthHwKey in mergedHardwareConfig)) { + const origEntry = hardwareConfig[origHwKey] ?? overlayHwConfig[origHwKey]; + mergedHardwareConfig[synthHwKey] = makeSynthHardwareEntry( + origEntry, + origHwKey, + run, + synthHwKey, + ); + colorOverrides[synthHwKey] = overlayRunColor(runIdx); + } + return { ...row, hwKey: synthHwKey }; + }); + }; + + const mergedGraphs: RenderableGraph[] = graphs.map((g) => { + const ct = g.chartDefinition.chartType as 'e2e' | 'interactivity'; + const overlayRows = ct === 'e2e' ? overlayGroup.e2e.data : overlayGroup.interactivity.data; + const overlayHwCfg = ct === 'e2e' ? overlayGroup.e2e.gpus : overlayGroup.interactivity.gpus; + const merged = processForChart(ct, overlayRows, overlayHwCfg); + if (merged.length === 0) return g; + return { ...g, data: [...g.data, ...merged] }; + }); + + return { + graphs: mergedGraphs, + hardwareConfig: mergedHardwareConfig, + colorOverrides, + }; +} + +/** + * Build empty-data stub graphs from chart definitions, used when the official + * model has no DB data but we still want the unofficial rows to render after + * merge. Mirrors `effectiveGraphs` in ChartDisplay's no-data fallback. 
+ */ +export function buildStubGraphsForMerge( + selectedModel: string, + selectedSequence: string, + chartDefinitions: ChartDefinition[], +): RenderableGraph[] { + return chartDefinitions.map((chartDefinition) => ({ + model: selectedModel, + sequence: selectedSequence, + chartDefinition, + data: [] as InferenceData[], + })); +} diff --git a/packages/app/src/lib/url-state.ts b/packages/app/src/lib/url-state.ts index e770caea..bcd79a01 100644 --- a/packages/app/src/lib/url-state.ts +++ b/packages/app/src/lib/url-state.ts @@ -37,6 +37,7 @@ const URL_STATE_KEYS = [ 'i_advlabel', 'i_gradlabel', 'i_linelabel', + 'i_uoff_ingested', // Evaluation 'e_rundate', 'e_bench', @@ -76,6 +77,7 @@ export const PARAM_DEFAULTS: Record = { i_advlabel: '', i_gradlabel: '', i_linelabel: '', + i_uoff_ingested: '', e_rundate: '', e_bench: '', e_hc: '', From 323d1b4e1f0a677ffab2f442e97a424c171fe577 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:25:04 +0000 Subject: [PATCH 05/10] feat(unofficial): use vendor-zone colors for ingested unofficial rows Drops the branch suffix from synth-key labels and the per-run color override so unofficial rows promoted via "Show as ingested" share the same vendor-aware palette as official rows. Two NVIDIA GPUs from one unofficial run now get distinct shades of green instead of one shared overlay-palette red. The branch + run URL still surface in `gpu` so the row tooltip retains provenance. 
Co-authored-by: Bryan Shan --- packages/app/src/lib/unofficial-merge.test.ts | 80 ++++++++++++++++--- packages/app/src/lib/unofficial-merge.ts | 29 +++++-- 2 files changed, 93 insertions(+), 16 deletions(-) diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts index 971953b6..713dd2f7 100644 --- a/packages/app/src/lib/unofficial-merge.test.ts +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -6,6 +6,7 @@ import type { InferenceData, RenderableGraph, } from '@/components/inference/types'; +import { generateVendorColors, getVendor } from '@/lib/dynamic-colors'; import { isSynthHwKey, @@ -183,7 +184,7 @@ describe('mergeUnofficialIntoOfficial', () => { expect(result.colorOverrides).toEqual({}); }); - it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig + colorOverrides', () => { + it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig (no color override)', () => { const { graphs, hardwareConfig } = emptyOfficial(); const result = mergeUnofficialIntoOfficial({ graphs, @@ -205,14 +206,19 @@ describe('mergeUnofficialIntoOfficial', () => { expect(synthKeys).toContain('h100_vllm__uorun100'); expect(synthKeys).toContain('a100_sglang__uorun100'); - // The synth keys are present in hardwareConfig with branch-bearing labels. + // The synth keys are present in hardwareConfig with bare GPU labels — the + // branch is intentionally NOT in the legend label (the run is still + // recoverable from `gpu` for the row tooltip). const h100Synth = result.hardwareConfig['h100_vllm__uorun100']; - expect(h100Synth.label).toBe('H100 • feature-branch-a'); + expect(h100Synth.label).toBe('H100'); + expect(h100Synth.label).not.toContain('feature-branch-a'); expect(h100Synth.gpu).toContain('UNOFFICIAL: feature-branch-a'); - // Color overrides are populated for each synth key (palette-based, not GPU-vendor). 
- expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); - expect(result.colorOverrides['a100_sglang__uorun100']).toBe('var(--overlay-run-0)'); + // No color overrides are populated — colors fall through to the + // vendor-aware system in dynamic-colors.ts so two NVIDIA GPUs from a + // single unofficial run get distinct shades of green instead of one + // shared overlay-palette color. + expect(result.colorOverrides).toEqual({}); }); it('keeps multiple runs separate so each (run, GPU) becomes its own legend entry', () => { @@ -249,9 +255,18 @@ describe('mergeUnofficialIntoOfficial', () => { expect(h100Keys).toContain('h100_vllm__uorun100'); expect(h100Keys).toContain('h100_vllm__uorun200'); - expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100 • feature-branch-b'); - expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); - expect(result.colorOverrides['h100_vllm__uorun200']).toBe('var(--overlay-run-1)'); + // Both runs of the same GPU get the bare GPU label — visual disambiguation + // is done by the vendor-zone color system, which assigns distinct hues + // within the same vendor band. Provenance still surfaces via `gpu`. + expect(result.hardwareConfig['h100_vllm__uorun100'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun100'].gpu).toContain( + 'UNOFFICIAL: feature-branch-a', + ); + expect(result.hardwareConfig['h100_vllm__uorun200'].gpu).toContain( + 'UNOFFICIAL: feature-branch-b', + ); + expect(result.colorOverrides).toEqual({}); }); it('preserves official rows alongside merged overlay rows', () => { @@ -318,3 +333,50 @@ describe('mergeUnofficialIntoOfficial', () => { expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); }); }); + +// Pull a hue out of an `oklch(L C H)` string for assertions below. 
+function hueOf(s: string): number { + const m = s.match(/oklch\([^)]*\s+([\d.]+)\)/); + return m ? Number(m[1]) : NaN; +} + +describe('synth hwKey color integration with generateVendorColors', () => { + // Regression: previously, two NVIDIA GPUs from one unofficial run shared a + // single overlay-palette color (e.g. both rendered red), making B200 and + // B300 visually identical. Now the merge omits color overrides and the + // vendor-zone palette assigns each synth key its own hue within the + // vendor's band. + it('assigns distinct shades within the vendor zone to two NVIDIA GPUs from one unofficial run', () => { + const synthKeys = [makeSynthHwKey('b200_vllm', 100), makeSynthHwKey('b300_vllm', 100)]; + expect(getVendor(synthKeys[0])).toBe('nvidia'); + expect(getVendor(synthKeys[1])).toBe('nvidia'); + const colors = generateVendorColors(synthKeys, 'light'); + expect(colors[synthKeys[0]]).toBeDefined(); + expect(colors[synthKeys[1]]).toBeDefined(); + expect(colors[synthKeys[0]]).not.toBe(colors[synthKeys[1]]); + }); + + it('keeps NVIDIA synth keys inside the NVIDIA hue zone and AMD synth keys inside AMD', () => { + const nvidiaSynth = makeSynthHwKey('b200_vllm', 100); + const amdSynth = makeSynthHwKey('mi300x_sglang', 100); + const colors = generateVendorColors([nvidiaSynth, amdSynth], 'light'); + // VENDOR_OKLCH_ZONES.nvidia is 120–170 (greens/teals). + const nvidiaHue = hueOf(colors[nvidiaSynth]); + expect(nvidiaHue).toBeGreaterThanOrEqual(120); + expect(nvidiaHue).toBeLessThanOrEqual(170); + // VENDOR_OKLCH_ZONES.amd is 12–42 (reds/oranges). + const amdHue = hueOf(colors[amdSynth]); + expect(amdHue).toBeGreaterThanOrEqual(12); + expect(amdHue).toBeLessThanOrEqual(42); + }); + + it('does not pin two unofficial runs of the same GPU to one color', () => { + // Both synth keys share the `b200_vllm` base, so they fall in the same + // sort bucket — but generateVendorColors still spreads them across + // distinct hues within the NVIDIA zone. 
+ const a = makeSynthHwKey('b200_vllm', 100); + const b = makeSynthHwKey('b200_vllm', 200); + const colors = generateVendorColors([a, b], 'light'); + expect(colors[a]).not.toBe(colors[b]); + }); +}); diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts index e29db2e8..248a1e62 100644 --- a/packages/app/src/lib/unofficial-merge.ts +++ b/packages/app/src/lib/unofficial-merge.ts @@ -19,7 +19,7 @@ import type { } from '@/components/inference/types'; import { processOverlayChartData } from '@/components/inference/utils'; import type { HardwareEntry } from '@/lib/constants'; -import { overlayRunColor, overlayRunIndex } from '@/lib/overlay-run-style'; +import { overlayRunIndex } from '@/lib/overlay-run-style'; const SYNTH_KEY_DELIM = '__uorun'; @@ -63,9 +63,12 @@ function makeSynthHardwareEntry( ): HardwareEntry { const branch = run.branch || `run ${run.id}`; const baseLabel = origEntry?.label ?? origHwKey; + // Legend label intentionally drops the branch — the color (assigned by the + // shared vendor-zone palette) is what disambiguates runs/GPUs visually. + // The branch name stays in `gpu` so the row tooltip still shows provenance. return { name: synthHwKey.replaceAll('_', '-'), - label: `${baseLabel} • ${branch}`, + label: baseLabel, suffix: origEntry?.suffix ?? '', gpu: origEntry?.gpu ? `${origEntry.gpu} (UNOFFICIAL: ${branch})` : `UNOFFICIAL: ${branch}`, framework: origEntry?.framework, @@ -100,7 +103,14 @@ interface MergeArgs { export interface MergeResult { graphs: RenderableGraph[]; hardwareConfig: HardwareConfig; - /** Map from synth hwKey → CSS color. ScatterGraph consults this before falling back to vendor colors. */ + /** + * Map from synth hwKey → CSS color. ScatterGraph consults this before falling + * back to vendor colors.
Currently empty — synth keys preserve the original + * GPU base prefix (`b200_vllm__uorun123`), so the standard + * `generateVendorColors` pipeline picks a vendor-appropriate hue for each + * synth key automatically. The override map is retained so callers can still + * pin a specific color per synth key if needed. + */ colorOverrides: Record; } @@ -146,9 +156,15 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { /** * Process overlay rows for one chart type: re-key by (run, origHwKey), - * synthesize HardwareEntry/colorOverride entries on first encounter, and - * apply the same metric/x-axis pipeline that `useChartData` runs on - * official rows so the resulting points sit in the same coordinate space. + * synthesize a HardwareEntry on first encounter, and apply the same + * metric/x-axis pipeline that `useChartData` runs on official rows so the + * resulting points sit in the same coordinate space. + * + * No color override is set: the synth hwKey preserves the original GPU base + * prefix, so the standard vendor-zone color generator distributes hues + * across all (official + synth) keys for a vendor automatically — that's + * how two NVIDIA GPUs from one unofficial run end up as different shades + * of green rather than two copies of the same overlay-palette color. 
*/ const processForChart = ( chartType: 'e2e' | 'interactivity', @@ -179,7 +195,6 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { run, synthHwKey, ); - colorOverrides[synthHwKey] = overlayRunColor(runIdx); } return { ...row, hwKey: synthHwKey }; }); From 462dc0f6222a47775c3efe4cdba3c2298b608af1 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 21:09:38 +0000 Subject: [PATCH 06/10] feat(unofficial): default to 8K/256 + Output Token Throughput per GPU on unofficial-run load Temporary branch-only override: when a `?unofficialrun=` URL loads and the user did not pin `i_seq` / `i_metric` themselves, default the sequence to `8K / 256` and the y-axis metric to `Output Token Throughput per GPU` so InfiniteBench-style sweeps land on a useful default view alongside the model auto-switch from #243. Mirrors the dedupe shape of `computeAutoSwitchDecision` via a new pure helper `computeUnofficialOverrideDecision` so manual user picks stick once URL-synced and a fresh run-set transition can re-arm the override. 
Co-authored-by: Bryan Shan --- .../src/components/GlobalFilterContext.tsx | 22 ++++- .../components/inference/InferenceContext.tsx | 28 ++++++- .../lib/unofficial-run-auto-switch.test.ts | 80 ++++++++++++++++++- .../app/src/lib/unofficial-run-auto-switch.ts | 41 ++++++++++ 4 files changed, 167 insertions(+), 4 deletions(-) diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index b7e12e08..089d385f 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -25,7 +25,10 @@ import { Sequence, SEQUENCE_OPTIONS, } from '@/lib/data-mappings'; -import { computeAutoSwitchDecision } from '@/lib/unofficial-run-auto-switch'; +import { + computeAutoSwitchDecision, + computeUnofficialOverrideDecision, +} from '@/lib/unofficial-run-auto-switch'; import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api'; interface RunInfo { @@ -201,6 +204,23 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { } }, [unofficialAvailable, selectedModel]); + // TEMPORARY (this branch only): default the sequence to `8K / 256` when an + // unofficial run loads and the URL didn't pin `i_seq`. Same dedupe shape as + // the model auto-switch above — manual sequence picks stick because the URL + // gets `i_seq` written by the URL-sync effect after the override fires. 
+ const lastUnofficialSeqOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_seq'), + lastUnofficialSeqOverrideRef.current, + ); + lastUnofficialSeqOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + setSelectedSequence(Sequence.EightK_256); + } + }, [unofficialAvailable]); + // Sequences available for the selected model (DB ∪ unofficial run for this model) const availableSequences = useMemo(() => { const unofficialSeqs = unofficialAvailable diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index bd1c2e21..2eb9da10 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -29,6 +29,7 @@ import type { import { useUnofficialRun } from '@/components/unofficial-run-provider'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge'; +import { computeUnofficialOverrideDecision } from '@/lib/unofficial-run-auto-switch'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -200,8 +201,31 @@ export function InferenceProvider({ // synth hwKeys and merged into `graphs` so they participate in the same // filter/optimal-only/legend pipeline as official data. The resulting // `hwColorOverrides` map is consumed by ScatterGraph's color resolver. - const { mergeAsIngested, unofficialChartData, unofficialRunInfos, runIndexByUrl } = - useUnofficialRun(); + const { + mergeAsIngested, + unofficialChartData, + unofficialRunInfos, + runIndexByUrl, + availableModelsAndSequences: unofficialAvailable, + } = useUnofficialRun(); + + // TEMPORARY (this branch only): default the y-axis metric to "Output Token + // Throughput per GPU" when an unofficial run loads and the URL didn't pin + // `i_metric`. 
Mirrors the sequence override in GlobalFilterContext — manual + // metric picks stick because the URL gets `i_metric` written after the + // override fires. + const lastUnofficialMetricOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_metric'), + lastUnofficialMetricOverrideRef.current, + ); + lastUnofficialMetricOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + setSelectedYAxisMetric('y_outputTputPerGpu'); + } + }, [unofficialAvailable]); const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => { if (!mergeAsIngested) { diff --git a/packages/app/src/lib/unofficial-run-auto-switch.test.ts b/packages/app/src/lib/unofficial-run-auto-switch.test.ts index f58776ad..485ca237 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.test.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.test.ts @@ -3,7 +3,10 @@ import { describe, expect, it } from 'vitest'; import type { AvailableModelSequence } from '@/components/unofficial-run-provider'; import { Model, Sequence } from '@/lib/data-mappings'; -import { computeAutoSwitchDecision } from './unofficial-run-auto-switch'; +import { + computeAutoSwitchDecision, + computeUnofficialOverrideDecision, +} from './unofficial-run-auto-switch'; function entry(model: Model, sequence: Sequence): AvailableModelSequence { return { model, sequence, precisions: [] }; @@ -112,3 +115,78 @@ describe('computeAutoSwitchDecision', () => { expect(a.nextKey).toBe(b.nextKey); }); }); + +describe('computeUnofficialOverrideDecision', () => { + it('returns no-op and resets the key when no unofficial run is loaded', () => { + expect(computeUnofficialOverrideDecision([], undefined, 'stale-key')).toEqual({ + nextKey: '', + shouldOverride: false, + }); + }); + + it('fires the override on a fresh run set when the URL does not pin the param', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const 
decision = computeUnofficialOverrideDecision(run, undefined, ''); + expect(decision.shouldOverride).toBe(true); + expect(decision.nextKey).toBe(Model.DeepSeek_V4_Pro); + }); + + it('respects an explicit URL pin even on a fresh run set', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const decision = computeUnofficialOverrideDecision(run, '1k/1k', ''); + expect(decision.shouldOverride).toBe(false); + // Ref must not be advanced — if the URL is later cleared we still want + // a fresh load of the same run to fire the override. + expect(decision.nextKey).toBe(''); + }); + + it('does not re-fire after the override has already been applied for this run set', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const lastKey = Model.DeepSeek_V4_Pro; + const decision = computeUnofficialOverrideDecision(run, undefined, lastKey); + expect(decision.shouldOverride).toBe(false); + expect(decision.nextKey).toBe(lastKey); + }); + + it('re-arms after the overlay set is cleared so a subsequent load can override again', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const first = computeUnofficialOverrideDecision(run, undefined, ''); + expect(first.shouldOverride).toBe(true); + + const cleared = computeUnofficialOverrideDecision([], undefined, first.nextKey); + expect(cleared).toEqual({ nextKey: '', shouldOverride: false }); + + const run2 = [entry(Model.Kimi_K2_5, Sequence.OneK_OneK)]; + const second = computeUnofficialOverrideDecision(run2, undefined, cleared.nextKey); + expect(second.shouldOverride).toBe(true); + }); + + it('ignores sequence-only deltas in the dedupe key', () => { + const oneK = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const both = [ + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_OneK), + ]; + const first = computeUnofficialOverrideDecision(oneK, undefined, ''); + const second = computeUnofficialOverrideDecision(both, 
undefined, first.nextKey); + expect(first.nextKey).toBe(second.nextKey); + expect(second.shouldOverride).toBe(false); + }); + + it('produces a deterministic key across insertion orders', () => { + const orderA = [ + entry(Model.MiniMax_M2_5, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + const orderB = [ + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.MiniMax_M2_5, Sequence.OneK_OneK), + ]; + const a = computeUnofficialOverrideDecision(orderA, undefined, ''); + const b = computeUnofficialOverrideDecision(orderB, undefined, ''); + expect(a.nextKey).toBe(b.nextKey); + expect(a.shouldOverride).toBe(b.shouldOverride); + }); +}); diff --git a/packages/app/src/lib/unofficial-run-auto-switch.ts b/packages/app/src/lib/unofficial-run-auto-switch.ts index a4af4683..1910e95c 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.ts @@ -46,3 +46,44 @@ export function computeAutoSwitchDecision( } return { nextKey: key, modelToSet: sortedModels[0] }; } + +export interface UnofficialOverrideDecision { + /** New value the caller should write into the dedupe ref. */ + nextKey: string; + /** Whether the caller should apply the temporary override. */ + shouldOverride: boolean; +} + +/** + * TEMPORARY (this branch only): when an unofficial run loads, override the + * default sequence to `8K / 256` and the default y-axis metric to "Output + * Token Throughput per GPU" so the InfiniteBench-style sweeps land on a + * useful default view. Mirrors the dedupe behavior of + * {@link computeAutoSwitchDecision} so manual user changes stick once they + * are URL-synced, and a fresh run-set transition can re-arm the override. + * + * - When the overlay set is empty, the dedupe key is reset. + * - When the URL pinned the corresponding param explicitly, no override + * fires (respect intent). 
+ * - The dedupe key is the sorted unique list of overlay models — same shape + * as the auto-switch key — so a sequence-only delta does not invalidate a + * manual user pick. + */ +export function computeUnofficialOverrideDecision( + unofficialAvailable: AvailableModelSequence[], + urlValue: string | undefined, + lastKey: string, +): UnofficialOverrideDecision { + if (unofficialAvailable.length === 0) { + return { nextKey: '', shouldOverride: false }; + } + if (urlValue) { + return { nextKey: lastKey, shouldOverride: false }; + } + const sortedModels = [...new Set(unofficialAvailable.map((a) => a.model))].toSorted(); + const key = sortedModels.join(','); + if (lastKey === key) { + return { nextKey: lastKey, shouldOverride: false }; + } + return { nextKey: key, shouldOverride: true }; +} From 1127bd242f32f83727565ae6d6accf9b753038a8 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 13 Jun 2026 16:20:55 -0700 Subject: [PATCH 07/10] feat(unofficial): support 8k625 benchmark rows --- .../src/app/api/unofficial-run/route.test.ts | 48 +++++++++++++++++++ .../unofficial-run-provider.test.ts | 6 +++ packages/app/src/lib/data-mappings.test.ts | 10 ++++ packages/app/src/lib/data-mappings.ts | 2 + packages/app/src/lib/models-mapping.test.ts | 10 +++- packages/constants/src/models.test.ts | 10 +++- packages/constants/src/models.ts | 2 + 7 files changed, 86 insertions(+), 2 deletions(-) diff --git a/packages/app/src/app/api/unofficial-run/route.test.ts b/packages/app/src/app/api/unofficial-run/route.test.ts index be87e016..89fe0cd2 100644 --- a/packages/app/src/app/api/unofficial-run/route.test.ts +++ b/packages/app/src/app/api/unofficial-run/route.test.ts @@ -202,6 +202,54 @@ describe('normalizeArtifactRows', () => { ); expect(rows.every((r) => r.date === '2026-03-11')).toBe(true); }); + + it('normalizes the offline B300 TRT compatibility row', () => { + const rows = normalizeArtifactRows( + [ + rawRow({ + hw: 'b300', + 
model: 'deepseek-ai/DeepSeek-V4-Pro', + infmax_model_prefix: 'dsv4', + framework: 'trt', + precision: 'fp4', + isl: 8192, + osl: 625, + conc: 32, + prefill_tp: 4, + prefill_ep: 1, + prefill_dp_attention: false, + prefill_num_workers: 0, + decode_tp: 4, + decode_ep: 1, + decode_dp_attention: false, + decode_num_workers: 0, + num_prefill_gpu: 4, + num_decode_gpu: 4, + spec_decoding: 'mtp', + tput_per_gpu: 489.17, + output_tput_per_gpu: 489.17, + mean_tpot: 0.01635, + }), + ], + '2026-06-13', + ); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + hardware: 'b300', + framework: 'trt', + model: 'dsv4', + precision: 'fp4', + spec_method: 'mtp', + isl: 8192, + osl: 625, + conc: 32, + decode_tp: 4, + decode_ep: 1, + num_decode_gpu: 4, + }); + expect(rows[0].metrics.output_tput_per_gpu).toBe(489.17); + expect(rows[0].metrics.mean_tpot).toBe(0.01635); + }); }); describe('normalizeEvalArtifactRows', () => { diff --git a/packages/app/src/components/unofficial-run-provider.test.ts b/packages/app/src/components/unofficial-run-provider.test.ts index 1863060d..aeac06de 100644 --- a/packages/app/src/components/unofficial-run-provider.test.ts +++ b/packages/app/src/components/unofficial-run-provider.test.ts @@ -163,6 +163,12 @@ describe('buildChartData', () => { expect(Object.keys(result)).toEqual(['DeepSeek-R1-0528_8k/1k']); }); + it('maps the offline TRT 8k/625 sequence correctly', () => { + const rows = [stubRow({ model: 'dsv4', isl: 8192, osl: 625 })]; + const result = buildChartData(rows); + expect(Object.keys(result)).toEqual(['DeepSeek-V4-Pro_8k/625']); + }); + it('skips rows with unmapped ISL/OSL', () => { const rows = [stubRow({ model: 'dsr1', isl: 4096, osl: 4096 })]; const result = buildChartData(rows); diff --git a/packages/app/src/lib/data-mappings.test.ts b/packages/app/src/lib/data-mappings.test.ts index c79c52f2..6bda0d4e 100644 --- a/packages/app/src/lib/data-mappings.test.ts +++ b/packages/app/src/lib/data-mappings.test.ts @@ -133,6 +133,15 @@ 
describe('getModelAndSequenceFromArtifact', () => { expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_256 }); }); + it('parses structured artifact with dsv4 prefix and 8k/625 ISL/OSL', () => { + const result = getModelAndSequenceFromArtifact({ + infmax_model_prefix: 'dsv4', + isl: 8192, + osl: 625, + }); + expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_625 }); + }); + it('returns undefined for unknown model prefix', () => { const result = getModelAndSequenceFromArtifact({ infmax_model_prefix: 'unknown', @@ -216,6 +225,7 @@ describe('getSequenceLabel', () => { expect(getSequenceLabel(Sequence.OneK_EightK)).toBe('1K / 8K'); expect(getSequenceLabel(Sequence.EightK_OneK)).toBe('8K / 1K'); expect(getSequenceLabel(Sequence.EightK_256)).toBe('8K / 256'); + expect(getSequenceLabel(Sequence.EightK_625)).toBe('8K / 625'); }); it('falls back to the sequence value for unknown sequence', () => { diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index 8a24ecdb..e788f77a 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -147,6 +147,7 @@ export enum Sequence { OneK_EightK = '1k/8k', EightK_OneK = '8k/1k', EightK_256 = '8k/256', + EightK_625 = '8k/625', } const SEQUENCE_CONFIG: Record = @@ -155,6 +156,7 @@ const SEQUENCE_CONFIG: Record { expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); }); + it('converts 8k/625 to 8192/625', () => { + expect(sequenceToIslOsl('8k/625')).toEqual({ isl: 8192, osl: 625 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('4k/4k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -75,13 +79,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(8192, 256)).toBe('8k/256'); }); + it('converts 8192/625 to 8k/625', () => { + expect(islOslToSequence(8192, 625)).toBe('8k/625'); + }); + it('returns null for unknown ISL/OSL combos', () => { 
expect(islOslToSequence(4096, 4096)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256', '8k/625']) { const islOsl = sequenceToIslOsl(seq)!; expect(islOslToSequence(islOsl.isl, islOsl.osl)).toBe(seq); } diff --git a/packages/constants/src/models.test.ts b/packages/constants/src/models.test.ts index 515fcf65..a1c20bb2 100644 --- a/packages/constants/src/models.test.ts +++ b/packages/constants/src/models.test.ts @@ -43,6 +43,10 @@ describe('sequenceToIslOsl', () => { expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); }); + it('parses 8k/625 to 8192/625', () => { + expect(sequenceToIslOsl('8k/625')).toEqual({ isl: 8192, osl: 625 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('2k/2k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -63,13 +67,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(8192, 256)).toBe('8k/256'); }); + it('converts 8192/625 to 8k/625', () => { + expect(islOslToSequence(8192, 625)).toBe('8k/625'); + }); + it('returns null for unmapped ISL/OSL pairs', () => { expect(islOslToSequence(2048, 2048)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl for all known sequences', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256', '8k/625']) { const parsed = sequenceToIslOsl(seq)!; expect(islOslToSequence(parsed.isl, parsed.osl)).toBe(seq); } diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts index 83b6540c..96b56818 100644 --- a/packages/constants/src/models.ts +++ b/packages/constants/src/models.ts @@ -42,6 +42,7 @@ export function sequenceToIslOsl(seq: string): { isl: number; osl: number } | nu '1k/8k': { isl: 1024, osl: 8192 }, '8k/1k': { isl: 8192, osl: 
1024 }, '8k/256': { isl: 8192, osl: 256 }, + '8k/625': { isl: 8192, osl: 625 }, }; return map[seq] ?? null; } @@ -53,6 +54,7 @@ export function islOslToSequence(isl: number, osl: number): string | null { '1024_8192': '1k/8k', '8192_1024': '8k/1k', '8192_256': '8k/256', + '8192_625': '8k/625', }; return map[`${isl}_${osl}`] ?? null; } From 7b4dc87b128ee237582bb0255c16c719570dc024 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 13 Jun 2026 16:44:48 -0700 Subject: [PATCH 08/10] fix(unofficial): show available run sequence --- .../src/components/GlobalFilterContext.tsx | 19 +++++--- .../lib/unofficial-run-auto-switch.test.ts | 47 +++++++++++++++++++ .../app/src/lib/unofficial-run-auto-switch.ts | 30 +++++++++++- 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 089d385f..51178d22 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -28,6 +28,7 @@ import { import { computeAutoSwitchDecision, computeUnofficialOverrideDecision, + selectUnofficialDefaultSequence, } from '@/lib/unofficial-run-auto-switch'; import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api'; @@ -204,10 +205,11 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { } }, [unofficialAvailable, selectedModel]); - // TEMPORARY (this branch only): default the sequence to `8K / 256` when an - // unofficial run loads and the URL didn't pin `i_seq`. Same dedupe shape as - // the model auto-switch above — manual sequence picks stick because the URL - // gets `i_seq` written by the URL-sync effect after the override fires. + // TEMPORARY (this branch only): prefer `8K / 256` when an unofficial run + // provides it and the URL didn't pin `i_seq`. 
Otherwise use a sequence that + // is actually present in the run so an uncommon shape such as 8K/625 is + // visible on first load. Manual sequence picks stick because the URL gets + // `i_seq` written by the URL-sync effect after the override fires. const lastUnofficialSeqOverrideRef = useRef(''); useEffect(() => { const decision = computeUnofficialOverrideDecision( @@ -217,9 +219,14 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { ); lastUnofficialSeqOverrideRef.current = decision.nextKey; if (decision.shouldOverride) { - setSelectedSequence(Sequence.EightK_256); + const sequence = selectUnofficialDefaultSequence( + unofficialAvailable, + selectedModel, + getUrlParam('g_model'), + ); + if (sequence !== null) setSelectedSequence(sequence); } - }, [unofficialAvailable]); + }, [unofficialAvailable, selectedModel]); // Sequences available for the selected model (DB ∪ unofficial run for this model) const availableSequences = useMemo(() => { diff --git a/packages/app/src/lib/unofficial-run-auto-switch.test.ts b/packages/app/src/lib/unofficial-run-auto-switch.test.ts index 485ca237..2d5fe5dc 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.test.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.test.ts @@ -6,6 +6,7 @@ import { Model, Sequence } from '@/lib/data-mappings'; import { computeAutoSwitchDecision, computeUnofficialOverrideDecision, + selectUnofficialDefaultSequence, } from './unofficial-run-auto-switch'; function entry(model: Model, sequence: Sequence): AvailableModelSequence { @@ -190,3 +191,49 @@ describe('computeUnofficialOverrideDecision', () => { expect(a.shouldOverride).toBe(b.shouldOverride); }); }); + +describe('selectUnofficialDefaultSequence', () => { + it('prefers 8k/256 when the displayed model provides it', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_256), + ]; + expect(selectUnofficialDefaultSequence(run, 
Model.DeepSeek_V4_Pro, undefined)).toBe( + Sequence.EightK_256, + ); + }); + + it('uses the run sequence when 8k/256 is unavailable', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625)]; + expect(selectUnofficialDefaultSequence(run, Model.DeepSeek_R1, undefined)).toBe( + Sequence.EightK_625, + ); + }); + + it('chooses a sequence for the current covered model', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + expect(selectUnofficialDefaultSequence(run, Model.Kimi_K2_5, undefined)).toBe( + Sequence.OneK_OneK, + ); + }); + + it('uses an explicitly pinned model when selecting the sequence', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + expect(selectUnofficialDefaultSequence(run, Model.DeepSeek_V4_Pro, Model.Kimi_K2_5)).toBe( + Sequence.OneK_OneK, + ); + }); + + it('returns null when an explicitly pinned model is absent from the run', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625)]; + expect( + selectUnofficialDefaultSequence(run, Model.DeepSeek_V4_Pro, Model.DeepSeek_R1), + ).toBeNull(); + }); +}); diff --git a/packages/app/src/lib/unofficial-run-auto-switch.ts b/packages/app/src/lib/unofficial-run-auto-switch.ts index 1910e95c..ea7becc5 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.ts @@ -1,5 +1,5 @@ import type { AvailableModelSequence } from '@/components/unofficial-run-provider'; -import type { Model } from '@/lib/data-mappings'; +import { Sequence, type Model } from '@/lib/data-mappings'; export interface AutoSwitchDecision { /** New value the caller should write into the dedupe ref. 
*/ @@ -87,3 +87,31 @@ export function computeUnofficialOverrideDecision( } return { nextKey: key, shouldOverride: true }; } + +/** + * Pick the sequence shown when an unofficial run loads without an `i_seq` + * URL pin. Keep this branch's 8K/256 preference when that sequence exists for + * the model that will be displayed, otherwise fall back to an actual sequence + * from the run so its points are visible. + */ +export function selectUnofficialDefaultSequence( + unofficialAvailable: AvailableModelSequence[], + selectedModel: Model, + urlModel: string | undefined, +): Sequence | null { + if (unofficialAvailable.length === 0) return null; + + const sortedModels = [...new Set(unofficialAvailable.map((entry) => entry.model))].toSorted(); + const targetModel = + urlModel ?? (sortedModels.includes(selectedModel) ? selectedModel : sortedModels[0]); + const sequences = [ + ...new Set( + unofficialAvailable + .filter((entry) => entry.model === targetModel) + .map((entry) => entry.sequence), + ), + ]; + + if (sequences.includes(Sequence.EightK_256)) return Sequence.EightK_256; + return sequences.toSorted()[0] ?? null; +} From 747f1ad41399f3593935aa49162e6bc7b8014014 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 00:37:23 +0000 Subject: [PATCH 09/10] =?UTF-8?q?feat(inference):=20multi-select=20ISL/OSL?= =?UTF-8?q?=20=E2=80=94=20overlay=20multiple=20sequences=20on=20one=20char?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence picker becomes a multi-select. Picking >1 (e.g. 1K/1K + 8K/1K) overlays both shapes on the same axes — each (GPU, sequence) becomes its own legend line via a `__seq` hwKey suffix, so vendor-zone colors still distinguish hardware while the legend caption reads e.g. "B200 — 1K / 1K" vs "B200 — 8K / 1K". State: new `extraSequences: Sequence[]` lives in InferenceContext (URL param `i_seq_extra`, empty default). 
Primary `selectedSequence` semantics are unchanged for the calculator / evaluation / trends tabs. Pipeline: `useChartData` filters rows by `IN [primary, ...extras]`, groups by sequence, runs `transformBenchmarkRows` per sequence, and rewrites each output point's hwKey with `makeSeqSynthKey`. Synth hardware entries get the sequence appended to their label. `filterByGPU` is widened to strip the `__seq` suffix so the GPU selector still matches across sequences. Unofficial-run merge: when `mergeAsIngested` is on with multiple sequences, the merger iterates over (primary + extras), composing `base__seq__uorun` synth keys so a (run, GPU, sequence) triple gets its own legend line. `parseSynthHwKey` keeps working because the seq delimiter precedes the run delimiter. Legend reset effect now also keys off `extraSequences` and `mergeAsIngested` so newly-introduced synth hwKeys become active when the user flips either toggle — fixes the Cursor "Merged series stay legend-inactive" finding on the prior commit where `reconcileActiveSet` only removed stale keys. Tests: - `sequence-synth-key.test.ts` (new) — round-trip, base prefix preservation, label tagging, compose with `__uorun`. - `unofficial-merge.test.ts` — multi-sequence merge produces `__seq__uorun` shape; primary-only with duplicate extra dedup is a no-op. - `useChartData.test.ts` — `filterByGPU` strips `__seq` suffix. Verified in Playwright at localhost:3000/inference: legend renders e.g. "GB300 NVL72 — 1K / 1K (Dynamo SGLang)" alongside the 8K / 1K variant, URL state restores from `?i_seq_extra=`, and overlay composition works with `?unofficialrun=27482213487&i_seq_extra=8k%2F1k&i_uoff_ingested=1` (B300 — 8K / 625 from the run + B300 — 8K / 1K official both render). 
Co-authored-by: Bryan Shan --- packages/app/cypress/support/mock-data.ts | 2 + .../components/inference/InferenceContext.tsx | 66 +++++++- .../inference/hooks/useChartData.test.ts | 14 ++ .../inference/hooks/useChartData.ts | 159 +++++++++++++++--- .../app/src/components/inference/types.ts | 7 + .../components/inference/ui/ChartControls.tsx | 26 ++- .../app/src/components/ui/chart-selectors.tsx | 87 ++++++++-- .../app/src/lib/sequence-synth-key.test.ts | 119 +++++++++++++ packages/app/src/lib/sequence-synth-key.ts | 101 +++++++++++ packages/app/src/lib/unofficial-merge.test.ts | 74 ++++++++ packages/app/src/lib/unofficial-merge.ts | 87 ++++++++-- packages/app/src/lib/url-state.ts | 2 + 12 files changed, 688 insertions(+), 56 deletions(-) create mode 100644 packages/app/src/lib/sequence-synth-key.test.ts create mode 100644 packages/app/src/lib/sequence-synth-key.ts diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 735cde8c..dc815084 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -251,6 +251,8 @@ export function createMockInferenceContext( setActivePresetId: namedStub('setActivePresetId'), presetGuardRef: { current: false } as React.RefObject, hwColorOverrides: {}, + extraSequences: [], + setExtraSequences: namedStub('setExtraSequences'), ...overrides, }; } diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 2eb9da10..2dea7adf 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -48,7 +48,7 @@ import { import { useUrlState } from '@/hooks/useUrlState'; import { buildAvailabilityHwKey } from '@/lib/chart-utils'; import { getHardwareConfig, getModelSortIndex, isKnownGpu, TABLEAU_10 } from '@/lib/constants'; -import { hasMtpEngineExclusion, MODEL_PREFIX_MAPPING } from 
'@/lib/data-mappings'; +import { hasMtpEngineExclusion, MODEL_PREFIX_MAPPING, Sequence } from '@/lib/data-mappings'; import { MtpEngineConflictToast, type MtpEngineConflictDetail, @@ -141,6 +141,32 @@ export function InferenceProvider({ () => getUrlParam('i_gradlabel') === '1', ); const [showLineLabels, setShowLineLabels] = useState(() => getUrlParam('i_linelabel') === '1'); + + // ── Multi-sequence overlay ──────────────────────────────────────────────── + // `extraSequences` is the inference-tab-only list of additional ISL/OSL + // shapes to render alongside the primary `selectedSequence`. Empty by + // default — when non-empty, `useChartData` pulls rows for all + // [primary, ...extras] and suffixes each row's hwKey with `__seq` + // so e.g. B200@1K/1K and B200@8K/1K surface as two distinct legend lines. + const [extraSequences, setExtraSequencesRaw] = useState(() => { + const urlExtra = getUrlParam('i_seq_extra'); + if (!urlExtra) return []; + const valid = urlExtra + .split(',') + .filter((s): s is Sequence => Object.values(Sequence).includes(s as Sequence)); + return valid; + }); + const setExtraSequences = useCallback((seqs: Sequence[]) => { + // Dedup defensively; preserve order. + const seen = new Set(); + setExtraSequencesRaw( + seqs.filter((s) => { + if (seen.has(s)) return false; + seen.add(s); + return true; + }), + ); + }, []); const [showSpeedOverlay, setShowSpeedOverlay] = useState(() => getUrlParam('i_speed') === '1'); const [showMinecraftOverlay, setShowMinecraftOverlay] = useState( () => getUrlParam('i_mc') === '1', @@ -174,6 +200,20 @@ export function InferenceProvider({ // ── Data fetching (gated by isActive) ────────────────────────────────────── const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined; + // Drop extras that aren't currently available or duplicate the primary — + // mirrors the `effectivePrecisions` pattern so a stale URL/preset selection + // doesn't pin a sequence the current model can't render. 
+ const effectiveExtraSequences = useMemo(() => { + const out: Sequence[] = []; + const availSet = new Set(availableSequences as string[]); + for (const s of extraSequences) { + if (s === effectiveSequence) continue; + if (!availSet.has(s)) continue; + out.push(s); + } + return out; + }, [extraSequences, availableSequences, effectiveSequence]); + const { graphs: officialGraphs, loading: chartDataLoading, @@ -194,6 +234,7 @@ export function InferenceProvider({ effectiveRunDate, isActive, latestDate, + effectiveExtraSequences, ); // ── Promote unofficial rows to first-class series when toggled ──────────── @@ -241,6 +282,7 @@ export function InferenceProvider({ unofficialChartData, selectedModel, selectedSequence: effectiveSequence, + extraSequences: effectiveExtraSequences, selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, @@ -264,6 +306,7 @@ export function InferenceProvider({ unofficialChartData, selectedModel, effectiveSequence, + effectiveExtraSequences, selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, @@ -606,6 +649,13 @@ export function InferenceProvider({ // reset commits as soon as data for the new model arrives — without this, switching models // bails on the empty-data tick and never re-fires, leaving the legend at the prior intersection. const precisionsKey = effectivePrecisions.join(','); + // Include `extraSequences` and `mergeAsIngested` in the reset key so that + // toggling either flips the active set to include the newly-introduced + // synth hwKeys. Without these, `useChartDataFilter.reconcileActiveSet` only + // removes stale keys — never adds new ones — so merged or sequence-tagged + // series silently render hidden. (Addresses Cursor "Merged series stay + // legend-inactive" review finding.) 
+ const extraSequencesKey = effectiveExtraSequences.join(','); const lastHwResetKeyRef = useRef(''); // Restore legend-active selection from URL on first availability of @@ -631,7 +681,7 @@ export function InferenceProvider({ } } setActiveHwTypes(restored); - lastHwResetKeyRef.current = `${selectedModel}|${effectiveSequence}|${precisionsKey}`; + lastHwResetKeyRef.current = `${selectedModel}|${effectiveSequence}|${precisionsKey}|${extraSequencesKey}|${mergeAsIngested ? '1' : '0'}`; setPendingActiveHwTypes(null); }, [ pendingActiveHwTypes, @@ -640,6 +690,8 @@ export function InferenceProvider({ selectedModel, effectiveSequence, precisionsKey, + extraSequencesKey, + mergeAsIngested, setActiveHwTypes, ]); @@ -647,7 +699,7 @@ export function InferenceProvider({ if (pendingHwFilterRef.current) return; if (pendingActiveHwTypes) return; if (hwTypesWithData.size === 0) return; - const key = `${selectedModel}|${effectiveSequence}|${precisionsKey}`; + const key = `${selectedModel}|${effectiveSequence}|${precisionsKey}|${extraSequencesKey}|${mergeAsIngested ? '1' : '0'}`; if (lastHwResetKeyRef.current === key) return; lastHwResetKeyRef.current = key; const presetFilter = presetHwFilterRef.current; @@ -681,6 +733,8 @@ export function InferenceProvider({ selectedModel, effectiveSequence, precisionsKey, + extraSequencesKey, + mergeAsIngested, hwTypesWithData, mtpExclusion, pendingActiveHwTypes, @@ -830,6 +884,7 @@ export function InferenceProvider({ i_speed: showSpeedOverlay ? '1' : '', i_mc: showMinecraftOverlay ? 
'1' : '', i_active: iActiveStr, + i_seq_extra: effectiveExtraSequences.join(','), }, [ selectedYAxisMetric, @@ -850,6 +905,7 @@ export function InferenceProvider({ showSpeedOverlay, showMinecraftOverlay, iActiveStr, + effectiveExtraSequences, ], ); @@ -1058,6 +1114,8 @@ export function InferenceProvider({ setActivePresetId, presetGuardRef, hwColorOverrides, + extraSequences: effectiveExtraSequences, + setExtraSequences, }), [ activeHwTypes, @@ -1112,6 +1170,8 @@ export function InferenceProvider({ clearTrackedConfigs, activePresetId, hwColorOverrides, + effectiveExtraSequences, + setExtraSequences, ], ); diff --git a/packages/app/src/components/inference/hooks/useChartData.test.ts b/packages/app/src/components/inference/hooks/useChartData.test.ts index 73582998..8dd06ee8 100644 --- a/packages/app/src/components/inference/hooks/useChartData.test.ts +++ b/packages/app/src/components/inference/hooks/useChartData.test.ts @@ -60,6 +60,20 @@ describe('filterByGPU', () => { it('excludes when neither key nor alias matches', () => { expect(filterByGPU([{ hwKey: 'unknown' }], ['h100'], {})).toHaveLength(0); }); + + it('matches multi-sequence synth keys by stripping the __seq suffix', () => { + // When extraSequences is on, useChartData rewrites each row's hwKey to + // `${origHwKey}__seq` so (gpu, sequence) splits into separate + // legend lines. The GPU selector still picks canonical keys, so the + // filter has to look past the suffix. 
+ const data = [ + { hwKey: 'b200_vllm__seq1k1k' }, + { hwKey: 'b200_vllm__seq8k1k' }, + { hwKey: 'h100__seq1k1k' }, + ]; + const result = filterByGPU(data, ['b200_vllm'], {}); + expect(result.map((d) => d.hwKey)).toEqual(['b200_vllm__seq1k1k', 'b200_vllm__seq8k1k']); + }); }); describe('flipRooflineDirection', () => { diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index 625e63ab..9b2287ee 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -14,9 +14,14 @@ import type { } from '@/components/inference/types'; import { filterDataByCostLimit } from '@/components/inference/utils'; import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks'; -import { GPU_ALIAS_TO_CANONICAL, getModelSortIndex } from '@/lib/constants'; +import { GPU_ALIAS_TO_CANONICAL, getHardwareConfig, getModelSortIndex } from '@/lib/constants'; import { transformBenchmarkRows } from '@/lib/benchmark-transform'; import type { Model, Sequence } from '@/lib/data-mappings'; +import { + makeSeqSynthHardwareEntry, + makeSeqSynthKey, + makeSequenceFilter, +} from '@/lib/sequence-synth-key'; import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils'; /** Build deduplicated comparison dates, excluding the main run date. */ @@ -35,7 +40,13 @@ export function buildComparisonDates( return [...new Set(dates.filter((d) => d !== selectedRunDate))]; } -/** Filter data by GPU key, resolving aliases to canonical keys. */ +/** Filter data by GPU key, resolving aliases to canonical keys. + * + * Multi-sequence overlay rows arrive with hwKeys of the form + * `${origHwKey}__seq`. The GPU selector still picks canonical keys, + * so strip the `__seq` suffix before matching: a selected `b200_vllm` should + * match `b200_vllm__seq1k1k`. 
+ */ export function filterByGPU( data: T[], selectedGPUs: string[], @@ -43,7 +54,9 @@ export function filterByGPU( ): T[] { if (selectedGPUs.length === 0) return data; return data.filter((dp) => { - const hwKey = String(dp.hwKey); + const rawKey = String(dp.hwKey); + const seqIdx = rawKey.indexOf('__seq'); + const hwKey = seqIdx === -1 ? rawKey : rawKey.slice(0, seqIdx); const canonical = aliasMap[hwKey]; return ( selectedGPUs.includes(hwKey) || (canonical !== undefined && selectedGPUs.includes(canonical)) @@ -79,6 +92,13 @@ export function useChartData( selectedRunDate?: string, enabled = true, latestAvailableDate?: string, + /** + * Additional sequences to overlay alongside `selectedSequence`. When this + * list is non-empty, rows for ALL (primary + extras) sequences are pulled + * in, and each row's hwKey gets a `__seq` suffix so the (hw, + * sequence) pair surfaces as its own series in the legend. + */ + extraSequences: Sequence[] = [], ) { // When the selected date is the latest available, use '' (empty string) to match // the initial no-date query key, reusing the eagerly-fetched benchmarks from the @@ -116,27 +136,71 @@ export function useChartData( // so we derive a stable key from dataUpdatedAt timestamps to avoid cascading memo invalidation. const comparisonDataKey = comparisonQueries.map((q) => q.dataUpdatedAt).join(','); + // Build the full sequence list (primary + extras, dedup, drop unresolvable). + // Order matters for synth-key stability: primary always comes first so that + // when `extraSequences` is later cleared the row hwKeys land back on the + // primary's compact form. + const allSequences = useMemo(() => { + const seen = new Set(); + const out: Sequence[] = []; + for (const s of [selectedSequence, ...extraSequences]) { + if (!s || seen.has(s)) continue; + if (!sequenceToIslOsl(s)) continue; + seen.add(s); + out.push(s); + } + return out; + }, [selectedSequence, extraSequences]); + + // Pre-compute a per-sequence ISL/OSL filter. 
Indexed parallel to allSequences. + type SeqFilter = ((r: { isl: number; osl: number }) => boolean) | null; + const sequenceFilters = useMemo( + () => allSequences.map((s) => makeSequenceFilter(s)), + [allSequences], + ); + + const isMultiSequence = allSequences.length > 1; + + // Match a row against the active sequence list, returning the matched + // sequence (or null). Used to drive hwKey suffixing for the overlay case. + const matchSequence = useMemo( + () => + (r: { isl: number; osl: number }): Sequence | null => { + for (let i = 0; i < sequenceFilters.length; i++) { + const f = sequenceFilters[i]; + if (f && f(r)) return allSequences[i]; + } + return null; + }, + [sequenceFilters, allSequences], + ); + // Merge main rows with comparison date rows. // Stamp each row with the *requested* date (not the actual DB date) so that // GPUGraph's activeDates filter (keyed by user-selected date) matches the points. - const sequenceIslOsl = useMemo(() => sequenceToIslOsl(selectedSequence), [selectedSequence]); const rows = useMemo(() => { - if (!allRows || !sequenceIslOsl) return []; - const seqFilter = (r: { isl: number; osl: number }) => - r.isl === sequenceIslOsl.isl && r.osl === sequenceIslOsl.osl; - const seqFiltered = allRows.filter(seqFilter); - - // For each (hw, framework, spec_method, disagg, precision) group, keep only - // rows from the most recent date. When parallelism settings change between runs, - // old config_ids create stale data points under the same legend line — drop them. + if (!allRows || allSequences.length === 0) return []; + + const seqMatcher = matchSequence; + type RowWithSeq = (typeof allRows)[number] & { _seq: Sequence }; + const seqFiltered: RowWithSeq[] = []; + for (const r of allRows) { + const seq = seqMatcher(r); + if (seq) seqFiltered.push({ ...r, _seq: seq }); + } + + // For each (hw, framework, spec_method, disagg, precision, sequence) group, + // keep only rows from the most recent date. 
The sequence is part of the + // dedup key so 1K/1K and 8K/1K rows don't shadow each other when both are + // selected. const maxDatePerGroup = new Map(); for (const r of seqFiltered) { - const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}`; + const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}|${r._seq}`; const cur = maxDatePerGroup.get(key); if (!cur || r.date > cur) maxDatePerGroup.set(key, r.date); } const deduped = seqFiltered.filter((r) => { - const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}`; + const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}|${r._seq}`; return r.date === maxDatePerGroup.get(key); }); @@ -145,19 +209,70 @@ export function useChartData( ); if (comparisonDates.length === 0) return mainRows; const extraRows = comparisonQueries.flatMap((q, i) => - (q.data ?? []) - .filter(seqFilter) - .map((r) => ({ ...r, date: comparisonDates[i], actualDate: r.date })), + (q.data ?? []).flatMap((r) => { + const seq = seqMatcher(r); + if (!seq) return []; + return [{ ...r, _seq: seq, date: comparisonDates[i], actualDate: r.date }]; + }), ); return [...mainRows, ...extraRows]; - }, [allRows, sequenceIslOsl, comparisonDates, comparisonDataKey, selectedRunDate]); - - // Transform filtered rows into chart data + }, [allRows, allSequences, matchSequence, comparisonDates, comparisonDataKey, selectedRunDate]); + + // Transform filtered rows into chart data. + // + // When `isMultiSequence` is on, we run `transformBenchmarkRows` once per + // sequence and rewrite each output point's `hwKey` to `${origHwKey}__seq`. + // This is what makes (B200, 1K/1K) and (B200, 8K/1K) surface as separate + // legend lines (and separate roofline groups) rather than collapsing onto + // a single B200 series. 
const { chartData, hardwareConfig: rawHardwareConfig } = useMemo(() => { if (rows.length === 0) return { chartData: [] as InferenceData[][], hardwareConfig: {} as HardwareConfig }; - return transformBenchmarkRows(rows); - }, [rows]); + + if (!isMultiSequence) { + return transformBenchmarkRows(rows); + } + + type RowWithSeq = (typeof rows)[number] & { _seq: Sequence }; + const groupedBySeq = new Map(); + for (const r of rows as RowWithSeq[]) { + const arr = groupedBySeq.get(r._seq); + if (arr) arr.push(r); + else groupedBySeq.set(r._seq, [r]); + } + + const mergedHardware: HardwareConfig = {} as HardwareConfig; + // One InferenceData[] per chart definition, accumulating across sequences. + const mergedChart: InferenceData[][] = (chartDefinitions as ChartDefinition[]).map( + () => [] as InferenceData[], + ); + + for (const [seq, seqRows] of groupedBySeq) { + const { chartData: perSeq, hardwareConfig: perSeqHw } = transformBenchmarkRows(seqRows); + // Synth hw entries: one per (origHwKey, seq) pair seen this iteration. + for (const origHwKey of Object.keys(perSeqHw)) { + const synthHwKey = makeSeqSynthKey(origHwKey, seq); + if (synthHwKey in mergedHardware) continue; + mergedHardware[synthHwKey] = makeSeqSynthHardwareEntry( + getHardwareConfig(origHwKey), + origHwKey, + seq, + synthHwKey, + ); + } + // Append each chart def's data with hwKey rewritten to the synth key. + for (let i = 0; i < perSeq.length; i++) { + for (const point of perSeq[i]) { + mergedChart[i].push({ + ...point, + hwKey: makeSeqSynthKey(point.hwKey, seq), + }); + } + } + } + + return { chartData: mergedChart, hardwareConfig: mergedHardware }; + }, [rows, isMultiSequence]); // Sort hardware config — stabilize reference when keys haven't changed. 
// Different sequences for the same model often have the same GPU configs, diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index c4f860fe..8bb37af1 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -537,6 +537,13 @@ export interface InferenceChartContextType { * the overlay legend would have shown. Empty when nothing is merged. */ hwColorOverrides: Record; + /** + * Additional ISL/OSL sequences to overlay on the inference chart alongside + * `selectedSequence`. Empty by default. When non-empty, each (GPU, sequence) + * pair becomes its own legend line via a `__seq` hwKey suffix. + */ + extraSequences: Sequence[]; + setExtraSequences: (sequences: Sequence[]) => void; } export interface CalculateUserCostsRequest { model: string; diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx index 0b1705b0..f776c648 100644 --- a/packages/app/src/components/inference/ui/ChartControls.tsx +++ b/packages/app/src/components/inference/ui/ChartControls.tsx @@ -7,7 +7,7 @@ import { track } from '@/lib/analytics'; import { useInference } from '@/components/inference/InferenceContext'; import { ModelSelector, - SequenceSelector, + MultiSequenceSelector, PrecisionSelector, } from '@/components/ui/chart-selectors'; import { DateRangePicker } from '@/components/ui/date-range-picker'; @@ -92,6 +92,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setSelectedModel, selectedSequence, setSelectedSequence, + extraSequences, + setExtraSequences, selectedPrecisions, setSelectedPrecisions, selectedYAxisMetric, @@ -146,6 +148,22 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setTimeout(trackCombinedFilters, 0); }; + const handleSequencesChange = (values: Sequence[]) => { + if (values.length === 0) return; + const [primary, 
...extras] = values; + if (primary !== selectedSequence) { + handleSequenceChange(primary); + } + setExtraSequences(extras); + if (extras.length > 0) { + track('inference_sequence_overlay_changed', { + primary, + extras: extras.join(','), + count: values.length, + }); + } + }; + const handlePrecisionChange = (value: string[]) => { setSelectedPrecisions(value); track('inference_precision_selected', { @@ -214,9 +232,9 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro availableModels={availableModels} data-testid="model-selector" /> - getSequenceCategory(s as Sequence)); - const sections = [ + return [ { id: 'default', options: groups.default.map((seq) => ({ @@ -165,6 +157,18 @@ export function SequenceSelector({ ] : []), ]; +} + +export function SequenceSelector({ + id = 'sequence-select', + value, + onChange, + open, + onOpenChange, + availableSequences, + 'data-testid': testId, +}: SequenceSelectorProps) { + const sections = buildSequenceSections(availableSequences); return (
@@ -200,6 +204,69 @@ export function SequenceSelector({ ); } +interface MultiSequenceSelectorProps { + id?: string; + /** Selected sequences, primary-first. minSelections=1 is enforced. */ + value: string[]; + onChange: (value: Sequence[]) => void; + open?: boolean; + onOpenChange?: (open: boolean) => void; + availableSequences: string[]; + maxSelections?: number; + 'data-testid'?: string; +} + +/** + * Sequence picker that allows multiple ISL/OSL selections so the inference + * chart can overlay e.g. 1K/1K and 8K/1K as separate series on the same + * axes. The first selected value is treated as the "primary" sequence by + * the data pipeline; additional picks become `extraSequences` and each + * (hw, sequence) row gets a synth hwKey so it lands in its own legend line. + */ +export function MultiSequenceSelector({ + id = 'sequence-multiselect', + value, + onChange, + open, + onOpenChange, + availableSequences, + maxSelections = 3, + 'data-testid': testId, +}: MultiSequenceSelectorProps) { + const sections = buildSequenceSections(availableSequences); + + return ( +
+ +
+ { + if (values.length === 0) return; + track('selector_sequence_changed', { sequence: values.join(',') }); + onChange(values as Sequence[]); + }} + open={open} + onOpenChange={onOpenChange} + triggerId={id} + triggerTestId={testId} + placeholder="ISL / OSL" + minSelections={1} + maxSelections={maxSelections} + showClearAll={false} + searchable={false} + showSelectionSummary={false} + /> +
+
+ ); +} + interface PrecisionSelectorProps { id?: string; value: string[]; diff --git a/packages/app/src/lib/sequence-synth-key.test.ts b/packages/app/src/lib/sequence-synth-key.test.ts new file mode 100644 index 00000000..39a8eeaf --- /dev/null +++ b/packages/app/src/lib/sequence-synth-key.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from 'vitest'; + +import { Sequence } from '@/lib/data-mappings'; +import { + isSeqSynthKey, + makeSeqSynthHardwareEntry, + makeSeqSynthKey, + makeSequenceFilter, + parseSeqSynthKey, + sequenceCompact, + stripSeqSuffix, +} from '@/lib/sequence-synth-key'; + +describe('sequence-synth-key', () => { + describe('sequenceCompact', () => { + it('maps known sequences to compact form', () => { + expect(sequenceCompact(Sequence.OneK_OneK)).toBe('1k1k'); + expect(sequenceCompact(Sequence.OneK_EightK)).toBe('1k8k'); + expect(sequenceCompact(Sequence.EightK_OneK)).toBe('8k1k'); + expect(sequenceCompact(Sequence.EightK_256)).toBe('8k256'); + expect(sequenceCompact(Sequence.EightK_625)).toBe('8k625'); + }); + }); + + describe('makeSeqSynthKey / parseSeqSynthKey', () => { + it('round-trips a base hwKey through compact form', () => { + const key = makeSeqSynthKey('b200_vllm', Sequence.OneK_OneK); + expect(key).toBe('b200_vllm__seq1k1k'); + expect(parseSeqSynthKey(key)).toEqual({ + origHwKey: 'b200_vllm', + sequence: Sequence.OneK_OneK, + }); + }); + + it('preserves the base GPU prefix so vendor-color helpers keep working', () => { + // getModelSortIndex / isKnownGpu split on '_' and read [0]; this must + // still be the canonical base for any synth key shape we produce. 
+ const key = makeSeqSynthKey('gb300_dynamo-trt_mtp', Sequence.EightK_OneK); + expect(key.split('_')[0]).toBe('gb300'); + }); + + it('returns null when the suffix is absent', () => { + expect(parseSeqSynthKey('b200_vllm')).toBeNull(); + }); + + it('returns null when the compact form is unknown', () => { + // A made-up suffix shouldn't be silently accepted — better to fall back + // to "no parse" so callers can keep treating the key as opaque. + expect(parseSeqSynthKey('b200_vllm__seqbogus')).toBeNull(); + }); + + it('still parses when the seq suffix is followed by a __uorun chain', () => { + // Composing with the unofficial-merge `__uorun` shape is supported. + // parseSeqSynthKey strips the trailing chain before resolving the + // compact form. + const composed = `${makeSeqSynthKey('b200_vllm', Sequence.OneK_OneK)}__uorun123`; + expect(parseSeqSynthKey(composed)).toEqual({ + origHwKey: 'b200_vllm', + sequence: Sequence.OneK_OneK, + }); + }); + }); + + describe('isSeqSynthKey / stripSeqSuffix', () => { + it('isSeqSynthKey reports presence of the delimiter', () => { + expect(isSeqSynthKey('b200_vllm')).toBe(false); + expect(isSeqSynthKey('b200_vllm__seq1k1k')).toBe(true); + }); + + it('stripSeqSuffix is a no-op when absent', () => { + expect(stripSeqSuffix('b200_vllm')).toBe('b200_vllm'); + }); + + it('stripSeqSuffix removes the seq tail (and anything chained after)', () => { + expect(stripSeqSuffix('b200_vllm__seq1k1k')).toBe('b200_vllm'); + expect(stripSeqSuffix('b200_vllm__seq1k1k__uorun42')).toBe('b200_vllm'); + }); + }); + + describe('makeSeqSynthHardwareEntry', () => { + it('appends the sequence label so the legend can distinguish lines', () => { + const entry = makeSeqSynthHardwareEntry( + { + name: 'b200-vllm', + label: 'B200', + suffix: '(vLLM)', + gpu: "NVIDIA 'Blackwell' B200 vLLM", + framework: 'vllm', + }, + 'b200_vllm', + Sequence.OneK_OneK, + 'b200_vllm__seq1k1k', + ); + expect(entry.label).toBe('B200 — 1K / 1K'); + // suffix and framework are 
preserved so the legend line still renders + // its parens-tagged framework label. + expect(entry.suffix).toBe('(vLLM)'); + expect(entry.framework).toBe('vllm'); + // gpu tooltip carries the sequence too so a hover reveals which seq + // the line came from. + expect(entry.gpu).toContain('1K / 1K'); + }); + + it('falls back to the orig hwKey when the entry is missing', () => { + const entry = makeSeqSynthHardwareEntry(undefined, 'b200_vllm', Sequence.OneK_OneK, 'syn'); + expect(entry.label).toBe('b200_vllm — 1K / 1K'); + }); + }); + + describe('makeSequenceFilter', () => { + it('returns a predicate matching exact (isl, osl)', () => { + const filter = makeSequenceFilter(Sequence.EightK_OneK); + expect(filter).not.toBeNull(); + expect(filter!({ isl: 8192, osl: 1024 })).toBe(true); + expect(filter!({ isl: 8192, osl: 256 })).toBe(false); + expect(filter!({ isl: 1024, osl: 1024 })).toBe(false); + }); + }); +}); diff --git a/packages/app/src/lib/sequence-synth-key.ts b/packages/app/src/lib/sequence-synth-key.ts new file mode 100644 index 00000000..482168df --- /dev/null +++ b/packages/app/src/lib/sequence-synth-key.ts @@ -0,0 +1,101 @@ +/** + * Helpers for promoting per-sequence benchmark rows to first-class + * "ingested-style" series so a user can compare e.g. 1K/1K vs 8K/1K on the + * same scatter chart instead of having to flip between them with the picker. + * + * Each (origHwKey, sequence) pair becomes a synth hwKey of the form + * `${origHwKey}__seq` + * — preserving `hwKey.split('_')[0]` (the base GPU) so `getModelSortIndex`, + * `isKnownGpu`, and the vendor-color generator keep working. The `__seq` + * delimiter is also distinct from the `__uorun` delimiter used by + * unofficial-merge so the two can compose (`base__seq1k1k__uorun123`). 
+ */ +import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants'; + +import type { HardwareEntry } from '@/lib/constants'; +import { Sequence, getSequenceLabel } from '@/lib/data-mappings'; + +const SEQ_SYNTH_DELIM = '__seq'; + +const SEQUENCE_COMPACT: Record = { + [Sequence.OneK_OneK]: '1k1k', + [Sequence.OneK_EightK]: '1k8k', + [Sequence.EightK_OneK]: '8k1k', + [Sequence.EightK_256]: '8k256', + [Sequence.EightK_625]: '8k625', +}; + +const COMPACT_TO_SEQUENCE: Record = Object.fromEntries( + (Object.entries(SEQUENCE_COMPACT) as [Sequence, string][]).map(([s, c]) => [c, s]), +); + +/** Compact form for use as a URL/hwKey suffix (e.g. `1k1k`). */ +export function sequenceCompact(seq: Sequence): string { + return SEQUENCE_COMPACT[seq] ?? String(seq).replace('/', ''); +} + +/** Build a (hw, sequence) synth hwKey while keeping the original GPU base prefix. */ +export function makeSeqSynthKey(origHwKey: string, seq: Sequence): string { + return `${origHwKey}${SEQ_SYNTH_DELIM}${sequenceCompact(seq)}`; +} + +/** Reverse {@link makeSeqSynthKey}; returns null when the key has no sequence suffix. */ +export function parseSeqSynthKey(hwKey: string): { origHwKey: string; sequence: Sequence } | null { + const idx = hwKey.indexOf(SEQ_SYNTH_DELIM); + if (idx === -1) return null; + const origHwKey = hwKey.slice(0, idx); + // A trailing `__uorun` may follow the sequence compact form — strip it. + const rest = hwKey.slice(idx + SEQ_SYNTH_DELIM.length); + const compact = rest.split('__')[0]; + const sequence = COMPACT_TO_SEQUENCE[compact]; + if (!sequence) return null; + return { origHwKey, sequence }; +} + +export function isSeqSynthKey(hwKey: string): boolean { + return hwKey.includes(SEQ_SYNTH_DELIM); +} + +/** + * Strip a `__seq` suffix from a hwKey, returning the original key. + * No-op if the suffix is absent. Used by color resolution / sort helpers that + * already operate on the base hwKey via `split('_')[0]` but also want the + * fully-qualified original (e.g. 
for matching the official `hardwareConfig`). + */ +export function stripSeqSuffix(hwKey: string): string { + const idx = hwKey.indexOf(SEQ_SYNTH_DELIM); + if (idx === -1) return hwKey; + return hwKey.slice(0, idx); +} + +/** + * Build a synthesized HardwareEntry whose label is appended with the + * sequence label (e.g. "B200 — 1K/1K"). The base entry's other fields are + * preserved so downstream code (legend swatches, tooltip GPU string, etc.) + * keeps working. + */ +export function makeSeqSynthHardwareEntry( + origEntry: HardwareEntry | undefined, + origHwKey: string, + seq: Sequence, + synthHwKey: string, +): HardwareEntry { + const baseLabel = origEntry?.label ?? origHwKey; + const seqLabel = getSequenceLabel(seq); + return { + name: synthHwKey.replaceAll('_', '-'), + label: `${baseLabel} — ${seqLabel}`, + suffix: origEntry?.suffix ?? '', + gpu: origEntry?.gpu ? `${origEntry.gpu} [${seqLabel}]` : `[${seqLabel}]`, + framework: origEntry?.framework, + }; +} + +/** Build a stable ISL/OSL filter predicate for one sequence. */ +export function makeSequenceFilter( + seq: Sequence, +): ((r: { isl: number; osl: number }) => boolean) | null { + const islOsl = sequenceToIslOsl(seq); + if (!islOsl) return null; + return (r) => r.isl === islOsl.isl && r.osl === islOsl.osl; +} diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts index 713dd2f7..09c9e016 100644 --- a/packages/app/src/lib/unofficial-merge.test.ts +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -332,6 +332,80 @@ describe('mergeUnofficialIntoOfficial', () => { expect(result.graphs).toHaveLength(2); expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); }); + + it('merges overlay rows from multiple sequences and tags each synth hwKey with __seq', () => { + // Build an overlay map that has data for BOTH 1K/1K and 8K/1K so the + // multi-sequence path has something to fan out. 
+ const data = makeOverlayChartData(); + const e2eData8k = [makeOverlayPoint({ conc: 64, tpPerGpu: { y: 600, roof: false } })]; + const interactivity8k = [makeOverlayPoint({ conc: 64, tpPerGpu: { y: 600, roof: false } })]; + data['DeepSeek-R1-0528_8k/1k'] = { + e2e: { + data: e2eData8k, + gpus: { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + }, + }, + interactivity: { + data: interactivity8k, + gpus: { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + }, + }, + }; + + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: data, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + extraSequences: ['8k/1k'], + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Each chart graph receives rows from both sequences with a __seq tag + // landing BEFORE the __uorun tag so the resulting key is + // base__seq__uorun. + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + const synthKeys = e2eGraph.data.map((d) => String(d.hwKey)); + expect(synthKeys).toContain('h100_vllm__seq1k1k__uorun100'); + expect(synthKeys).toContain('h100_vllm__seq8k1k__uorun100'); + + // Hardware config carries the sequence in the label so the legend can + // tell the two H100 lines apart at a glance. + expect(result.hardwareConfig['h100_vllm__seq1k1k__uorun100'].label).toContain('1K / 1K'); + expect(result.hardwareConfig['h100_vllm__seq8k1k__uorun100'].label).toContain('8K / 1K'); + + // Base GPU prefix survives both suffixes — getModelSortIndex / + // isKnownGpu / getVendor all use split('_')[0]. 
+ expect('h100_vllm__seq1k1k__uorun100'.split('_')[0]).toBe('h100'); + }); + + it('falls back to single-sequence behavior when extraSequences only contains the primary', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + extraSequences: ['1k/1k'], // duplicate — should dedup down to single-sequence + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + // No __seq suffix when only one sequence is effectively active. + expect(e2eGraph.data.every((d) => !String(d.hwKey).includes('__seq'))).toBe(true); + }); }); // Pull a hue out of an `oklch(L C H)` string for assertions below. diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts index 248a1e62..3167e0bd 100644 --- a/packages/app/src/lib/unofficial-merge.ts +++ b/packages/app/src/lib/unofficial-merge.ts @@ -20,6 +20,8 @@ import type { import { processOverlayChartData } from '@/components/inference/utils'; import type { HardwareEntry } from '@/lib/constants'; import { overlayRunIndex } from '@/lib/overlay-run-style'; +import type { Sequence } from '@/lib/data-mappings'; +import { makeSeqSynthHardwareEntry, makeSeqSynthKey } from '@/lib/sequence-synth-key'; const SYNTH_KEY_DELIM = '__uorun'; @@ -86,6 +88,14 @@ interface MergeArgs { unofficialChartData: UnofficialChartDataMap | null; selectedModel: string; selectedSequence: string; + /** + * Additional sequences to overlay alongside `selectedSequence`. 
When this + * list is non-empty, the merger iterates over (primary + extras), fetching + * each sequence's overlay group separately and rewriting every synth hwKey + * with a `__seq` suffix so (run, GPU, sequence) triples land on + * distinct legend lines. + */ + extraSequences?: string[]; selectedYAxisMetric: string; selectedXAxisMetric: string | null; selectedE2eXAxisMetric: string | null; @@ -130,6 +140,7 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { unofficialChartData, selectedModel, selectedSequence, + extraSequences = [], selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, @@ -138,14 +149,31 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { chartDefinitions, } = args; - const dataKey = `${selectedModel}_${selectedSequence}`; - const overlayGroup = unofficialChartData?.[dataKey]; - if (!overlayGroup) { + // Iterate primary + extras. Drop duplicates and any sequence with no overlay + // group — when none of the requested sequences have overlay data the merge + // is a no-op. + const requestedSequences = (() => { + const seen = new Set(); + const out: string[] = []; + for (const s of [selectedSequence, ...extraSequences]) { + if (!s || seen.has(s)) continue; + seen.add(s); + out.push(s); + } + return out; + })(); + const sequencesWithData = requestedSequences.filter( + (s) => unofficialChartData?.[`${selectedModel}_${s}`], + ); + if (sequencesWithData.length === 0) { return { graphs: inputGraphs, hardwareConfig, colorOverrides: {} }; } + const isMultiSequence = requestedSequences.length > 1; + // When there are no official graphs but caller supplied chartDefinitions, // synthesize empty stubs so the merge still has a place to inject points. + // (Stub uses the primary sequence label.) const graphs: RenderableGraph[] = inputGraphs.length === 0 && chartDefinitions ? 
buildStubGraphsForMerge(selectedModel, selectedSequence, chartDefinitions) @@ -155,21 +183,21 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { const colorOverrides: Record = {}; /** - * Process overlay rows for one chart type: re-key by (run, origHwKey), + * Process overlay rows for one chart type: re-key by (run, origHwKey [, seq]), * synthesize a HardwareEntry on first encounter, and apply the same * metric/x-axis pipeline that `useChartData` runs on official rows so the * resulting points sit in the same coordinate space. * - * No color override is set: the synth hwKey preserves the original GPU base - * prefix, so the standard vendor-zone color generator distributes hues - * across all (official + synth) keys for a vendor automatically — that's - * how two NVIDIA GPUs from one unofficial run end up as different shades - * of green rather than two copies of the same overlay-palette color. + * When multiple sequences are selected, the synth hwKey is suffixed with + * `__seq` BEFORE the `__uorun` suffix so each (run, gpu, seq) + * triple lands on its own legend line. The synth label appends the sequence + * (e.g. "B200 — 1K/1K") so users can tell the lines apart. */ const processForChart = ( chartType: 'e2e' | 'interactivity', rawRows: InferenceData[], overlayHwConfig: HardwareConfig, + seqStr: string, ): InferenceData[] => { if (rawRows.length === 0) return []; const effectiveXMetric = chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric; @@ -186,12 +214,26 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { // data exists). Fall back to the original hwKey untouched. if (!run) return row; const origHwKey = String(row.hwKey); - const synthHwKey = makeSynthHwKey(origHwKey, run.id); + // When multi-sequence is on, suffix the hwKey with `__seq` so + // (gpu, sequence) splits BEFORE the per-run split applied below. 
The + // resulting key shape `${base}__seq__uorun` is what + // parseSynthHwKey expects (strips the trailing `__uorun`) so other + // unofficial-aware helpers continue to recover the (synth-without-run) + // key. + const seqAdjustedKey = isMultiSequence + ? makeSeqSynthKey(origHwKey, seqStr as Sequence) + : origHwKey; + const synthHwKey = makeSynthHwKey(seqAdjustedKey, run.id); if (!(synthHwKey in mergedHardwareConfig)) { const origEntry = hardwareConfig[origHwKey] ?? overlayHwConfig[origHwKey]; + // Build entry off of the seq-tagged label when in multi-sequence mode + // so the legend reads e.g. "B200 — 1K/1K" rather than just "B200". + const seqAdjustedEntry = isMultiSequence + ? makeSeqSynthHardwareEntry(origEntry, origHwKey, seqStr as Sequence, seqAdjustedKey) + : origEntry; mergedHardwareConfig[synthHwKey] = makeSynthHardwareEntry( - origEntry, - origHwKey, + seqAdjustedEntry, + seqAdjustedKey, run, synthHwKey, ); @@ -202,11 +244,22 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { const mergedGraphs: RenderableGraph[] = graphs.map((g) => { const ct = g.chartDefinition.chartType as 'e2e' | 'interactivity'; - const overlayRows = ct === 'e2e' ? overlayGroup.e2e.data : overlayGroup.interactivity.data; - const overlayHwCfg = ct === 'e2e' ? overlayGroup.e2e.gpus : overlayGroup.interactivity.gpus; - const merged = processForChart(ct, overlayRows, overlayHwCfg); - if (merged.length === 0) return g; - return { ...g, data: [...g.data, ...merged] }; + // Accumulate per-sequence overlay rows into the graph's data array. + let mergedData: InferenceData[] = g.data; + let appended = false; + for (const seqStr of sequencesWithData) { + const overlayGroup = unofficialChartData![`${selectedModel}_${seqStr}`]; + const overlayRows = ct === 'e2e' ? overlayGroup.e2e.data : overlayGroup.interactivity.data; + const overlayHwCfg = ct === 'e2e' ? 
overlayGroup.e2e.gpus : overlayGroup.interactivity.gpus; + const overlay = processForChart(ct, overlayRows, overlayHwCfg, seqStr); + if (overlay.length > 0) { + if (!appended) mergedData = [...mergedData]; + mergedData.push(...overlay); + appended = true; + } + } + if (!appended) return g; + return { ...g, data: mergedData }; }); return { diff --git a/packages/app/src/lib/url-state.ts b/packages/app/src/lib/url-state.ts index a962bca3..64eda87d 100644 --- a/packages/app/src/lib/url-state.ts +++ b/packages/app/src/lib/url-state.ts @@ -20,6 +20,7 @@ const URL_STATE_KEYS = [ 'g_runid', // Inference 'i_seq', + 'i_seq_extra', 'i_prec', 'i_metric', 'i_xmetric', @@ -65,6 +66,7 @@ export const PARAM_DEFAULTS: Record = { g_rundate: '', g_runid: '', i_seq: '8k/1k', + i_seq_extra: '', i_prec: 'fp4', i_metric: 'y_tpPerGpu', i_xmetric: 'p99_ttft', From c9c31a7c2f2dc10edff2e42a043a49d0303f13d5 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 03:11:45 +0000 Subject: [PATCH 10/10] fix(unofficial): sync merge toggle on popstate, survive URL cleanup race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related Cursor findings on the `i_uoff_ingested` URL param: 1. **Merge toggle cleared after URL hydration** — the post-mount sync was reading `window.location.search`, but `url-state.ts` strips share-link params on a deferred `setTimeout(0)`. If that cleanup ran before our effect, a shared link with `i_uoff_ingested=1` would silently turn the toggle off. Both the `useState` initializer and the hydration effect now read `readUrlParams()` (in-memory snapshot captured before cleanup). 2. **Popstate skips ingested toggle sync** — the `popstate` listener re-loaded run data but never re-read `i_uoff_ingested`. Browser back/forward could leave the toggle out of sync with the URL. 
The load function now takes an `isPopstate` flag and re-syncs the toggle from `window.location.search` only on popstate (not on the initial call, which would race with the cleanup above). Also: drop a stale lint nit in `unofficial-merge.test.ts` by switching the hue-extraction regex to a named-capture-group + unicode flag. Co-authored-by: Bryan Shan --- .../components/unofficial-run-provider.tsx | 41 +++++++++++++------ packages/app/src/lib/unofficial-merge.test.ts | 4 +- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx index a17fa2e0..01f58e96 100644 --- a/packages/app/src/components/unofficial-run-provider.tsx +++ b/packages/app/src/components/unofficial-run-provider.tsx @@ -20,6 +20,7 @@ import { normalizeEvalHardwareKey } from '@/lib/chart-utils'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import { transformBenchmarkRows } from '@/lib/benchmark-transform'; import { Model, Sequence } from '@/lib/data-mappings'; +import { readUrlParams } from '@/lib/url-state'; interface UnofficialRunInfo { id: number; @@ -186,21 +187,23 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { >([]); // Promote unofficial rows to ingested-style series. Initial value seeded - // synchronously when running in the browser so the toggle starts checked - // when the user shares a URL like `?unofficialrun=…&i_uoff_ingested=1`. - // Under SSR the value is false; we sync from the URL again after mount via - // the popstate listener attached below. + // from the URL snapshot in `url-state.ts` (which captures share-link params + // at module load BEFORE its deferred cleanup strips them) so a share link + // like `?unofficialrun=…&i_uoff_ingested=1` starts checked. Reading + // `window.location.search` here would race against that cleanup and lose + // the value. 
Under SSR the value is false; we re-sync after mount and on + // popstate via the listener attached below. const [mergeAsIngested, setMergeAsIngestedRaw] = useState(() => { if (typeof window === 'undefined') return false; - const sp = new URLSearchParams(window.location.search); - return sp.get('i_uoff_ingested') === '1'; + return readUrlParams().i_uoff_ingested === '1'; }); // Re-sync after hydration in case the server rendered with the SSR default. + // Source of truth is `readUrlParams()` (snapshot captured before url-state's + // deferred cleanup), not the live address bar. useEffect(() => { if (typeof window === 'undefined') return; - const sp = new URLSearchParams(window.location.search); - const fromUrl = sp.get('i_uoff_ingested') === '1'; - setMergeAsIngestedRaw((prev) => (prev !== fromUrl ? fromUrl : prev)); + const fromUrl = readUrlParams().i_uoff_ingested === '1'; + setMergeAsIngestedRaw((prev) => (prev === fromUrl ? prev : fromUrl)); }, []); const setMergeAsIngested = useCallback((v: boolean) => { setMergeAsIngestedRaw(v); @@ -380,8 +383,19 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { ); useEffect(() => { - const load = () => { + const load = (isPopstate: boolean) => { const params = new URLSearchParams(window.location.search); + // On popstate the browser restored the previous history entry, so the + // URL is the source of truth for the merge toggle. Don't re-sync on + // the initial mount call — by then `url-state.ts`'s deferred cleanup + // may have stripped `i_uoff_ingested` from the address bar, and the + // seeded `useState` already reflects the snapshot. (`setMergeAsIngested` + // writes `i_uoff_ingested=1` via `replaceState` when the user toggles, + // so back/forward replays it.) 
+ if (isPopstate) { + setMergeAsIngestedRaw(params.get('i_uoff_ingested') === '1'); + } + let unofficialRunIdParam: string | undefined; for (const [key, value] of params) { if (UNOFFICIAL_RUN_PARAM_RE.test(key) && value) { @@ -424,9 +438,10 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { .finally(() => setLoading(false)); }; - load(); - window.addEventListener('popstate', load); - return () => window.removeEventListener('popstate', load); + load(false); + const onPopstate = () => load(true); + window.addEventListener('popstate', onPopstate); + return () => window.removeEventListener('popstate', onPopstate); }, []); return ( diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts index 09c9e016..38a64db9 100644 --- a/packages/app/src/lib/unofficial-merge.test.ts +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -410,8 +410,8 @@ describe('mergeUnofficialIntoOfficial', () => { // Pull a hue out of an `oklch(L C H)` string for assertions below. function hueOf(s: string): number { - const m = s.match(/oklch\([^)]*\s+([\d.]+)\)/); - return m ? Number(m[1]) : NaN; + const m = s.match(/oklch\([^)]*\s+(?<hue>[\d.]+)\)/u); + return m?.groups?.hue ? Number(m.groups.hue) : NaN; } describe('synth hwKey color integration with generateVendorColors', () => {