diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 7e300f45..e3b58116 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -250,6 +250,9 @@ export function createMockInferenceContext( activePresetId: null, setActivePresetId: namedStub('setActivePresetId'), presetGuardRef: { current: false } as React.RefObject, + hwColorOverrides: {}, + extraSequences: [], + setExtraSequences: namedStub('setExtraSequences'), compareGpuPair: null, ...overrides, }; @@ -442,6 +445,8 @@ export function createMockUnofficialRunContext( ): UnofficialRunContextType { return { isUnofficialRun: false, + mergeAsIngested: false, + setMergeAsIngested: namedStub('setMergeAsIngested'), unofficialRunInfo: null, unofficialRunInfos: [], runIndexByUrl: {}, diff --git a/packages/app/src/app/api/unofficial-run/route.test.ts b/packages/app/src/app/api/unofficial-run/route.test.ts index be71324b..e6fc0747 100644 --- a/packages/app/src/app/api/unofficial-run/route.test.ts +++ b/packages/app/src/app/api/unofficial-run/route.test.ts @@ -207,6 +207,54 @@ describe('normalizeArtifactRows', () => { expect(rows.every((r) => r.date === '2026-03-11')).toBe(true); }); + it('normalizes the offline B300 TRT compatibility row', () => { + const rows = normalizeArtifactRows( + [ + rawRow({ + hw: 'b300', + model: 'deepseek-ai/DeepSeek-V4-Pro', + infmax_model_prefix: 'dsv4', + framework: 'trt', + precision: 'fp4', + isl: 8192, + osl: 625, + conc: 32, + prefill_tp: 4, + prefill_ep: 1, + prefill_dp_attention: false, + prefill_num_workers: 0, + decode_tp: 4, + decode_ep: 1, + decode_dp_attention: false, + decode_num_workers: 0, + num_prefill_gpu: 4, + num_decode_gpu: 4, + spec_decoding: 'mtp', + tput_per_gpu: 489.17, + output_tput_per_gpu: 489.17, + mean_tpot: 0.01635, + }), + ], + '2026-06-13', + ); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + hardware: 'b300', + framework: 'trt', + model: 'dsv4', 
+ precision: 'fp4', + spec_method: 'mtp', + isl: 8192, + osl: 625, + conc: 32, + decode_tp: 4, + decode_ep: 1, + num_decode_gpu: 4, + }); + expect(rows[0].metrics.output_tput_per_gpu).toBe(489.17); + expect(rows[0].metrics.mean_tpot).toBe(0.01635); + }); + it('surfaces the per-worker measured-power array on the BenchmarkRow', () => { const workers = [ { diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index 072c99f1..a3d9e5a1 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -224,15 +224,22 @@ async function processSingleRun( const bmkArtifact = artifacts .filter((a) => a.name === 'results_bmk') .toSorted((a, b) => b.id - a.id)[0]; + // Fallback: some workflows (e.g. the Mock-ascend uploader) emit one + // `bmk__conc__` artifact per concurrency instead of a + // single aggregated `results_bmk`. When the canonical artifact is absent, + // gather everything matching `bmk_*` and concatenate the rows. + const perConfigBmkArtifacts = bmkArtifact + ? 
[] + : artifacts.filter((a) => a.name.startsWith('bmk_')); const evalArtifact = artifacts .filter((a) => a.name === 'eval_results_all') .toSorted((a, b) => b.id - a.id)[0]; - if (!bmkArtifact && !evalArtifact) { + if (!bmkArtifact && perConfigBmkArtifacts.length === 0 && !evalArtifact) { return { errorResponse: NextResponse.json( { - error: `No results_bmk or eval_results_all artifact found for runId ${runId}`, + error: `No results_bmk, bmk_*, or eval_results_all artifact found for runId ${runId}`, }, { status: 404 }, ), @@ -253,6 +260,17 @@ async function processSingleRun( ); if (errorResponse) return { errorResponse }; benchmarks = normalizeArtifactRows(rows, date, runUrl || null); + } else if (perConfigBmkArtifacts.length > 0) { + const allRows: Record[] = []; + for (const artifact of perConfigBmkArtifacts) { + const { rows, errorResponse } = await downloadArtifactRows( + artifact.archive_download_url, + githubToken, + ); + if (errorResponse) return { errorResponse }; + allRows.push(...rows); + } + benchmarks = normalizeArtifactRows(allRows, date, runUrl || null); } if (evalArtifact) { diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 62ae64ff..f88fb696 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -36,7 +36,11 @@ import { Sequence, SEQUENCE_OPTIONS, } from '@/lib/data-mappings'; -import { computeAutoSwitchDecision } from '@/lib/unofficial-run-auto-switch'; +import { + computeAutoSwitchDecision, + computeUnofficialOverrideDecision, + selectUnofficialDefaultSequence, +} from '@/lib/unofficial-run-auto-switch'; import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api'; interface RunInfo { @@ -259,6 +263,29 @@ export function GlobalFilterProvider({ } }, [unofficialAvailable, selectedModel]); + // TEMPORARY (this branch only): prefer `8K / 256` when an unofficial run + // provides it and the URL 
didn't pin `i_seq`. Otherwise use a sequence that + // is actually present in the run so an uncommon shape such as 8K/625 is + // visible on first load. Manual sequence picks stick because the URL gets + // `i_seq` written by the URL-sync effect after the override fires. + const lastUnofficialSeqOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_seq'), + lastUnofficialSeqOverrideRef.current, + ); + lastUnofficialSeqOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + const sequence = selectUnofficialDefaultSequence( + unofficialAvailable, + selectedModel, + getUrlParam('g_model'), + ); + if (sequence !== null) setSelectedSequence(sequence); + } + }, [unofficialAvailable, selectedModel]); + // Sequences available for the selected model (DB ∪ unofficial run for this model) const availableSequences = useMemo(() => { const unofficialSeqs = unofficialAvailable diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index fc992ee4..be13c3a9 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -21,10 +21,15 @@ import { import { useGlobalFilters } from '@/components/GlobalFilterContext'; import type { + ChartDefinition, InferenceChartContextType, InferenceData, TrackedConfig, } from '@/components/inference/types'; +import { useUnofficialRun } from '@/components/unofficial-run-provider'; +import chartDefinitions from '@/components/inference/inference-chart-config.json'; +import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge'; +import { computeUnofficialOverrideDecision } from '@/lib/unofficial-run-auto-switch'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -43,7 +48,7 @@ import { import { useUrlState } from '@/hooks/useUrlState'; import { buildAvailabilityHwKey } from 
'@/lib/chart-utils'; import { getHardwareConfig, getModelSortIndex, isKnownGpu, TABLEAU_10 } from '@/lib/constants'; -import { getModelExclusion, MODEL_PREFIX_MAPPING } from '@/lib/data-mappings'; +import { getModelExclusion, MODEL_PREFIX_MAPPING, Sequence } from '@/lib/data-mappings'; import { MtpEngineConflictToast, type MtpEngineConflictDetail, @@ -164,6 +169,32 @@ export function InferenceProvider({ () => getUrlParam('i_gradlabel') === '1', ); const [showLineLabels, setShowLineLabels] = useState(() => getUrlParam('i_linelabel') === '1'); + + // ── Multi-sequence overlay ──────────────────────────────────────────────── + // `extraSequences` is the inference-tab-only list of additional ISL/OSL + // shapes to render alongside the primary `selectedSequence`. Empty by + // default — when non-empty, `useChartData` pulls rows for all + // [primary, ...extras] and suffixes each row's hwKey with `__seq` + // so e.g. B200@1K/1K and B200@8K/1K surface as two distinct legend lines. + const [extraSequences, setExtraSequencesRaw] = useState(() => { + const urlExtra = getUrlParam('i_seq_extra'); + if (!urlExtra) return []; + const valid = urlExtra + .split(',') + .filter((s): s is Sequence => Object.values(Sequence).includes(s as Sequence)); + return valid; + }); + const setExtraSequences = useCallback((seqs: Sequence[]) => { + // Dedup defensively; preserve order. + const seen = new Set(); + setExtraSequencesRaw( + seqs.filter((s) => { + if (seen.has(s)) return false; + seen.add(s); + return true; + }), + ); + }, []); const [showSpeedOverlay, setShowSpeedOverlay] = useState(() => getUrlParam('i_speed') === '1'); const [showMinecraftOverlay, setShowMinecraftOverlay] = useState( () => getUrlParam('i_mc') === '1', @@ -202,11 +233,25 @@ export function InferenceProvider({ // ── Data fetching (gated by isActive) ────────────────────────────────────── const latestDate = availableDates.length > 0 ? 
availableDates.at(-1) : undefined; + // Drop extras that aren't currently available or duplicate the primary — + // mirrors the `effectivePrecisions` pattern so a stale URL/preset selection + // doesn't pin a sequence the current model can't render. + const effectiveExtraSequences = useMemo(() => { + const out: Sequence[] = []; + const availSet = new Set(availableSequences as string[]); + for (const s of extraSequences) { + if (s === effectiveSequence) continue; + if (!availSet.has(s)) continue; + out.push(s); + } + return out; + }, [extraSequences, availableSequences, effectiveSequence]); + const { - graphs, + graphs: officialGraphs, loading: chartDataLoading, error: chartDataError, - hardwareConfig, + hardwareConfig: officialHardwareConfig, } = useChartData( selectedModel, effectiveSequence, @@ -222,9 +267,87 @@ export function InferenceProvider({ effectiveRunDate, isActive, latestDate, + effectiveExtraSequences, compareGpuPair ?? null, ); + // ── Promote unofficial rows to first-class series when toggled ──────────── + // When `mergeAsIngested` is on, overlay points are re-keyed with per-run + // synth hwKeys and merged into `graphs` so they participate in the same + // filter/optimal-only/legend pipeline as official data. The resulting + // `hwColorOverrides` map is consumed by ScatterGraph's color resolver. + const { + mergeAsIngested, + unofficialChartData, + unofficialRunInfos, + runIndexByUrl, + availableModelsAndSequences: unofficialAvailable, + } = useUnofficialRun(); + + // TEMPORARY (this branch only): default the y-axis metric to "Output Token + // Throughput per GPU" when an unofficial run loads and the URL didn't pin + // `i_metric`. Mirrors the sequence override in GlobalFilterContext — manual + // metric picks stick because the URL gets `i_metric` written after the + // override fires. 
+ const lastUnofficialMetricOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_metric'), + lastUnofficialMetricOverrideRef.current, + ); + lastUnofficialMetricOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + setSelectedYAxisMetric('y_outputTputPerGpu'); + } + }, [unofficialAvailable]); + + const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => { + if (!mergeAsIngested) { + return { + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + hwColorOverrides: {} as Record, + }; + } + const merged = mergeUnofficialIntoOfficial({ + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + unofficialChartData, + selectedModel, + selectedSequence: effectiveSequence, + extraSequences: effectiveExtraSequences, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos: unofficialRunInfos.map((r) => ({ + id: r.id, + branch: r.branch, + url: r.url, + })), + chartDefinitions: chartDefinitions as ChartDefinition[], + }); + return { + graphs: merged.graphs, + hardwareConfig: merged.hardwareConfig, + hwColorOverrides: merged.colorOverrides, + }; + }, [ + mergeAsIngested, + officialGraphs, + officialHardwareConfig, + unofficialChartData, + selectedModel, + effectiveSequence, + effectiveExtraSequences, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos, + ]); + // For GPU comparison date picker — use shared availability data from global filters const dbModelKeys = useMemo( () => DISPLAY_MODEL_TO_DB[selectedModel] ?? [selectedModel], @@ -570,6 +693,13 @@ export function InferenceProvider({ // reset commits as soon as data for the new model arrives — without this, switching models // bails on the empty-data tick and never re-fires, leaving the legend at the prior intersection. 
const precisionsKey = effectivePrecisions.join(','); + // Include `extraSequences` and `mergeAsIngested` in the reset key so that + // toggling either flips the active set to include the newly-introduced + // synth hwKeys. Without these, `useChartDataFilter.reconcileActiveSet` only + // removes stale keys — never adds new ones — so merged or sequence-tagged + // series silently render hidden. (Addresses Cursor "Merged series stay + // legend-inactive" review finding.) + const extraSequencesKey = effectiveExtraSequences.join(','); const lastHwResetKeyRef = useRef(''); // Restore legend-active selection from URL on first availability of @@ -604,7 +734,7 @@ export function InferenceProvider({ } } setActiveHwTypes(restored); - lastHwResetKeyRef.current = `${selectedModel}|${effectiveSequence}|${precisionsKey}`; + lastHwResetKeyRef.current = `${selectedModel}|${effectiveSequence}|${precisionsKey}|${extraSequencesKey}|${mergeAsIngested ? '1' : '0'}`; setPendingActiveHwTypes(null); }, [ pendingActiveHwTypes, @@ -613,6 +743,8 @@ export function InferenceProvider({ selectedModel, effectiveSequence, precisionsKey, + extraSequencesKey, + mergeAsIngested, setActiveHwTypes, ]); @@ -620,7 +752,7 @@ export function InferenceProvider({ if (pendingHwFilterRef.current) return; if (pendingActiveHwTypes) return; if (hwTypesWithData.size === 0) return; - const key = `${selectedModel}|${effectiveSequence}|${precisionsKey}`; + const key = `${selectedModel}|${effectiveSequence}|${precisionsKey}|${extraSequencesKey}|${mergeAsIngested ? '1' : '0'}`; if (lastHwResetKeyRef.current === key) return; lastHwResetKeyRef.current = key; const presetFilter = presetHwFilterRef.current; @@ -654,6 +786,8 @@ export function InferenceProvider({ selectedModel, effectiveSequence, precisionsKey, + extraSequencesKey, + mergeAsIngested, hwTypesWithData, exclusion, pendingActiveHwTypes, @@ -803,6 +937,7 @@ export function InferenceProvider({ i_speed: showSpeedOverlay ? '1' : '', i_mc: showMinecraftOverlay ? 
'1' : '', i_active: iActiveStr, + i_seq_extra: effectiveExtraSequences.join(','), }, [ selectedYAxisMetric, @@ -823,6 +958,7 @@ export function InferenceProvider({ showSpeedOverlay, showMinecraftOverlay, iActiveStr, + effectiveExtraSequences, ], ); @@ -1030,6 +1166,9 @@ export function InferenceProvider({ activePresetId, setActivePresetId, presetGuardRef, + hwColorOverrides, + extraSequences: effectiveExtraSequences, + setExtraSequences, compareGpuPair: compareGpuPair ?? null, }), [ @@ -1084,6 +1223,9 @@ export function InferenceProvider({ removeTrackedConfig, clearTrackedConfigs, activePresetId, + hwColorOverrides, + effectiveExtraSequences, + setExtraSequences, compareGpuPair, ], ); diff --git a/packages/app/src/components/inference/hooks/useChartData.test.ts b/packages/app/src/components/inference/hooks/useChartData.test.ts index 73582998..8dd06ee8 100644 --- a/packages/app/src/components/inference/hooks/useChartData.test.ts +++ b/packages/app/src/components/inference/hooks/useChartData.test.ts @@ -60,6 +60,20 @@ describe('filterByGPU', () => { it('excludes when neither key nor alias matches', () => { expect(filterByGPU([{ hwKey: 'unknown' }], ['h100'], {})).toHaveLength(0); }); + + it('matches multi-sequence synth keys by stripping the __seq suffix', () => { + // When extraSequences is on, useChartData rewrites each row's hwKey to + // `${origHwKey}__seq` so (gpu, sequence) splits into separate + // legend lines. The GPU selector still picks canonical keys, so the + // filter has to look past the suffix. 
+ const data = [ + { hwKey: 'b200_vllm__seq1k1k' }, + { hwKey: 'b200_vllm__seq8k1k' }, + { hwKey: 'h100__seq1k1k' }, + ]; + const result = filterByGPU(data, ['b200_vllm'], {}); + expect(result.map((d) => d.hwKey)).toEqual(['b200_vllm__seq1k1k', 'b200_vllm__seq8k1k']); + }); }); describe('flipRooflineDirection', () => { diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index 13b22951..d409fe4b 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -16,11 +16,17 @@ import { filterDataByCostLimit } from '@/components/inference/utils'; import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks'; import { GPU_ALIAS_TO_CANONICAL, + getHardwareConfig, getModelSortIndex, hardwareKeyMatchesAnyBase, } from '@/lib/constants'; import { transformBenchmarkRows } from '@/lib/benchmark-transform'; import type { Model, Sequence } from '@/lib/data-mappings'; +import { + makeSeqSynthHardwareEntry, + makeSeqSynthKey, + makeSequenceFilter, +} from '@/lib/sequence-synth-key'; import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils'; /** Build deduplicated comparison dates, excluding the main run date. */ @@ -39,7 +45,13 @@ export function buildComparisonDates( return [...new Set(dates.filter((d) => d !== selectedRunDate))]; } -/** Filter data by GPU key, resolving aliases to canonical keys. */ +/** Filter data by GPU key, resolving aliases to canonical keys. + * + * Multi-sequence overlay rows arrive with hwKeys of the form + * `${origHwKey}__seq`. The GPU selector still picks canonical keys, + * so strip the `__seq` suffix before matching: a selected `b200_vllm` should + * match `b200_vllm__seq1k1k`. 
+ */ export function filterByGPU( data: T[], selectedGPUs: string[], @@ -47,7 +59,9 @@ export function filterByGPU( ): T[] { if (selectedGPUs.length === 0) return data; return data.filter((dp) => { - const hwKey = String(dp.hwKey); + const rawKey = String(dp.hwKey); + const seqIdx = rawKey.indexOf('__seq'); + const hwKey = seqIdx === -1 ? rawKey : rawKey.slice(0, seqIdx); const canonical = aliasMap[hwKey]; return ( selectedGPUs.includes(hwKey) || (canonical !== undefined && selectedGPUs.includes(canonical)) @@ -83,6 +97,13 @@ export function useChartData( selectedRunDate?: string, enabled = true, latestAvailableDate?: string, + /** + * Additional sequences to overlay alongside `selectedSequence`. When this + * list is non-empty, rows for ALL (primary + extras) sequences are pulled + * in, and each row's hwKey gets a `__seq` suffix so the (hw, + * sequence) pair surfaces as its own series in the legend. + */ + extraSequences: Sequence[] = [], /** When set, only series for these two registry GPU keys are shown (compare pages). */ compareGpuPair?: readonly [string, string] | null, ) { @@ -122,27 +143,71 @@ export function useChartData( // so we derive a stable key from dataUpdatedAt timestamps to avoid cascading memo invalidation. const comparisonDataKey = comparisonQueries.map((q) => q.dataUpdatedAt).join(','); + // Build the full sequence list (primary + extras, dedup, drop unresolvable). + // Order matters for synth-key stability: primary always comes first so that + // when `extraSequences` is later cleared the row hwKeys land back on the + // primary's compact form. + const allSequences = useMemo(() => { + const seen = new Set(); + const out: Sequence[] = []; + for (const s of [selectedSequence, ...extraSequences]) { + if (!s || seen.has(s)) continue; + if (!sequenceToIslOsl(s)) continue; + seen.add(s); + out.push(s); + } + return out; + }, [selectedSequence, extraSequences]); + + // Pre-compute a per-sequence ISL/OSL filter. Indexed parallel to allSequences. 
+ type SeqFilter = ((r: { isl: number; osl: number }) => boolean) | null; + const sequenceFilters = useMemo( + () => allSequences.map((s) => makeSequenceFilter(s)), + [allSequences], + ); + + const isMultiSequence = allSequences.length > 1; + + // Match a row against the active sequence list, returning the matched + // sequence (or null). Used to drive hwKey suffixing for the overlay case. + const matchSequence = useMemo( + () => + (r: { isl: number; osl: number }): Sequence | null => { + for (let i = 0; i < sequenceFilters.length; i++) { + const f = sequenceFilters[i]; + if (f && f(r)) return allSequences[i]; + } + return null; + }, + [sequenceFilters, allSequences], + ); + // Merge main rows with comparison date rows. // Stamp each row with the *requested* date (not the actual DB date) so that // GPUGraph's activeDates filter (keyed by user-selected date) matches the points. - const sequenceIslOsl = useMemo(() => sequenceToIslOsl(selectedSequence), [selectedSequence]); const rows = useMemo(() => { - if (!allRows || !sequenceIslOsl) return []; - const seqFilter = (r: { isl: number; osl: number }) => - r.isl === sequenceIslOsl.isl && r.osl === sequenceIslOsl.osl; - const seqFiltered = allRows.filter(seqFilter); - - // For each (hw, framework, spec_method, disagg, precision) group, keep only - // rows from the most recent date. When parallelism settings change between runs, - // old config_ids create stale data points under the same legend line — drop them. + if (!allRows || allSequences.length === 0) return []; + + const seqMatcher = matchSequence; + type RowWithSeq = (typeof allRows)[number] & { _seq: Sequence }; + const seqFiltered: RowWithSeq[] = []; + for (const r of allRows) { + const seq = seqMatcher(r); + if (seq) seqFiltered.push({ ...r, _seq: seq }); + } + + // For each (hw, framework, spec_method, disagg, precision, sequence) group, + // keep only rows from the most recent date. 
The sequence is part of the + // dedup key so 1K/1K and 8K/1K rows don't shadow each other when both are + // selected. const maxDatePerGroup = new Map(); for (const r of seqFiltered) { - const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}`; + const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}|${r._seq}`; const cur = maxDatePerGroup.get(key); if (!cur || r.date > cur) maxDatePerGroup.set(key, r.date); } const deduped = seqFiltered.filter((r) => { - const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}`; + const key = `${r.hardware}|${r.framework}|${r.spec_method}|${r.disagg}|${r.precision}|${r._seq}`; return r.date === maxDatePerGroup.get(key); }); @@ -151,19 +216,70 @@ export function useChartData( ); if (comparisonDates.length === 0) return mainRows; const extraRows = comparisonQueries.flatMap((q, i) => - (q.data ?? []) - .filter(seqFilter) - .map((r) => ({ ...r, date: comparisonDates[i], actualDate: r.date })), + (q.data ?? []).flatMap((r) => { + const seq = seqMatcher(r); + if (!seq) return []; + return [{ ...r, _seq: seq, date: comparisonDates[i], actualDate: r.date }]; + }), ); return [...mainRows, ...extraRows]; - }, [allRows, sequenceIslOsl, comparisonDates, comparisonDataKey, selectedRunDate]); - - // Transform filtered rows into chart data + }, [allRows, allSequences, matchSequence, comparisonDates, comparisonDataKey, selectedRunDate]); + + // Transform filtered rows into chart data. + // + // When `isMultiSequence` is on, we run `transformBenchmarkRows` once per + // sequence and rewrite each output point's `hwKey` to `${origHwKey}__seq`. + // This is what makes (B200, 1K/1K) and (B200, 8K/1K) surface as separate + // legend lines (and separate roofline groups) rather than collapsing onto + // a single B200 series. 
const { chartData, hardwareConfig: rawHardwareConfig } = useMemo(() => { if (rows.length === 0) return { chartData: [] as InferenceData[][], hardwareConfig: {} as HardwareConfig }; - return transformBenchmarkRows(rows); - }, [rows]); + + if (!isMultiSequence) { + return transformBenchmarkRows(rows); + } + + type RowWithSeq = (typeof rows)[number] & { _seq: Sequence }; + const groupedBySeq = new Map(); + for (const r of rows as RowWithSeq[]) { + const arr = groupedBySeq.get(r._seq); + if (arr) arr.push(r); + else groupedBySeq.set(r._seq, [r]); + } + + const mergedHardware: HardwareConfig = {} as HardwareConfig; + // One InferenceData[] per chart definition, accumulating across sequences. + const mergedChart: InferenceData[][] = (chartDefinitions as ChartDefinition[]).map( + () => [] as InferenceData[], + ); + + for (const [seq, seqRows] of groupedBySeq) { + const { chartData: perSeq, hardwareConfig: perSeqHw } = transformBenchmarkRows(seqRows); + // Synth hw entries: one per (origHwKey, seq) pair seen this iteration. + for (const origHwKey of Object.keys(perSeqHw)) { + const synthHwKey = makeSeqSynthKey(origHwKey, seq); + if (synthHwKey in mergedHardware) continue; + mergedHardware[synthHwKey] = makeSeqSynthHardwareEntry( + getHardwareConfig(origHwKey), + origHwKey, + seq, + synthHwKey, + ); + } + // Append each chart def's data with hwKey rewritten to the synth key. + for (let i = 0; i < perSeq.length; i++) { + for (const point of perSeq[i]) { + mergedChart[i].push({ + ...point, + hwKey: makeSeqSynthKey(point.hwKey, seq), + }); + } + } + } + + return { chartData: mergedChart, hardwareConfig: mergedHardware }; + }, [rows, isMultiSequence]); // Sort hardware config — stabilize reference when keys haven't changed. 
// Different sequences for the same model often have the same GPU configs, diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index cbf64787..d5c6081f 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -665,6 +665,19 @@ export interface InferenceChartContextType { activePresetId: string | null; setActivePresetId: (id: string | null) => void; presetGuardRef: React.RefObject; + /** + * Per-hwKey CSS color overrides. Populated when unofficial-as-ingested + * merging is on so each synthesized run series gets the same per-run color + * the overlay legend would have shown. Empty when nothing is merged. + */ + hwColorOverrides: Record; + /** + * Additional ISL/OSL sequences to overlay on the inference chart alongside + * `selectedSequence`. Empty by default. When non-empty, each (GPU, sequence) + * pair becomes its own legend line via a `__seq` hwKey suffix. + */ + extraSequences: Sequence[]; + setExtraSequences: (sequences: Sequence[]) => void; /** Compare pages only: slug GPU pair used to filter benchmark series. 
*/ compareGpuPair: readonly [string, string] | null; } diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx index 27274f02..20428e54 100644 --- a/packages/app/src/components/inference/ui/ChartControls.tsx +++ b/packages/app/src/components/inference/ui/ChartControls.tsx @@ -8,7 +8,7 @@ import { useFeatureGate } from '@/lib/use-feature-gate'; import { useInference } from '@/components/inference/InferenceContext'; import { ModelSelector, - SequenceSelector, + MultiSequenceSelector, PrecisionSelector, } from '@/components/ui/chart-selectors'; import { DateRangePicker } from '@/components/ui/date-range-picker'; @@ -100,6 +100,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setSelectedModel, selectedSequence, setSelectedSequence, + extraSequences, + setExtraSequences, selectedPrecisions, setSelectedPrecisions, selectedYAxisMetric, @@ -178,6 +180,22 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setTimeout(trackCombinedFilters, 0); }; + const handleSequencesChange = (values: Sequence[]) => { + if (values.length === 0) return; + const [primary, ...extras] = values; + if (primary !== selectedSequence) { + handleSequenceChange(primary); + } + setExtraSequences(extras); + if (extras.length > 0) { + track('inference_sequence_overlay_changed', { + primary, + extras: extras.join(','), + count: values.length, + }); + } + }; + const handlePrecisionChange = (value: string[]) => { setSelectedPrecisions(value); track('inference_precision_selected', { @@ -246,9 +264,9 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro availableModels={availableModels} data-testid="model-selector" /> - { - if (!unofficialRunInfo || !getOverlayData) { + if (mergeAsIngested || !unofficialRunInfo || !getOverlayData) { return { e2e: null, interactivity: null }; } @@ -250,6 +253,7 @@ export default function ChartDisplay() 
{ interactivity: processData(interactivityRaw, 'interactivity'), }; }, [ + mergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index fa383ada..5201ca6f 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -165,10 +165,13 @@ const ScatterGraph = React.memo( trackedConfigs, addTrackedConfig, removeTrackedConfig, + hwColorOverrides, } = useInference(); const { isUnofficialRun, + mergeAsIngested, + setMergeAsIngested, activeOverlayHwTypes, setActiveOverlayHwTypes, allOverlayHwTypes, @@ -234,12 +237,23 @@ const ScatterGraph = React.memo( () => [...effectiveOfficialHwTypes], [effectiveOfficialHwTypes], ); - const { resolveColor, getCssColor } = useThemeColors({ + const { resolveColor: baseResolveColor, getCssColor } = useThemeColors({ highContrast, identifiers: activeHwKeys, activeKeys: activeOfficialKeys, }); + // Wrap resolveColor so synthesized unofficial-as-ingested hwKeys (provided + // by InferenceContext via `hwColorOverrides`) get their per-run palette + // color even when the vendor system would otherwise pick a GPU-derived hue. + const resolveColor = useCallback( + (identifier: string, hardwareKey?: string): string => { + if (identifier in hwColorOverrides) return hwColorOverrides[identifier]; + return baseResolveColor(identifier, hardwareKey); + }, + [baseResolveColor, hwColorOverrides], + ); + // --- Changelog --- const changelog = availableRuns ? availableRuns[selectedRunId]?.changelog || null : null; const highlightConfigSuffixes = useMemo(() => { @@ -1961,6 +1975,19 @@ const ScatterGraph = React.memo( chartRef.current?.dismissTooltip(); }, [selectedPrecisions, selectedYAxisMetric, hideNonOptimal, overlayData, chartId]); + // Clean up overlay DOM elements when overlayData is removed (e.g. 
when + // unofficial-as-ingested is toggled on). The layer system has no built-in + // teardown for layers that drop out of the array, so the previous render's + // X-shape points / dashed rooflines would otherwise stick around. + useEffect(() => { + if (overlayData) return; + const svg = chartRef.current?.getSvgElement?.(); + if (!svg) return; + const root = d3.select(svg); + root.selectAll('.unofficial-overlay-pt').remove(); + root.selectAll('.overlay-roofline-path').remove(); + }, [overlayData]); + // Dismiss when pinned point's hardware becomes hidden useEffect(() => { const pp = chartRef.current?.getPinnedPoint() as InferenceData | null; @@ -2122,6 +2149,19 @@ const ScatterGraph = React.memo( track('latency_legend_expanded', { expanded }); }} switches={[ + ...(isUnofficialRun + ? [ + { + id: 'scatter-uoff-as-ingested', + label: 'Show as ingested', + checked: mergeAsIngested, + onCheckedChange: (checked: boolean) => { + setMergeAsIngested(checked); + track('latency_unofficial_as_ingested_toggled', { enabled: checked }); + }, + }, + ] + : []), ...(selectedYAxisMetric === 'y_inputTputPerGpu' ? 
[] : [ diff --git a/packages/app/src/components/ui/chart-selectors.tsx b/packages/app/src/components/ui/chart-selectors.tsx index a9e087b2..be9fc910 100644 --- a/packages/app/src/components/ui/chart-selectors.tsx +++ b/packages/app/src/components/ui/chart-selectors.tsx @@ -132,17 +132,9 @@ interface SequenceSelectorProps { 'data-testid'?: string; } -export function SequenceSelector({ - id = 'sequence-select', - value, - onChange, - open, - onOpenChange, - availableSequences, - 'data-testid': testId, -}: SequenceSelectorProps) { +function buildSequenceSections(availableSequences: string[]) { const groups = groupByCategory(availableSequences, (s) => getSequenceCategory(s as Sequence)); - const sections = [ + return [ { id: 'default', options: groups.default.map((seq) => ({ @@ -165,6 +157,18 @@ export function SequenceSelector({ ] : []), ]; +} + +export function SequenceSelector({ + id = 'sequence-select', + value, + onChange, + open, + onOpenChange, + availableSequences, + 'data-testid': testId, +}: SequenceSelectorProps) { + const sections = buildSequenceSections(availableSequences); return (
@@ -200,6 +204,69 @@ export function SequenceSelector({ ); } +interface MultiSequenceSelectorProps { + id?: string; + /** Selected sequences, primary-first. minSelections=1 is enforced. */ + value: string[]; + onChange: (value: Sequence[]) => void; + open?: boolean; + onOpenChange?: (open: boolean) => void; + availableSequences: string[]; + maxSelections?: number; + 'data-testid'?: string; +} + +/** + * Sequence picker that allows multiple ISL/OSL selections so the inference + * chart can overlay e.g. 1K/1K and 8K/1K as separate series on the same + * axes. The first selected value is treated as the "primary" sequence by + * the data pipeline; additional picks become `extraSequences` and each + * (hw, sequence) row gets a synth hwKey so it lands in its own legend line. + */ +export function MultiSequenceSelector({ + id = 'sequence-multiselect', + value, + onChange, + open, + onOpenChange, + availableSequences, + maxSelections = 3, + 'data-testid': testId, +}: MultiSequenceSelectorProps) { + const sections = buildSequenceSections(availableSequences); + + return ( +
+ +
+ { + if (values.length === 0) return; + track('selector_sequence_changed', { sequence: values.join(',') }); + onChange(values as Sequence[]); + }} + open={open} + onOpenChange={onOpenChange} + triggerId={id} + triggerTestId={testId} + placeholder="ISL / OSL" + minSelections={1} + maxSelections={maxSelections} + showClearAll={false} + searchable={false} + showSelectionSummary={false} + /> +
+
+ ); +} + interface PrecisionSelectorProps { id?: string; value: string[]; diff --git a/packages/app/src/components/unofficial-run-provider.test.ts b/packages/app/src/components/unofficial-run-provider.test.ts index 1863060d..aeac06de 100644 --- a/packages/app/src/components/unofficial-run-provider.test.ts +++ b/packages/app/src/components/unofficial-run-provider.test.ts @@ -163,6 +163,12 @@ describe('buildChartData', () => { expect(Object.keys(result)).toEqual(['DeepSeek-R1-0528_8k/1k']); }); + it('maps the offline TRT 8k/625 sequence correctly', () => { + const rows = [stubRow({ model: 'dsv4', isl: 8192, osl: 625 })]; + const result = buildChartData(rows); + expect(Object.keys(result)).toEqual(['DeepSeek-V4-Pro_8k/625']); + }); + it('skips rows with unmapped ISL/OSL', () => { const rows = [stubRow({ model: 'dsr1', isl: 4096, osl: 4096 })]; const result = buildChartData(rows); diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx index 310a4d1a..01f58e96 100644 --- a/packages/app/src/components/unofficial-run-provider.tsx +++ b/packages/app/src/components/unofficial-run-provider.tsx @@ -20,6 +20,7 @@ import { normalizeEvalHardwareKey } from '@/lib/chart-utils'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import { transformBenchmarkRows } from '@/lib/benchmark-transform'; import { Model, Sequence } from '@/lib/data-mappings'; +import { readUrlParams } from '@/lib/url-state'; interface UnofficialRunInfo { id: number; @@ -51,6 +52,15 @@ export interface AvailableModelSequence { export interface UnofficialRunContextType { isUnofficialRun: boolean; + /** + * When true, unofficial-run rows are promoted to first-class series in the + * inference scatter — each (run, GPU config) pair becomes its own legend + * entry with the run's branch name, and the rows participate in the same + * filter pipeline as ingested data (Optimal-only, hardware toggles, etc.) 
+ * instead of rendering as a separate X-shape overlay. + */ + mergeAsIngested: boolean; + setMergeAsIngested: (v: boolean) => void; /** First run in the loaded set — kept as a convenience alias for overlay labels. */ unofficialRunInfo: UnofficialRunInfo | null; /** All runs loaded from the `unofficialrun(s)` URL param (comma-separated). */ @@ -176,6 +186,34 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { AvailableModelSequence[] >([]); + // Promote unofficial rows to ingested-style series. Initial value seeded + // from the URL snapshot in `url-state.ts` (which captures share-link params + // at module load BEFORE its deferred cleanup strips them) so a share link + // like `?unofficialrun=…&i_uoff_ingested=1` starts checked. Reading + // `window.location.search` here would race against that cleanup and lose + // the value. Under SSR the value is false; we re-sync after mount and on + // popstate via the listener attached below. + const [mergeAsIngested, setMergeAsIngestedRaw] = useState(() => { + if (typeof window === 'undefined') return false; + return readUrlParams().i_uoff_ingested === '1'; + }); + // Re-sync after hydration in case the server rendered with the SSR default. + // Source of truth is `readUrlParams()` (snapshot captured before url-state's + // deferred cleanup), not the live address bar. + useEffect(() => { + if (typeof window === 'undefined') return; + const fromUrl = readUrlParams().i_uoff_ingested === '1'; + setMergeAsIngestedRaw((prev) => (prev === fromUrl ? 
prev : fromUrl)); + }, []); + const setMergeAsIngested = useCallback((v: boolean) => { + setMergeAsIngestedRaw(v); + if (typeof window === 'undefined') return; + const url = new URL(window.location.href); + if (v) url.searchParams.set('i_uoff_ingested', '1'); + else url.searchParams.delete('i_uoff_ingested'); + window.history.replaceState({}, '', url); + }, []); + // --- Shared overlay toggle state (unified across both charts) --- const [activeOverlayHwTypes, setActiveOverlayHwTypes] = useState>(new Set()); const [localOfficialOverride, setLocalOfficialOverrideRaw] = useState | null>(null); @@ -345,8 +383,19 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { ); useEffect(() => { - const load = () => { + const load = (isPopstate: boolean) => { const params = new URLSearchParams(window.location.search); + // On popstate the browser restored the previous history entry, so the + // URL is the source of truth for the merge toggle. Don't re-sync on + // the initial mount call — by then `url-state.ts`'s deferred cleanup + // may have stripped `i_uoff_ingested` from the address bar, and the + // seeded `useState` already reflects the snapshot. (`setMergeAsIngested` + // writes `i_uoff_ingested=1` via `replaceState` when the user toggles, + // so back/forward replays it.) 
+ if (isPopstate) { + setMergeAsIngestedRaw(params.get('i_uoff_ingested') === '1'); + } + let unofficialRunIdParam: string | undefined; for (const [key, value] of params) { if (UNOFFICIAL_RUN_PARAM_RE.test(key) && value) { @@ -389,15 +438,18 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { .finally(() => setLoading(false)); }; - load(); - window.addEventListener('popstate', load); - return () => window.removeEventListener('popstate', load); + load(false); + const onPopstate = () => load(true); + window.addEventListener('popstate', onPopstate); + return () => window.removeEventListener('popstate', onPopstate); }, []); return ( 0, + mergeAsIngested, + setMergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts index 9bf83032..dbad963d 100644 --- a/packages/app/src/lib/chart-utils.ts +++ b/packages/app/src/lib/chart-utils.ts @@ -20,16 +20,19 @@ import { getVendor, type Vendor } from '@/lib/dynamic-colors'; * In Lab space: 0° = red, 90° = yellow, 180° = green, 270° = blue. * NVIDIA must not be red/rose/pink (wraps around 0°: 320–40°). * AMD must not be green (roughly 120–195°). + * Huawei must not be red (AMD zone) or green (NVIDIA zone). */ const BANNED_HUE_TEST: Record boolean) | null> = { nvidia: (hue) => hue >= 320 || hue <= 40, // red/rose/pink zone amd: (hue) => hue >= 120 && hue <= 195, // green zone + huawei: (hue) => hue >= 320 || hue <= 40 || (hue >= 120 && hue <= 195), // avoid red + green unknown: null, }; /** * Preferred hue ranges (CIELab) — used when a vendor has few items so they - * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges. + * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges, + * Huawei = amber/yellow. 
*/ const PREFERRED_ZONE: Record< Vendor, @@ -37,6 +40,7 @@ const PREFERRED_ZONE: Record< > = { nvidia: { hmin: 100, hmax: 195 }, // greens/teals amd: { hmin: 20, hmax: 50, cmin: 70, lmin: 50 }, // vivid reds/oranges + huawei: { hmin: 50, hmax: 95, cmin: 60 }, // amber/yellow unknown: null, }; diff --git a/packages/app/src/lib/data-mappings.test.ts b/packages/app/src/lib/data-mappings.test.ts index 7edd4278..34a1960f 100644 --- a/packages/app/src/lib/data-mappings.test.ts +++ b/packages/app/src/lib/data-mappings.test.ts @@ -124,6 +124,24 @@ describe('getModelAndSequenceFromArtifact', () => { expect(result).toEqual({ model: Model.Kimi_K2_5, sequence: Sequence.EightK_OneK }); }); + it('parses structured artifact with dsv4 prefix and 8k/256 ISL/OSL', () => { + const result = getModelAndSequenceFromArtifact({ + infmax_model_prefix: 'dsv4', + isl: 8192, + osl: 256, + }); + expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_256 }); + }); + + it('parses structured artifact with dsv4 prefix and 8k/625 ISL/OSL', () => { + const result = getModelAndSequenceFromArtifact({ + infmax_model_prefix: 'dsv4', + isl: 8192, + osl: 625, + }); + expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_625 }); + }); + it('returns undefined for unknown model prefix', () => { const result = getModelAndSequenceFromArtifact({ infmax_model_prefix: 'unknown', @@ -207,6 +225,8 @@ describe('getSequenceLabel', () => { expect(getSequenceLabel(Sequence.OneK_OneK)).toBe('1K / 1K'); expect(getSequenceLabel(Sequence.OneK_EightK)).toBe('1K / 8K'); expect(getSequenceLabel(Sequence.EightK_OneK)).toBe('8K / 1K'); + expect(getSequenceLabel(Sequence.EightK_256)).toBe('8K / 256'); + expect(getSequenceLabel(Sequence.EightK_625)).toBe('8K / 625'); }); it('falls back to the sequence value for unknown sequence', () => { diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index ee832d4a..bb3d71fa 100644 --- 
a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -1,3 +1,5 @@ +import { islOslToSequence } from '@semianalysisai/inferencex-constants'; + import type { ExclusionSpec } from './exclusion'; export enum Model { @@ -170,6 +172,8 @@ export enum Sequence { OneK_OneK = '1k/1k', OneK_EightK = '1k/8k', EightK_OneK = '8k/1k', + EightK_256 = '8k/256', + EightK_625 = '8k/625', } const SEQUENCE_CONFIG: Record = @@ -177,6 +181,8 @@ const SEQUENCE_CONFIG: Record { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('converts 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + + it('converts 8k/625 to 8192/625', () => { + expect(sequenceToIslOsl('8k/625')).toEqual({ isl: 8192, osl: 625 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('4k/4k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -67,13 +75,21 @@ describe('islOslToSequence', () => { expect(islOslToSequence(8192, 1024)).toBe('8k/1k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + + it('converts 8192/625 to 8k/625', () => { + expect(islOslToSequence(8192, 625)).toBe('8k/625'); + }); + it('returns null for unknown ISL/OSL combos', () => { expect(islOslToSequence(4096, 4096)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256', '8k/625']) { const islOsl = sequenceToIslOsl(seq)!; expect(islOslToSequence(islOsl.isl, islOsl.osl)).toBe(seq); } diff --git a/packages/app/src/lib/sequence-synth-key.test.ts b/packages/app/src/lib/sequence-synth-key.test.ts new file mode 100644 index 00000000..39a8eeaf --- /dev/null +++ b/packages/app/src/lib/sequence-synth-key.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from 'vitest'; + +import { 
Sequence } from '@/lib/data-mappings'; +import { + isSeqSynthKey, + makeSeqSynthHardwareEntry, + makeSeqSynthKey, + makeSequenceFilter, + parseSeqSynthKey, + sequenceCompact, + stripSeqSuffix, +} from '@/lib/sequence-synth-key'; + +describe('sequence-synth-key', () => { + describe('sequenceCompact', () => { + it('maps known sequences to compact form', () => { + expect(sequenceCompact(Sequence.OneK_OneK)).toBe('1k1k'); + expect(sequenceCompact(Sequence.OneK_EightK)).toBe('1k8k'); + expect(sequenceCompact(Sequence.EightK_OneK)).toBe('8k1k'); + expect(sequenceCompact(Sequence.EightK_256)).toBe('8k256'); + expect(sequenceCompact(Sequence.EightK_625)).toBe('8k625'); + }); + }); + + describe('makeSeqSynthKey / parseSeqSynthKey', () => { + it('round-trips a base hwKey through compact form', () => { + const key = makeSeqSynthKey('b200_vllm', Sequence.OneK_OneK); + expect(key).toBe('b200_vllm__seq1k1k'); + expect(parseSeqSynthKey(key)).toEqual({ + origHwKey: 'b200_vllm', + sequence: Sequence.OneK_OneK, + }); + }); + + it('preserves the base GPU prefix so vendor-color helpers keep working', () => { + // getModelSortIndex / isKnownGpu split on '_' and read [0]; this must + // still be the canonical base for any synth key shape we produce. + const key = makeSeqSynthKey('gb300_dynamo-trt_mtp', Sequence.EightK_OneK); + expect(key.split('_')[0]).toBe('gb300'); + }); + + it('returns null when the suffix is absent', () => { + expect(parseSeqSynthKey('b200_vllm')).toBeNull(); + }); + + it('returns null when the compact form is unknown', () => { + // A made-up suffix shouldn't be silently accepted — better to fall back + // to "no parse" so callers can keep treating the key as opaque. + expect(parseSeqSynthKey('b200_vllm__seqbogus')).toBeNull(); + }); + + it('still parses when the seq suffix is followed by a __uorun chain', () => { + // Composing with the unofficial-merge `__uorun` shape is supported. 
+ // parseSeqSynthKey strips the trailing chain before resolving the + // compact form. + const composed = `${makeSeqSynthKey('b200_vllm', Sequence.OneK_OneK)}__uorun123`; + expect(parseSeqSynthKey(composed)).toEqual({ + origHwKey: 'b200_vllm', + sequence: Sequence.OneK_OneK, + }); + }); + }); + + describe('isSeqSynthKey / stripSeqSuffix', () => { + it('isSeqSynthKey reports presence of the delimiter', () => { + expect(isSeqSynthKey('b200_vllm')).toBe(false); + expect(isSeqSynthKey('b200_vllm__seq1k1k')).toBe(true); + }); + + it('stripSeqSuffix is a no-op when absent', () => { + expect(stripSeqSuffix('b200_vllm')).toBe('b200_vllm'); + }); + + it('stripSeqSuffix removes the seq tail (and anything chained after)', () => { + expect(stripSeqSuffix('b200_vllm__seq1k1k')).toBe('b200_vllm'); + expect(stripSeqSuffix('b200_vllm__seq1k1k__uorun42')).toBe('b200_vllm'); + }); + }); + + describe('makeSeqSynthHardwareEntry', () => { + it('appends the sequence label so the legend can distinguish lines', () => { + const entry = makeSeqSynthHardwareEntry( + { + name: 'b200-vllm', + label: 'B200', + suffix: '(vLLM)', + gpu: "NVIDIA 'Blackwell' B200 vLLM", + framework: 'vllm', + }, + 'b200_vllm', + Sequence.OneK_OneK, + 'b200_vllm__seq1k1k', + ); + expect(entry.label).toBe('B200 — 1K / 1K'); + // suffix and framework are preserved so the legend line still renders + // its parens-tagged framework label. + expect(entry.suffix).toBe('(vLLM)'); + expect(entry.framework).toBe('vllm'); + // gpu tooltip carries the sequence too so a hover reveals which seq + // the line came from. 
+ expect(entry.gpu).toContain('1K / 1K'); + }); + + it('falls back to the orig hwKey when the entry is missing', () => { + const entry = makeSeqSynthHardwareEntry(undefined, 'b200_vllm', Sequence.OneK_OneK, 'syn'); + expect(entry.label).toBe('b200_vllm — 1K / 1K'); + }); + }); + + describe('makeSequenceFilter', () => { + it('returns a predicate matching exact (isl, osl)', () => { + const filter = makeSequenceFilter(Sequence.EightK_OneK); + expect(filter).not.toBeNull(); + expect(filter!({ isl: 8192, osl: 1024 })).toBe(true); + expect(filter!({ isl: 8192, osl: 256 })).toBe(false); + expect(filter!({ isl: 1024, osl: 1024 })).toBe(false); + }); + }); +}); diff --git a/packages/app/src/lib/sequence-synth-key.ts b/packages/app/src/lib/sequence-synth-key.ts new file mode 100644 index 00000000..482168df --- /dev/null +++ b/packages/app/src/lib/sequence-synth-key.ts @@ -0,0 +1,101 @@ +/** + * Helpers for promoting per-sequence benchmark rows to first-class + * "ingested-style" series so a user can compare e.g. 1K/1K vs 8K/1K on the + * same scatter chart instead of having to flip between them with the picker. + * + * Each (origHwKey, sequence) pair becomes a synth hwKey of the form + * `${origHwKey}__seq` + * — preserving `hwKey.split('_')[0]` (the base GPU) so `getModelSortIndex`, + * `isKnownGpu`, and the vendor-color generator keep working. The `__seq` + * delimiter is also distinct from the `__uorun` delimiter used by + * unofficial-merge so the two can compose (`base__seq1k1k__uorun123`). 
+ */ +import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants'; + +import type { HardwareEntry } from '@/lib/constants'; +import { Sequence, getSequenceLabel } from '@/lib/data-mappings'; + +const SEQ_SYNTH_DELIM = '__seq'; + +const SEQUENCE_COMPACT: Record = { + [Sequence.OneK_OneK]: '1k1k', + [Sequence.OneK_EightK]: '1k8k', + [Sequence.EightK_OneK]: '8k1k', + [Sequence.EightK_256]: '8k256', + [Sequence.EightK_625]: '8k625', +}; + +const COMPACT_TO_SEQUENCE: Record = Object.fromEntries( + (Object.entries(SEQUENCE_COMPACT) as [Sequence, string][]).map(([s, c]) => [c, s]), +); + +/** Compact form for use as a URL/hwKey suffix (e.g. `1k1k`). */ +export function sequenceCompact(seq: Sequence): string { + return SEQUENCE_COMPACT[seq] ?? String(seq).replace('/', ''); +} + +/** Build a (hw, sequence) synth hwKey while keeping the original GPU base prefix. */ +export function makeSeqSynthKey(origHwKey: string, seq: Sequence): string { + return `${origHwKey}${SEQ_SYNTH_DELIM}${sequenceCompact(seq)}`; +} + +/** Reverse {@link makeSeqSynthKey}; returns null when the key has no sequence suffix. */ +export function parseSeqSynthKey(hwKey: string): { origHwKey: string; sequence: Sequence } | null { + const idx = hwKey.indexOf(SEQ_SYNTH_DELIM); + if (idx === -1) return null; + const origHwKey = hwKey.slice(0, idx); + // A trailing `__uorun` may follow the sequence compact form — strip it. + const rest = hwKey.slice(idx + SEQ_SYNTH_DELIM.length); + const compact = rest.split('__')[0]; + const sequence = COMPACT_TO_SEQUENCE[compact]; + if (!sequence) return null; + return { origHwKey, sequence }; +} + +export function isSeqSynthKey(hwKey: string): boolean { + return hwKey.includes(SEQ_SYNTH_DELIM); +} + +/** + * Strip a `__seq` suffix from a hwKey, returning the original key. + * No-op if the suffix is absent. Used by color resolution / sort helpers that + * already operate on the base hwKey via `split('_')[0]` but also want the + * fully-qualified original (e.g. 
for matching the official `hardwareConfig`). + */ +export function stripSeqSuffix(hwKey: string): string { + const idx = hwKey.indexOf(SEQ_SYNTH_DELIM); + if (idx === -1) return hwKey; + return hwKey.slice(0, idx); +} + +/** + * Build a synthesized HardwareEntry whose label is appended with the + * sequence label (e.g. "B200 — 1K/1K"). The base entry's other fields are + * preserved so downstream code (legend swatches, tooltip GPU string, etc.) + * keeps working. + */ +export function makeSeqSynthHardwareEntry( + origEntry: HardwareEntry | undefined, + origHwKey: string, + seq: Sequence, + synthHwKey: string, +): HardwareEntry { + const baseLabel = origEntry?.label ?? origHwKey; + const seqLabel = getSequenceLabel(seq); + return { + name: synthHwKey.replaceAll('_', '-'), + label: `${baseLabel} — ${seqLabel}`, + suffix: origEntry?.suffix ?? '', + gpu: origEntry?.gpu ? `${origEntry.gpu} [${seqLabel}]` : `[${seqLabel}]`, + framework: origEntry?.framework, + }; +} + +/** Build a stable ISL/OSL filter predicate for one sequence. 
*/ +export function makeSequenceFilter( + seq: Sequence, +): ((r: { isl: number; osl: number }) => boolean) | null { + const islOsl = sequenceToIslOsl(seq); + if (!islOsl) return null; + return (r) => r.isl === islOsl.isl && r.osl === islOsl.osl; +} diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts new file mode 100644 index 00000000..38a64db9 --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -0,0 +1,456 @@ +import { describe, expect, it } from 'vitest'; + +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; +import { generateVendorColors, getVendor } from '@/lib/dynamic-colors'; + +import { + isSynthHwKey, + makeSynthHwKey, + mergeUnofficialIntoOfficial, + parseSynthHwKey, + type UnofficialChartDataMap, +} from './unofficial-merge'; + +const E2E_DEF: ChartDefinition = { + chartType: 'e2e', + x: 'median_e2el', + y: 'tput_per_gpu', + x_label: 'End-to-end Latency (s)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Latency', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const INTERACTIVITY_DEF: ChartDefinition = { + chartType: 'interactivity', + x: 'median_intvty', + y: 'tput_per_gpu', + x_label: 'Interactivity (tok/s/user)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Interactivity', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const CHART_DEFS: ChartDefinition[] = [E2E_DEF, INTERACTIVITY_DEF]; + +function makeOverlayPoint(overrides: Partial = {}): InferenceData { + return { + hwKey: 'h100_vllm', + precision: 'fp8', + tp: 8, + conc: 64, + x: 0, + y: 0, + median_e2el: 2.3, + median_intvty: 12.5, + p99_ttft: 0.35, + median_ttft: 0.15, + tpPerGpu: { y: 450.5, roof: false }, + date: '2026-04-01', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + 
...overrides, + } as InferenceData; +} + +function makeOverlayChartData(): UnofficialChartDataMap { + const e2eData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const interactivityData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const gpus: HardwareConfig = { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + a100_sglang: { name: 'a100_sglang', label: 'A100', suffix: '(SGLANG)', gpu: 'NVIDIA A100' }, + }; + return { + 'DeepSeek-R1-0528_1k/1k': { + e2e: { data: e2eData, gpus }, + interactivity: { data: interactivityData, gpus }, + }, + }; +} + +function emptyOfficial(): { graphs: RenderableGraph[]; hardwareConfig: HardwareConfig } { + return { + graphs: [ + { model: 'DeepSeek-R1-0528', sequence: '1k/1k', chartDefinition: E2E_DEF, data: [] }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [], + }, + ], + hardwareConfig: {}, + }; +} + +const RUN_INDEX = { + 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100': 0, + '100': 0, + 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200': 1, + '200': 1, +}; + +const RUN_INFOS = [ + { + id: 100, + branch: 'feature-branch-a', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + }, + { + id: 200, + branch: 'feature-branch-b', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + }, +]; + +describe('synth hwKey helpers', () => { + it('encodes runId into hwKey while preserving the GPU base prefix', () => { + const synth = makeSynthHwKey('h100_vllm', 100); + expect(synth).toBe('h100_vllm__uorun100'); + // Critical: the base GPU is still recoverable via split('_')[0] so + // getModelSortIndex / isKnownGpu keep working. 
+ expect(synth.split('_')[0]).toBe('h100'); + }); + + it('round-trips through parseSynthHwKey', () => { + const synth = makeSynthHwKey('a100_sglang', 200); + expect(parseSynthHwKey(synth)).toEqual({ origHwKey: 'a100_sglang', runId: 200 }); + }); + + it('parseSynthHwKey returns null for non-synth keys', () => { + expect(parseSynthHwKey('h100_vllm')).toBeNull(); + expect(parseSynthHwKey('mi300x')).toBeNull(); + }); + + it('isSynthHwKey detects synthesized keys', () => { + expect(isSynthHwKey(makeSynthHwKey('h100', 100))).toBe(true); + expect(isSynthHwKey('h100_vllm')).toBe(false); + }); +}); + +describe('mergeUnofficialIntoOfficial', () => { + it('is a no-op when unofficialChartData is null', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: null, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: {}, + unofficialRunInfos: [], + }); + expect(result.graphs).toBe(graphs); + expect(result.hardwareConfig).toBe(hardwareConfig); + expect(result.colorOverrides).toEqual({}); + }); + + it('is a no-op when no overlay group matches the selected (model, sequence)', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'gpt-oss-120b', // not present in overlay map + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + expect(result.graphs).toBe(graphs); + expect(result.colorOverrides).toEqual({}); + }); + + it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig (no color override)', () => { + const { graphs, hardwareConfig } = 
emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Each chart graph received both overlay rows (different GPUs, both run 100). + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data).toHaveLength(2); + const synthKeys = e2eGraph.data.map((d) => d.hwKey); + expect(synthKeys).toContain('h100_vllm__uorun100'); + expect(synthKeys).toContain('a100_sglang__uorun100'); + + // The synth keys are present in hardwareConfig with bare GPU labels — the + // branch is intentionally NOT in the legend label (the run is still + // recoverable from `gpu` for the row tooltip). + const h100Synth = result.hardwareConfig['h100_vllm__uorun100']; + expect(h100Synth.label).toBe('H100'); + expect(h100Synth.label).not.toContain('feature-branch-a'); + expect(h100Synth.gpu).toContain('UNOFFICIAL: feature-branch-a'); + + // No color overrides are populated — colors fall through to the + // vendor-aware system in dynamic-colors.ts so two NVIDIA GPUs from a + // single unofficial run get distinct shades of green instead of one + // shared overlay-palette color. + expect(result.colorOverrides).toEqual({}); + }); + + it('keeps multiple runs separate so each (run, GPU) becomes its own legend entry', () => { + const data = makeOverlayChartData(); + // Inject a second run's row alongside the first. 
+ const secondRunPoint = makeOverlayPoint({ + hwKey: 'h100_vllm', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + tpPerGpu: { y: 460, roof: false }, + }); + data['DeepSeek-R1-0528_1k/1k'].e2e.data.push(secondRunPoint); + data['DeepSeek-R1-0528_1k/1k'].interactivity.data.push(secondRunPoint); + + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: data, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Same physical GPU (h100_vllm) appears twice — once per run — with distinct + // synth keys so they form separate roofline groups in the scatter chart. + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + const h100Keys = e2eGraph.data + .map((d) => d.hwKey) + .filter((k) => String(k).startsWith('h100_vllm__uorun')); + expect(h100Keys).toContain('h100_vllm__uorun100'); + expect(h100Keys).toContain('h100_vllm__uorun200'); + + // Both runs of the same GPU get the bare GPU label — visual disambiguation + // is done by the vendor-zone color system, which assigns distinct hues + // within the same vendor band. Provenance still surfaces via `gpu`. 
+ expect(result.hardwareConfig['h100_vllm__uorun100'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun100'].gpu).toContain( + 'UNOFFICIAL: feature-branch-a', + ); + expect(result.hardwareConfig['h100_vllm__uorun200'].gpu).toContain( + 'UNOFFICIAL: feature-branch-b', + ); + expect(result.colorOverrides).toEqual({}); + }); + + it('preserves official rows alongside merged overlay rows', () => { + const { hardwareConfig } = emptyOfficial(); + const officialPoint = { + hwKey: 'b200_trt', + precision: 'fp4', + tp: 4, + conc: 8, + x: 1.5, + y: 800, + date: '2026-03-01', + } as InferenceData; + const graphs: RenderableGraph[] = [ + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: E2E_DEF, + data: [officialPoint], + }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [officialPoint], + }, + ]; + + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data.some((d) => d.hwKey === 'b200_trt')).toBe(true); + expect(e2eGraph.data.some((d) => String(d.hwKey).startsWith('h100_vllm__uorun'))).toBe(true); + }); + + it('synthesizes stub graphs from chartDefinitions when official graphs is empty', () => { + const result = mergeUnofficialIntoOfficial({ + graphs: [], + hardwareConfig: {}, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + 
unofficialRunInfos: RUN_INFOS, + chartDefinitions: CHART_DEFS, + }); + + // Two stub graphs synthesized (e2e + interactivity), each carrying merged overlay rows. + expect(result.graphs).toHaveLength(2); + expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); + }); + + it('merges overlay rows from multiple sequences and tags each synth hwKey with __seq', () => { + // Build an overlay map that has data for BOTH 1K/1K and 8K/1K so the + // multi-sequence path has something to fan out. + const data = makeOverlayChartData(); + const e2eData8k = [makeOverlayPoint({ conc: 64, tpPerGpu: { y: 600, roof: false } })]; + const interactivity8k = [makeOverlayPoint({ conc: 64, tpPerGpu: { y: 600, roof: false } })]; + data['DeepSeek-R1-0528_8k/1k'] = { + e2e: { + data: e2eData8k, + gpus: { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + }, + }, + interactivity: { + data: interactivity8k, + gpus: { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + }, + }, + }; + + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: data, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + extraSequences: ['8k/1k'], + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Each chart graph receives rows from both sequences with a __seq tag + // landing BEFORE the __uorun tag so the resulting key is + // base__seq__uorun. 
/**
 * Pull the hue component out of an `oklch(L C H)` color string.
 *
 * @param s - CSS color string such as `oklch(0.7 0.15 145.5)`.
 * @returns The hue as a number, or `NaN` when `s` is not in that form.
 */
function hueOf(s: string): number {
  // The hue is the last whitespace-separated number before the closing paren.
  // The group has to be *named* `hue`, because the result is read back through
  // `groups.hue`; a bare `(?` prefix is a syntax error under the `u` flag.
  const m = s.match(/oklch\([^)]*\s+(?<hue>[\d.]+)\)/u);
  return m?.groups?.hue ? Number(m.groups.hue) : NaN;
}
+ const a = makeSynthHwKey('b200_vllm', 100); + const b = makeSynthHwKey('b200_vllm', 200); + const colors = generateVendorColors([a, b], 'light'); + expect(colors[a]).not.toBe(colors[b]); + }); +}); diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts new file mode 100644 index 00000000..3167e0bd --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.ts @@ -0,0 +1,288 @@ +/** + * Helpers for promoting unofficial-run benchmark rows to first-class + * "ingested-style" series so they participate in the regular scatter + * filter pipeline (Optimal-only, hardware toggles, precision filter, etc.) + * instead of being rendered as a separate overlay layer. + * + * Each (run, original hwKey) pair gets a synthesized hardware key of the form + * `${origHwKey}__uorun${runId}` + * — preserving the base GPU as `hwKey.split('_')[0]` so `getModelSortIndex` + * and `isKnownGpu` keep working — while still being unique per run so a single + * job with multiple GPUs surfaces as separate legend entries, and multiple + * runs don't collapse onto each other. + */ +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; +import { processOverlayChartData } from '@/components/inference/utils'; +import type { HardwareEntry } from '@/lib/constants'; +import { overlayRunIndex } from '@/lib/overlay-run-style'; +import type { Sequence } from '@/lib/data-mappings'; +import { makeSeqSynthHardwareEntry, makeSeqSynthKey } from '@/lib/sequence-synth-key'; + +const SYNTH_KEY_DELIM = '__uorun'; + +export interface UnofficialRunInfoLite { + id: number; + branch: string; + url: string; +} + +export interface OverlayChartGroup { + e2e: { data: InferenceData[]; gpus: HardwareConfig }; + interactivity: { data: InferenceData[]; gpus: HardwareConfig }; +} + +export type UnofficialChartDataMap = Record; + +/** Build a unique per-run hwKey while keeping the original GPU base prefix. 
/** Build a unique per-run hwKey while keeping the original GPU base prefix. */
export function makeSynthHwKey(origHwKey: string, runId: number): string {
  return `${origHwKey}${SYNTH_KEY_DELIM}${runId}`;
}

/**
 * Reverse the encoding produced by {@link makeSynthHwKey}.
 *
 * @param hwKey - A hardware key that may carry a trailing run-id suffix.
 * @returns The original key and run id, or `null` when `hwKey` has no suffix
 *   or the suffix is not an integer run id.
 */
export function parseSynthHwKey(hwKey: string): { origHwKey: string; runId: number } | null {
  // makeSynthHwKey appends the suffix, so the LAST delimiter is the one that
  // separates the run id from the rest of the key.
  const idx = hwKey.lastIndexOf(SYNTH_KEY_DELIM);
  if (idx === -1) return null;
  const rawRunId = hwKey.slice(idx + SYNTH_KEY_DELIM.length);
  // Number('') is 0, so an empty suffix must be rejected explicitly instead of
  // being reported as run 0.
  if (!/^-?\d+$/u.test(rawRunId)) return null;
  const runId = Number(rawRunId);
  if (!Number.isSafeInteger(runId)) return null;
  return { origHwKey: hwKey.slice(0, idx), runId };
}

/** True when `hwKey` contains the per-run delimiter anywhere in the string. */
export function isSynthHwKey(hwKey: string): boolean {
  return hwKey.includes(SYNTH_KEY_DELIM);
}

/**
 * Build the hardware-config entry registered under a synthesized per-run key.
 *
 * @param origEntry - Entry of the original hardware key, if one is known.
 * @param origHwKey - Original hardware key; used as the label fallback.
 * @param run - Run whose branch (or id) is recorded as provenance.
 * @param synthHwKey - The synthesized key the entry is created for.
 */
function makeSynthHardwareEntry(
  origEntry: HardwareEntry | undefined,
  origHwKey: string,
  run: UnofficialRunInfoLite,
  synthHwKey: string,
): HardwareEntry {
  const branch = run.branch || `run ${run.id}`;
  const baseLabel = origEntry?.label ?? origHwKey;
  // The legend label intentionally omits the branch; provenance is carried in
  // `gpu` instead.
  return {
    name: synthHwKey.replaceAll('_', '-'),
    label: baseLabel,
    suffix: origEntry?.suffix ?? '',
    gpu: origEntry?.gpu ? `${origEntry.gpu} (UNOFFICIAL: ${branch})` : `UNOFFICIAL: ${branch}`,
    framework: origEntry?.framework,
  };
}
interface MergeArgs {
  graphs: RenderableGraph[];
  hardwareConfig: HardwareConfig;
  /**
   * Per-(model_sequence) overlay chart data, indexed exactly as produced by
   * {@link unofficial-run-provider#buildChartData}. Entries are looked up by
   * `${model}_${sequence}`.
   */
  unofficialChartData: UnofficialChartDataMap | null;
  selectedModel: string;
  selectedSequence: string;
  /**
   * Additional sequences to overlay alongside `selectedSequence`. When the
   * de-duplicated list (primary + extras) holds more than one sequence, every
   * synth hwKey is tagged with a per-sequence suffix so (run, GPU, sequence)
   * triples land on distinct legend lines.
   */
  extraSequences?: string[];
  selectedYAxisMetric: string;
  selectedXAxisMetric: string | null;
  selectedE2eXAxisMetric: string | null;
  /** Run URL → position of that run inside `unofficialRunInfos`. */
  runIndexByUrl: Record<string, number>;
  unofficialRunInfos: UnofficialRunInfoLite[];
  /**
   * Chart definitions to fall back on when `graphs` is empty. Lets the merger
   * synthesize stub graphs so unofficial-only data still renders. Optional —
   * when omitted and `graphs` is empty, no graphs are produced.
   */
  chartDefinitions?: ChartDefinition[];
}

export interface MergeResult {
  graphs: RenderableGraph[];
  hardwareConfig: HardwareConfig;
  /**
   * Map from synth hwKey → CSS color. Currently always empty: synth keys keep
   * the original GPU base prefix, so the regular vendor palette colors them.
   * The map is retained so a caller can still pin a color per synth key.
   */
  colorOverrides: Record<string, string>;
}

/**
 * Inject overlay rows into the official `graphs` as first-class points with
 * synthesized per-run hwKeys, returning the extended `hardwareConfig` and a
 * (currently empty) color-override map.
 *
 * If `unofficialChartData` is null or has no entry for any requested
 * (model, sequence), the inputs are returned untouched.
 *
 * @param args - See {@link MergeArgs}.
 * @returns Graphs with overlay rows appended, plus the hardware config
 *   extended with one entry per synthesized key.
 */
export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult {
  const {
    graphs: inputGraphs,
    hardwareConfig,
    unofficialChartData,
    selectedModel,
    selectedSequence,
    extraSequences = [],
    selectedYAxisMetric,
    selectedXAxisMetric,
    selectedE2eXAxisMetric,
    runIndexByUrl,
    unofficialRunInfos,
    chartDefinitions,
  } = args;

  // Primary first, then extras; empty entries and duplicates are dropped while
  // first-occurrence order is preserved.
  const requestedSequences = [
    ...new Set<string>([selectedSequence, ...extraSequences].filter((s) => Boolean(s))),
  ];

  // Pair each requested sequence with its overlay group, skipping sequences
  // that have none, so later code never needs a non-null assertion.
  const overlayGroups = requestedSequences.flatMap((seqStr) => {
    const group = unofficialChartData?.[`${selectedModel}_${seqStr}`];
    return group ? [{ seqStr, group }] : [];
  });
  if (overlayGroups.length === 0) {
    return { graphs: inputGraphs, hardwareConfig, colorOverrides: {} };
  }

  // Decided on the REQUESTED list, not on the sequences that have data: the
  // sequence tag is applied even if only one requested sequence has rows.
  const isMultiSequence = requestedSequences.length > 1;

  // With no official graphs but caller-supplied chart definitions, synthesize
  // empty stubs (labelled with the primary sequence) to inject points into.
  const graphs: RenderableGraph[] =
    inputGraphs.length === 0 && chartDefinitions
      ? buildStubGraphsForMerge(selectedModel, selectedSequence, chartDefinitions)
      : inputGraphs;

  const mergedHardwareConfig: HardwareConfig = { ...hardwareConfig };
  const colorOverrides: Record<string, string> = {};

  /**
   * Process overlay rows for one chart type: run them through
   * `processOverlayChartData`, re-key each row by (run, origHwKey [, seq]) and
   * register a hardware entry the first time a synthesized key is seen.
   */
  const processForChart = (
    chartType: 'e2e' | 'interactivity',
    rawRows: InferenceData[],
    overlayHwConfig: HardwareConfig,
    seqStr: string,
  ): InferenceData[] => {
    if (rawRows.length === 0) return [];
    const effectiveXMetric = chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric;
    const processed = processOverlayChartData(
      rawRows,
      chartType,
      selectedYAxisMetric,
      effectiveXMetric,
    );
    return processed.map((row) => {
      const runIdx = overlayRunIndex(row.run_url ?? null, runIndexByUrl);
      const run = unofficialRunInfos[runIdx] ?? unofficialRunInfos[0];
      // Defensive: with no known runs the row keeps its original hwKey.
      if (!run) return row;

      const origHwKey = String(row.hwKey);
      // The sequence tag is applied BEFORE the run suffix, so the run suffix
      // stays last and parseSynthHwKey can still strip it.
      const seqAdjustedKey = isMultiSequence
        ? makeSeqSynthKey(origHwKey, seqStr as Sequence)
        : origHwKey;
      const synthHwKey = makeSynthHwKey(seqAdjustedKey, run.id);

      if (!(synthHwKey in mergedHardwareConfig)) {
        // Prefer the official entry; fall back to the overlay's own config.
        const origEntry = hardwareConfig[origHwKey] ?? overlayHwConfig[origHwKey];
        // In multi-sequence mode the entry is first rewritten by
        // makeSeqSynthHardwareEntry (presumably so the legend label carries
        // the sequence; the exact format lives in that helper).
        const seqAdjustedEntry = isMultiSequence
          ? makeSeqSynthHardwareEntry(origEntry, origHwKey, seqStr as Sequence, seqAdjustedKey)
          : origEntry;
        mergedHardwareConfig[synthHwKey] = makeSynthHardwareEntry(
          seqAdjustedEntry,
          seqAdjustedKey,
          run,
          synthHwKey,
        );
      }
      return { ...row, hwKey: synthHwKey };
    });
  };

  const mergedGraphs: RenderableGraph[] = graphs.map((g) => {
    const ct = g.chartDefinition.chartType as 'e2e' | 'interactivity';
    const overlayPoints = overlayGroups.flatMap(({ seqStr, group }) => {
      const side = ct === 'e2e' ? group.e2e : group.interactivity;
      return processForChart(ct, side.data, side.gpus, seqStr);
    });
    // Untouched graphs are returned by identity. `concat` copies instead of
    // mutating `g.data` and, unlike `push(...rows)`, has no argument-count
    // limit for very large overlays.
    return overlayPoints.length === 0 ? g : { ...g, data: g.data.concat(overlayPoints) };
  });

  return {
    graphs: mergedGraphs,
    hardwareConfig: mergedHardwareConfig,
    colorOverrides,
  };
}
/**
 * Produce one data-less graph per chart definition. Used when the official
 * model has no DB data but unofficial rows should still have a graph to be
 * merged into.
 *
 * @param selectedModel - Model name stamped onto every stub.
 * @param selectedSequence - Sequence label stamped onto every stub.
 * @param chartDefinitions - Definitions to create stubs for, in order.
 * @returns One stub per definition, each with its own empty `data` array.
 */
export function buildStubGraphsForMerge(
  selectedModel: string,
  selectedSequence: string,
  chartDefinitions: ChartDefinition[],
): RenderableGraph[] {
  const stubs: RenderableGraph[] = [];
  for (const definition of chartDefinitions) {
    const emptyData: InferenceData[] = [];
    stubs.push({
      model: selectedModel,
      sequence: selectedSequence,
      chartDefinition: definition,
      data: emptyData,
    });
  }
  return stubs;
}
[entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const decision = computeUnofficialOverrideDecision(run, '1k/1k', ''); + expect(decision.shouldOverride).toBe(false); + // Ref must not be advanced — if the URL is later cleared we still want + // a fresh load of the same run to fire the override. + expect(decision.nextKey).toBe(''); + }); + + it('does not re-fire after the override has already been applied for this run set', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const lastKey = Model.DeepSeek_V4_Pro; + const decision = computeUnofficialOverrideDecision(run, undefined, lastKey); + expect(decision.shouldOverride).toBe(false); + expect(decision.nextKey).toBe(lastKey); + }); + + it('re-arms after the overlay set is cleared so a subsequent load can override again', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const first = computeUnofficialOverrideDecision(run, undefined, ''); + expect(first.shouldOverride).toBe(true); + + const cleared = computeUnofficialOverrideDecision([], undefined, first.nextKey); + expect(cleared).toEqual({ nextKey: '', shouldOverride: false }); + + const run2 = [entry(Model.Kimi_K2_5, Sequence.OneK_OneK)]; + const second = computeUnofficialOverrideDecision(run2, undefined, cleared.nextKey); + expect(second.shouldOverride).toBe(true); + }); + + it('ignores sequence-only deltas in the dedupe key', () => { + const oneK = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const both = [ + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_OneK), + ]; + const first = computeUnofficialOverrideDecision(oneK, undefined, ''); + const second = computeUnofficialOverrideDecision(both, undefined, first.nextKey); + expect(first.nextKey).toBe(second.nextKey); + expect(second.shouldOverride).toBe(false); + }); + + it('produces a deterministic key across insertion orders', () => { + const orderA = [ + entry(Model.MiniMax_M2_5, 
Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + const orderB = [ + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.MiniMax_M2_5, Sequence.OneK_OneK), + ]; + const a = computeUnofficialOverrideDecision(orderA, undefined, ''); + const b = computeUnofficialOverrideDecision(orderB, undefined, ''); + expect(a.nextKey).toBe(b.nextKey); + expect(a.shouldOverride).toBe(b.shouldOverride); + }); +}); + +describe('selectUnofficialDefaultSequence', () => { + it('prefers 8k/256 when the displayed model provides it', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_256), + ]; + expect(selectUnofficialDefaultSequence(run, Model.DeepSeek_V4_Pro, undefined)).toBe( + Sequence.EightK_256, + ); + }); + + it('uses the run sequence when 8k/256 is unavailable', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625)]; + expect(selectUnofficialDefaultSequence(run, Model.DeepSeek_R1, undefined)).toBe( + Sequence.EightK_625, + ); + }); + + it('chooses a sequence for the current covered model', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + expect(selectUnofficialDefaultSequence(run, Model.Kimi_K2_5, undefined)).toBe( + Sequence.OneK_OneK, + ); + }); + + it('uses an explicitly pinned model when selecting the sequence', () => { + const run = [ + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + expect(selectUnofficialDefaultSequence(run, Model.DeepSeek_V4_Pro, Model.Kimi_K2_5)).toBe( + Sequence.OneK_OneK, + ); + }); + + it('returns null when an explicitly pinned model is absent from the run', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.EightK_625)]; + expect( + selectUnofficialDefaultSequence(run, 
Model.DeepSeek_V4_Pro, Model.DeepSeek_R1), + ).toBeNull(); + }); +}); diff --git a/packages/app/src/lib/unofficial-run-auto-switch.ts b/packages/app/src/lib/unofficial-run-auto-switch.ts index a4af4683..ea7becc5 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.ts @@ -1,5 +1,5 @@ import type { AvailableModelSequence } from '@/components/unofficial-run-provider'; -import type { Model } from '@/lib/data-mappings'; +import { Sequence, type Model } from '@/lib/data-mappings'; export interface AutoSwitchDecision { /** New value the caller should write into the dedupe ref. */ @@ -46,3 +46,72 @@ export function computeAutoSwitchDecision( } return { nextKey: key, modelToSet: sortedModels[0] }; } + +export interface UnofficialOverrideDecision { + /** New value the caller should write into the dedupe ref. */ + nextKey: string; + /** Whether the caller should apply the temporary override. */ + shouldOverride: boolean; +} + +/** + * TEMPORARY (this branch only): when an unofficial run loads, override the + * default sequence to `8K / 256` and the default y-axis metric to "Output + * Token Throughput per GPU" so the InfiniteBench-style sweeps land on a + * useful default view. Mirrors the dedupe behavior of + * {@link computeAutoSwitchDecision} so manual user changes stick once they + * are URL-synced, and a fresh run-set transition can re-arm the override. + * + * - When the overlay set is empty, the dedupe key is reset. + * - When the URL pinned the corresponding param explicitly, no override + * fires (respect intent). + * - The dedupe key is the sorted unique list of overlay models — same shape + * as the auto-switch key — so a sequence-only delta does not invalidate a + * manual user pick. 
/**
 * Decide whether the temporary unofficial-run default override should fire.
 *
 * - An empty overlay set resets the dedupe key and never overrides.
 * - A non-empty `urlValue` (the param is pinned in the URL) suppresses the
 *   override and leaves the key untouched.
 * - Otherwise the key is the sorted, de-duplicated, comma-joined list of
 *   overlay models; the override fires only when that key differs from
 *   `lastKey`.
 *
 * @param unofficialAvailable - (model, sequence) entries of the loaded runs.
 * @param urlValue - Value of the corresponding URL param, if any.
 * @param lastKey - Dedupe key stored by the previous decision.
 * @returns The key to store next and whether to apply the override.
 */
export function computeUnofficialOverrideDecision(
  unofficialAvailable: AvailableModelSequence[],
  urlValue: string | undefined,
  lastKey: string,
): UnofficialOverrideDecision {
  if (unofficialAvailable.length === 0) {
    return { nextKey: '', shouldOverride: false };
  }
  if (urlValue) {
    return { nextKey: lastKey, shouldOverride: false };
  }
  // Only models feed the key, so a sequence-only change does not re-arm it.
  const key = [...new Set(unofficialAvailable.map((a) => a.model))].toSorted().join(',');
  return key === lastKey
    ? { nextKey: lastKey, shouldOverride: false }
    : { nextKey: key, shouldOverride: true };
}

/**
 * Pick the sequence to show when an unofficial run loads without a sequence
 * pinned in the URL.
 *
 * The target model is `urlModel` when one is pinned, else `selectedModel` when
 * the run covers it, else the first overlay model in sorted order. For that
 * model, `8k/256` wins when available; otherwise the first of its sequences in
 * sorted order is used so the run's points are visible.
 *
 * @param unofficialAvailable - (model, sequence) entries of the loaded runs.
 * @param selectedModel - Model currently selected in the UI.
 * @param urlModel - Model pinned in the URL; `undefined` or `''` means unpinned.
 * @returns The chosen sequence, or `null` when the run set is empty or the
 *   pinned model has no entries in it.
 */
export function selectUnofficialDefaultSequence(
  unofficialAvailable: AvailableModelSequence[],
  selectedModel: Model,
  urlModel: string | undefined,
): Sequence | null {
  if (unofficialAvailable.length === 0) return null;

  const sortedModels = [...new Set(unofficialAvailable.map((entry) => entry.model))].toSorted();
  // An empty string is an unset URL param, not a pin. This matches the
  // truthiness check in computeUnofficialOverrideDecision; with `??` alone an
  // empty string would select no entries and yield null.
  const pinnedModel = urlModel || undefined;
  const targetModel =
    pinnedModel ?? (sortedModels.includes(selectedModel) ? selectedModel : sortedModels[0]);
  const sequences = [
    ...new Set(
      unofficialAvailable
        .filter((entry) => entry.model === targetModel)
        .map((entry) => entry.sequence),
    ),
  ];

  if (sequences.includes(Sequence.EightK_256)) return Sequence.EightK_256;
  return sequences.toSorted()[0] ?? null;
}
*/ @@ -146,7 +157,9 @@ export const GPU_VENDORS: Record = Object.fromEntries( * Layout (approximate): * 0-12 (gap) * 12-42 AMD reds/oranges - * 42-120 (gap) + * 42-60 (gap) + * 60-90 Huawei amber/yellow + * 90-120 (gap) * 120-170 NVIDIA greens * 170-275 (gap) * 275-330 unknown / fallback (purples) @@ -157,6 +170,7 @@ export const VENDOR_OKLCH_ZONES: Record< { start: number; end: number; chroma: { light: number; dark: number } } > = { amd: { start: 12, end: 42, chroma: { light: 0.18, dark: 0.22 } }, + huawei: { start: 60, end: 90, chroma: { light: 0.16, dark: 0.18 } }, nvidia: { start: 120, end: 170, chroma: { light: 0.15, dark: 0.15 } }, unknown: { start: 275, end: 330, chroma: { light: 0.14, dark: 0.16 } }, }; @@ -165,23 +179,25 @@ export const VENDOR_OKLCH_ZONES: Record< * Preferred HSL hue zones for high-contrast mode. * Each vendor gets a non-overlapping slice of the 360° hue wheel so items * from different vendors are visually distinct and vendor-appropriate - * (NVIDIA = greens, AMD = reds/oranges, unknown = blues/purples). + * (NVIDIA = greens, AMD = reds/oranges, Huawei = amber/yellow, unknown = blues/purples). * When a vendor has too many items to fit with sufficient spacing, the zone * expands symmetrically — these are preferred zones, not hard constraints. * * Layout (360° wheel): - * NVIDIA: 60–195 (135°) — greens through cyans - * AMD: 300–360 + 0–60 (120°, wraps) — magentas through oranges + * NVIDIA: 90–195 (105°) — greens through cyans + * Huawei: 30–60 (30°) — amber/yellow + * AMD: 300–360 + 0–30 (90°, wraps) — magentas through reds * unknown: 195–300 (105°) — blues/purples * * Each entry is an array of linear {start, span} segments (wrapping bands * are split into two segments). 
*/ export const VENDOR_HSL_ZONES: Record = { - nvidia: [{ start: 60, span: 135 }], + nvidia: [{ start: 90, span: 105 }], + huawei: [{ start: 30, span: 30 }], amd: [ { start: 300, span: 60 }, - { start: 0, span: 60 }, + { start: 0, span: 30 }, ], unknown: [{ start: 195, span: 105 }], }; diff --git a/packages/constants/src/models.test.ts b/packages/constants/src/models.test.ts index 7a92c124..a2992a97 100644 --- a/packages/constants/src/models.test.ts +++ b/packages/constants/src/models.test.ts @@ -43,6 +43,14 @@ describe('sequenceToIslOsl', () => { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('parses 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + + it('parses 8k/625 to 8192/625', () => { + expect(sequenceToIslOsl('8k/625')).toEqual({ isl: 8192, osl: 625 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('2k/2k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -59,13 +67,21 @@ describe('islOslToSequence', () => { expect(islOslToSequence(1024, 8192)).toBe('1k/8k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + + it('converts 8192/625 to 8k/625', () => { + expect(islOslToSequence(8192, 625)).toBe('8k/625'); + }); + it('returns null for unmapped ISL/OSL pairs', () => { expect(islOslToSequence(2048, 2048)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl for all known sequences', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256', '8k/625']) { const parsed = sequenceToIslOsl(seq)!; expect(islOslToSequence(parsed.isl, parsed.osl)).toBe(seq); } diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts index 06dfa09b..b92a9c15 100644 --- a/packages/constants/src/models.ts +++ b/packages/constants/src/models.ts @@ -43,6 +43,8 @@ export function 
sequenceToIslOsl(seq: string): { isl: number; osl: number } | nu '1k/1k': { isl: 1024, osl: 1024 }, '1k/8k': { isl: 1024, osl: 8192 }, '8k/1k': { isl: 8192, osl: 1024 }, + '8k/256': { isl: 8192, osl: 256 }, + '8k/625': { isl: 8192, osl: 625 }, }; return map[seq] ?? null; } @@ -53,6 +55,8 @@ export function islOslToSequence(isl: number, osl: number): string | null { '1024_1024': '1k/1k', '1024_8192': '1k/8k', '8192_1024': '8k/1k', + '8192_256': '8k/256', + '8192_625': '8k/625', }; return map[`${isl}_${osl}`] ?? null; }