Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/app/cypress/support/mock-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ export function createMockInferenceContext(
activePresetId: null,
setActivePresetId: namedStub('setActivePresetId'),
presetGuardRef: { current: false } as React.RefObject<boolean>,
hwColorOverrides: {},
extraSequences: [],
setExtraSequences: namedStub('setExtraSequences'),
compareGpuPair: null,
...overrides,
};
Expand Down Expand Up @@ -442,6 +445,8 @@ export function createMockUnofficialRunContext(
): UnofficialRunContextType {
return {
isUnofficialRun: false,
mergeAsIngested: false,
setMergeAsIngested: namedStub('setMergeAsIngested'),
unofficialRunInfo: null,
unofficialRunInfos: [],
runIndexByUrl: {},
Expand Down
48 changes: 48 additions & 0 deletions packages/app/src/app/api/unofficial-run/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,54 @@ describe('normalizeArtifactRows', () => {
expect(rows.every((r) => r.date === '2026-03-11')).toBe(true);
});

// Compatibility case: one raw offline B300 / TRT artifact row goes in, and the
// test expects exactly one normalized row back. The expectations below pin how
// raw fields surface after normalization: `hw` as `hardware`,
// `infmax_model_prefix` as `model`, and `spec_decoding` as `spec_method`.
it('normalizes the offline B300 TRT compatibility row', () => {
  // Arrange: the raw row as it appears in the artifact.
  const offlineB300Row = rawRow({
    hw: 'b300',
    model: 'deepseek-ai/DeepSeek-V4-Pro',
    infmax_model_prefix: 'dsv4',
    framework: 'trt',
    precision: 'fp4',
    isl: 8192,
    osl: 625,
    conc: 32,
    prefill_tp: 4,
    prefill_ep: 1,
    prefill_dp_attention: false,
    prefill_num_workers: 0,
    decode_tp: 4,
    decode_ep: 1,
    decode_dp_attention: false,
    decode_num_workers: 0,
    num_prefill_gpu: 4,
    num_decode_gpu: 4,
    spec_decoding: 'mtp',
    tput_per_gpu: 489.17,
    output_tput_per_gpu: 489.17,
    mean_tpot: 0.01635,
  });

  // Act.
  const normalized = normalizeArtifactRows([offlineB300Row], '2026-06-13');

  // Assert: a single row whose identifying fields match the raw input.
  expect(normalized).toHaveLength(1);
  expect(normalized[0]).toMatchObject({
    hardware: 'b300',
    framework: 'trt',
    model: 'dsv4',
    precision: 'fp4',
    spec_method: 'mtp',
    isl: 8192,
    osl: 625,
    conc: 32,
    decode_tp: 4,
    decode_ep: 1,
    num_decode_gpu: 4,
  });

  // Metric values are expected to pass through unchanged under `metrics`.
  expect(normalized[0].metrics.output_tput_per_gpu).toBe(489.17);
  expect(normalized[0].metrics.mean_tpot).toBe(0.01635);
});

it('surfaces the per-worker measured-power array on the BenchmarkRow', () => {
const workers = [
{
Expand Down
22 changes: 20 additions & 2 deletions packages/app/src/app/api/unofficial-run/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,22 @@ async function processSingleRun(
const bmkArtifact = artifacts
.filter((a) => a.name === 'results_bmk')
.toSorted((a, b) => b.id - a.id)[0];
// Fallback: some workflows (e.g. the Mock-ascend uploader) emit one
// `bmk_<config>_conc<N>_<gpu>_<idx>` artifact per concurrency instead of a
// single aggregated `results_bmk`. When the canonical artifact is absent,
// gather everything matching `bmk_*` and concatenate the rows.
const perConfigBmkArtifacts = bmkArtifact
? []
: artifacts.filter((a) => a.name.startsWith('bmk_'));
const evalArtifact = artifacts
.filter((a) => a.name === 'eval_results_all')
.toSorted((a, b) => b.id - a.id)[0];

if (!bmkArtifact && !evalArtifact) {
if (!bmkArtifact && perConfigBmkArtifacts.length === 0 && !evalArtifact) {
return {
errorResponse: NextResponse.json(
{
error: `No results_bmk or eval_results_all artifact found for runId ${runId}`,
error: `No results_bmk, bmk_*, or eval_results_all artifact found for runId ${runId}`,
},
{ status: 404 },
),
Expand All @@ -253,6 +260,17 @@ async function processSingleRun(
);
if (errorResponse) return { errorResponse };
benchmarks = normalizeArtifactRows(rows, date, runUrl || null);
} else if (perConfigBmkArtifacts.length > 0) {
const allRows: Record<string, unknown>[] = [];
for (const artifact of perConfigBmkArtifacts) {
const { rows, errorResponse } = await downloadArtifactRows(
artifact.archive_download_url,
githubToken,
);
if (errorResponse) return { errorResponse };
allRows.push(...rows);
}
benchmarks = normalizeArtifactRows(allRows, date, runUrl || null);
}

if (evalArtifact) {
Expand Down
29 changes: 28 additions & 1 deletion packages/app/src/components/GlobalFilterContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ import {
Sequence,
SEQUENCE_OPTIONS,
} from '@/lib/data-mappings';
import { computeAutoSwitchDecision } from '@/lib/unofficial-run-auto-switch';
import {
computeAutoSwitchDecision,
computeUnofficialOverrideDecision,
selectUnofficialDefaultSequence,
} from '@/lib/unofficial-run-auto-switch';
import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api';

interface RunInfo {
Expand Down Expand Up @@ -259,6 +263,29 @@ export function GlobalFilterProvider({
}
}, [unofficialAvailable, selectedModel]);

// TEMPORARY (this branch only): default-sequence override for unofficial runs.
// When the URL did not pin `i_seq`, prefer `8K / 256` if the unofficial run
// provides it; otherwise use a sequence that is actually present in the run,
// so an uncommon shape such as 8K/625 is visible on first load. Manual
// sequence picks stick because the URL-sync effect writes `i_seq` to the URL
// after the override fires.
const lastUnofficialSeqOverrideRef = useRef<string>('');
useEffect(() => {
  const pinnedSequenceParam = getUrlParam('i_seq');
  const { nextKey, shouldOverride } = computeUnofficialOverrideDecision(
    unofficialAvailable,
    pinnedSequenceParam,
    lastUnofficialSeqOverrideRef.current,
  );

  // The key is recorded on every run, whether or not an override is applied.
  lastUnofficialSeqOverrideRef.current = nextKey;
  if (!shouldOverride) return;

  const preferredSequence = selectUnofficialDefaultSequence(
    unofficialAvailable,
    selectedModel,
    getUrlParam('g_model'),
  );
  // A `null` result leaves the current selection untouched.
  if (preferredSequence !== null) setSelectedSequence(preferredSequence);
}, [unofficialAvailable, selectedModel]);

// Sequences available for the selected model (DB ∪ unofficial run for this model)
const availableSequences = useMemo(() => {
const unofficialSeqs = unofficialAvailable
Expand Down
Loading
Loading