diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 00000000..513a873e --- /dev/null +++ b/.eslintignore @@ -0,0 +1,3 @@ +# Stale agent worktrees produced by parallel Claude Code sessions — they +# hold their own branches and are linted as part of their own runs. +.claude/worktrees/ diff --git a/.oxlintrc.json b/.oxlintrc.json index 3e2ccf26..6158a462 100644 --- a/.oxlintrc.json +++ b/.oxlintrc.json @@ -28,6 +28,7 @@ "no-undef": "off", "no-underscore-dangle": "off", "no-useless-undefined": "off", + "require-unicode-regexp": "off", "no-warning-comments": "off", "prefer-destructuring": "off", "sort-imports": "off", diff --git a/packages/app/cypress/e2e/url-params.cy.ts b/packages/app/cypress/e2e/url-params.cy.ts index 745964b7..d12eb82c 100644 --- a/packages/app/cypress/e2e/url-params.cy.ts +++ b/packages/app/cypress/e2e/url-params.cy.ts @@ -25,7 +25,7 @@ const assertNoHydrationMismatch = () => { cy.get('@consoleError').then((spy) => { const calls = (spy as unknown as { args: unknown[][] }).args; const hydration = calls.filter((args) => - args.some((a) => typeof a === 'string' && /hydrat(ion|ed) (mismatch|failed)/iu.test(a)), + args.some((a) => typeof a === 'string' && /hydrat(?:ion|ed) (?:mismatch|failed)/iu.test(a)), ); expect(hydration, JSON.stringify(hydration)).to.have.length(0); }); diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 7e300f45..152e3f98 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -189,10 +189,14 @@ export function createMockInferenceContext( workflowInfo: null, selectedYAxisMetric: 'y_tpPerGpu', setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'), + selectedPercentile: 'p90', + setSelectedPercentile: namedStub('setSelectedPercentile'), selectedXAxisMetric: null, setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'), selectedE2eXAxisMetric: null, setSelectedE2eXAxisMetric: 
namedStub('setSelectedE2eXAxisMetric'), + selectedXAxisMode: 'interactivity' as const, + setSelectedXAxisMode: namedStub('setSelectedXAxisMode'), scaleType: 'auto', setScaleType: namedStub('setScaleType'), isLegendExpanded: true, diff --git a/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx b/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx new file mode 100644 index 00000000..77f29805 --- /dev/null +++ b/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx @@ -0,0 +1,17 @@ +import type { Metadata } from 'next'; + +import { AgenticPointDetail } from '@/components/inference/agentic-point/agentic-point-detail'; + +export const metadata: Metadata = { + title: 'Agentic trace detail | InferenceX', + robots: { index: false }, +}; + +export default async function AgenticPointDetailPage({ + params, +}: { + params: Promise<{ id: string }>; +}) { + const { id } = await params; + return ; +} diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index 072c99f1..304ccb0b 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -33,6 +33,10 @@ export function normalizeArtifactRows( if (!params) continue; const { config } = params; results.push({ + // Synthetic id — overlay rows aren't persisted, so trace_replay lookups + // (keyed on benchmark_results.id) will always miss, which is the + // intended behaviour: overlays never have stored trace_replay blobs. 
+ id: 0, hardware: config.hardware, framework: config.framework, model: config.model, @@ -50,6 +54,8 @@ export function normalizeArtifactRows( decode_num_workers: config.decodeNumWorkers, num_prefill_gpu: config.numPrefillGpu, num_decode_gpu: config.numDecodeGpu, + benchmark_type: params.benchmarkType, + offload_mode: params.offloadMode, isl: params.isl, osl: params.osl, conc: params.conc, diff --git a/packages/app/src/app/api/v1/agentic-aggregates/route.ts b/packages/app/src/app/api/v1/agentic-aggregates/route.ts new file mode 100644 index 00000000..63cb2dc0 --- /dev/null +++ b/packages/app/src/app/api/v1/agentic-aggregates/route.ts @@ -0,0 +1,64 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getAgenticAggregates, + type AgenticAggregateMap, +} from '@semianalysisai/inferencex-db/queries/agentic-aggregates'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +// blobOnly: response stays small (a few numbers per id), but generating it +// parses ~5-10 MB of decompressed JSONL + JSON per id. Cache so the +// "Aggregates" toggle stays snappy. +const getCachedAgenticAggregates = cachedQuery( + (ids: number[]): Promise => getAgenticAggregates(getDb(), ids), + 'agentic-aggregates', + { blobOnly: true }, +); + +const MAX_IDS_PER_REQUEST = 200; + +/** + * GET /api/v1/agentic-aggregates?ids=1,2,3 + * + * Returns per-id mean/p50/p75/p90/p99 for ISL, OSL, KV cache utilization, + * and prefix cache hit rate — computed live from the stored aiperf + * profile_export.jsonl + server_metrics_json blobs. Ids without a + * trace_replay blob (or with no usable samples) get nulls. 
+ */ +export async function GET(request: NextRequest) { + const raw = request.nextUrl.searchParams.get('ids'); + if (!raw) { + return NextResponse.json({ error: 'ids query param is required' }, { status: 400 }); + } + + const ids = [ + ...new Set( + raw + .split(',') + .map((s) => Number(s.trim())) + .filter((n) => Number.isFinite(n) && n > 0), + ), + ]; + if (ids.length === 0) { + return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 }); + } + if (ids.length > MAX_IDS_PER_REQUEST) { + return NextResponse.json( + { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` }, + { status: 400 }, + ); + } + + try { + const sorted = [...ids].toSorted((a, b) => a - b); + const result = await getCachedAgenticAggregates(sorted); + return cachedJson(result); + } catch (error) { + console.error('Error fetching agentic aggregates:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/benchmark-siblings/route.ts b/packages/app/src/app/api/v1/benchmark-siblings/route.ts new file mode 100644 index 00000000..14c1d461 --- /dev/null +++ b/packages/app/src/app/api/v1/benchmark-siblings/route.ts @@ -0,0 +1,38 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getBenchmarkSiblings, + type BenchmarkSiblings, +} from '@semianalysisai/inferencex-db/queries/benchmark-siblings'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +const getCachedSiblings = cachedQuery( + (id: number): Promise => getBenchmarkSiblings(getDb(), id), + 'benchmark-siblings', +); + +/** + * GET /api/v1/benchmark-siblings?id=N + * + * Returns the SKU (hw/framework/model/precision/spec/benchmark_type) of the + * benchmark_result + all sibling rows that share that SKU within the same + * workflow_run. Used by the agentic detail page to render a navigator. 
+ */ +export async function GET(request: NextRequest) { + const id = Number(request.nextUrl.searchParams.get('id')); + if (!id || !Number.isFinite(id)) { + return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 }); + } + try { + const data = await getCachedSiblings(id); + if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 }); + return cachedJson(data); + } catch (error) { + console.error('Error fetching benchmark siblings:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/benchmarks/route.test.ts b/packages/app/src/app/api/v1/benchmarks/route.test.ts index 780f775e..92d5f326 100644 --- a/packages/app/src/app/api/v1/benchmarks/route.test.ts +++ b/packages/app/src/app/api/v1/benchmarks/route.test.ts @@ -59,6 +59,7 @@ describe('GET /api/v1/benchmarks', () => { ['dsr1'], undefined, undefined, + undefined, ); }); @@ -72,6 +73,7 @@ describe('GET /api/v1/benchmarks', () => { ['dsr1'], '2026-03-01', undefined, + undefined, ); }); @@ -82,7 +84,27 @@ describe('GET /api/v1/benchmarks', () => { req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&exact=true'), ); expect(res.status).toBe(200); - expect(mockGetLatestBenchmarks).toHaveBeenCalledWith('mock-sql', ['dsr1'], '2026-03-01', true); + expect(mockGetLatestBenchmarks).toHaveBeenCalledWith( + 'mock-sql', + ['dsr1'], + '2026-03-01', + true, + undefined, + ); + }); + + it('passes runId param to query when provided', async () => { + mockGetLatestBenchmarks.mockResolvedValueOnce([]); + + const res = await GET(req('/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=26194160120')); + expect(res.status).toBe(200); + expect(mockGetLatestBenchmarks).toHaveBeenCalledWith( + 'mock-sql', + ['dsr1'], + undefined, + undefined, + '26194160120', + ); }); it('returns 500 when query throws', async () => { diff --git a/packages/app/src/app/api/v1/benchmarks/route.ts 
b/packages/app/src/app/api/v1/benchmarks/route.ts index c79f1aa7..c4037208 100644 --- a/packages/app/src/app/api/v1/benchmarks/route.ts +++ b/packages/app/src/app/api/v1/benchmarks/route.ts @@ -11,10 +11,10 @@ import { loadFixture } from '@/lib/test-fixtures'; export const dynamic = 'force-dynamic'; const getCachedBenchmarks = cachedQuery( - (dbModelKeys: string[], date?: string, exact?: boolean) => { + (dbModelKeys: string[], date?: string, exact?: boolean, runId?: string) => { if (JSON_MODE) return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact)); - return getLatestBenchmarks(getDb(), dbModelKeys, date, exact); + return getLatestBenchmarks(getDb(), dbModelKeys, date, exact, runId); }, 'benchmarks', { blobOnly: true }, @@ -25,6 +25,7 @@ export async function GET(request: NextRequest) { const model = params.get('model') ?? ''; const date = params.get('date') ?? undefined; const exact = params.get('exact') === 'true'; + const runId = params.get('runId') ?? undefined; const dbModelKeys = DISPLAY_MODEL_TO_DB[model]; if (!dbModelKeys || dbModelKeys.length === 0) { return NextResponse.json({ error: 'Unknown model' }, { status: 400 }); @@ -32,7 +33,7 @@ export async function GET(request: NextRequest) { if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks')); try { - const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined); + const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId); return cachedJson(rows); } catch (error) { console.error('Error fetching benchmarks:', error); diff --git a/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts b/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts new file mode 100644 index 00000000..6ce7c017 --- /dev/null +++ b/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts @@ -0,0 +1,71 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import 
{ + getDerivedAgenticMetrics, + type DerivedAgenticMetricMap, +} from '@semianalysisai/inferencex-db/queries/derived-agentic-metrics'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +// blobOnly: the response is one entry per id with two numbers, but the +// derivation work parses thousands of JSONL records per blob — cache the +// computed result so a chart-refresh hits the warm path. +// Bumped to v2 when mean_p90_prefill_tps_per_user → p90_prefill_tps_per_user. +// Stale v1 cache entries return undefined for the new field and silently +// blank the chart with "No data available". +const getCachedDerivedAgenticMetrics = cachedQuery( + (ids: number[]): Promise => getDerivedAgenticMetrics(getDb(), ids), + 'derived-agentic-metrics-v2', + { blobOnly: true }, +); + +const MAX_IDS_PER_REQUEST = 200; + +/** + * GET /api/v1/derived-agentic-metrics?ids=1,2,3 + * + * Returns per-id derived metrics computed live from the stored aiperf + * profile_export.jsonl blobs: + * - normalized_session_time_s: mean across sessions of session e2e time + * (Σ per-turn request_latency) rescaled by mean_load / session_load. + * - p90_prefill_tps_per_user: P90 of per-turn prefill TPS/user (ISL / TTFT) + * across every turn in every session. + * + * Ids without a trace_replay blob or with unparseable records are omitted. 
+ */ +export async function GET(request: NextRequest) { + const raw = request.nextUrl.searchParams.get('ids'); + if (!raw) { + return NextResponse.json({ error: 'ids query param is required' }, { status: 400 }); + } + + const ids = [ + ...new Set( + raw + .split(',') + .map((s) => Number(s.trim())) + .filter((n) => Number.isFinite(n) && n > 0), + ), + ]; + if (ids.length === 0) { + return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 }); + } + if (ids.length > MAX_IDS_PER_REQUEST) { + return NextResponse.json( + { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` }, + { status: 400 }, + ); + } + + try { + const sorted = [...ids].toSorted((a, b) => a - b); + const result = await getCachedDerivedAgenticMetrics(sorted); + return cachedJson(result); + } catch (error) { + console.error('Error fetching derived agentic metrics:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/request-timeline/route.ts b/packages/app/src/app/api/v1/request-timeline/route.ts new file mode 100644 index 00000000..6c884fb2 --- /dev/null +++ b/packages/app/src/app/api/v1/request-timeline/route.ts @@ -0,0 +1,40 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getRequestTimeline, + type RequestTimeline, +} from '@semianalysisai/inferencex-db/queries/request-timeline'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +const getCachedRequestTimeline = cachedQuery( + (id: number): Promise => getRequestTimeline(getDb(), id), + 'request-timeline', + { blobOnly: true }, +); + +/** + * GET /api/v1/request-timeline?id=N + * + * Returns the per-request Gantt timeline for one agentic benchmark point. + * Each request entry has ns-from-start offsets for credit/start/ack/end, + * plus TTFT, ISL, OSL, conversation id, turn index, worker id. 
404 if the + * point has no stored profile_export.jsonl blob. + */ +export async function GET(request: NextRequest) { + const id = Number(request.nextUrl.searchParams.get('id')); + if (!id || !Number.isFinite(id)) { + return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 }); + } + try { + const data = await getCachedRequestTimeline(id); + if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 }); + return cachedJson(data); + } catch (error) { + console.error('Error fetching request timeline:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/trace-availability/route.ts b/packages/app/src/app/api/v1/trace-availability/route.ts new file mode 100644 index 00000000..2484ceaf --- /dev/null +++ b/packages/app/src/app/api/v1/trace-availability/route.ts @@ -0,0 +1,59 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getTraceAvailability, + type TraceAvailabilityMap, +} from '@semianalysisai/inferencex-db/queries/trace-availability'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +const getCachedTraceAvailability = cachedQuery( + (ids: number[]): Promise => getTraceAvailability(getDb(), ids), + 'trace-availability', +); + +const MAX_IDS_PER_REQUEST = 500; + +/** + * GET /api/v1/trace-availability?ids=1,2,3 + * + * Returns `{[id]: true}` for ids that have a stored trace_replay blob. + * Lightweight presence check used by the scatter tooltip to decide whether + * to render the "View charts" button — see queries/trace-availability.ts. 
+ */ +export async function GET(request: NextRequest) { + const raw = request.nextUrl.searchParams.get('ids'); + if (!raw) { + return NextResponse.json({ error: 'ids query param is required' }, { status: 400 }); + } + + const ids = [ + ...new Set( + raw + .split(',') + .map((s) => Number(s.trim())) + .filter((n) => Number.isFinite(n) && n > 0), + ), + ]; + if (ids.length === 0) { + return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 }); + } + if (ids.length > MAX_IDS_PER_REQUEST) { + return NextResponse.json( + { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` }, + { status: 400 }, + ); + } + + try { + const sorted = [...ids].toSorted((a, b) => a - b); + const availability = await getCachedTraceAvailability(sorted); + return cachedJson(availability); + } catch (error) { + console.error('Error fetching trace availability:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/trace-histograms/route.ts b/packages/app/src/app/api/v1/trace-histograms/route.ts new file mode 100644 index 00000000..7a959a65 --- /dev/null +++ b/packages/app/src/app/api/v1/trace-histograms/route.ts @@ -0,0 +1,65 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getTraceHistograms, + type TraceHistogramMap, +} from '@semianalysisai/inferencex-db/queries/trace-histograms'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +// blobOnly: a 50-id histogram payload can easily exceed Next.js's 2MB +// unstable_cache limit (each point carries one int per request, ~500-1000+ +// requests for agentic), which manifests as a 500 from the route. Blob +// storage lets us cache the larger response without losing the warm-cache hit. 
+const getCachedTraceHistograms = cachedQuery( + (ids: number[]): Promise => getTraceHistograms(getDb(), ids), + 'trace-histograms', + { blobOnly: true }, +); + +const MAX_IDS_PER_REQUEST = 200; + +/** + * GET /api/v1/trace-histograms?ids=1,2,3 + * + * Returns per-request ISL/OSL arrays parsed from the stored aiperf + * `profile_export.jsonl` blobs, keyed by `benchmark_results.id`. + * Ids without a trace_replay blob are omitted from the response. + */ +export async function GET(request: NextRequest) { + const raw = request.nextUrl.searchParams.get('ids'); + if (!raw) { + return NextResponse.json({ error: 'ids query param is required' }, { status: 400 }); + } + + const ids = [ + ...new Set( + raw + .split(',') + .map((s) => Number(s.trim())) + .filter((n) => Number.isFinite(n) && n > 0), + ), + ]; + if (ids.length === 0) { + return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 }); + } + if (ids.length > MAX_IDS_PER_REQUEST) { + return NextResponse.json( + { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` }, + { status: 400 }, + ); + } + + try { + // Sort the cache key so the same set of ids in any order hits the same entry. 
+ const sorted = [...ids].toSorted((a, b) => a - b); + const histograms = await getCachedTraceHistograms(sorted); + return cachedJson(histograms); + } catch (error) { + console.error('Error fetching trace histograms:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/app/api/v1/trace-server-metrics/route.ts b/packages/app/src/app/api/v1/trace-server-metrics/route.ts new file mode 100644 index 00000000..7346a3e8 --- /dev/null +++ b/packages/app/src/app/api/v1/trace-server-metrics/route.ts @@ -0,0 +1,40 @@ +import { type NextRequest, NextResponse } from 'next/server'; + +import { getDb } from '@semianalysisai/inferencex-db/connection'; +import { + getTraceServerMetrics, + type TraceServerMetrics, +} from '@semianalysisai/inferencex-db/queries/trace-server-metrics'; + +import { cachedJson, cachedQuery } from '@/lib/api-cache'; + +export const dynamic = 'force-dynamic'; + +const getCachedTraceServerMetrics = cachedQuery( + (id: number): Promise => getTraceServerMetrics(getDb(), id), + 'trace-server-metrics', + { blobOnly: true }, +); + +/** + * GET /api/v1/trace-server-metrics?id=N + * + * Returns parsed time-series for the agentic detail view: KV cache usage, + * prefix cache hit rate per interval, queue depth, and per-source prompt + * token rates. Times are in seconds from benchmark start. 404 if the point + * has no stored server_metrics_export.json blob. 
+ */ +export async function GET(request: NextRequest) { + const id = Number(request.nextUrl.searchParams.get('id')); + if (!id || !Number.isFinite(id)) { + return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 }); + } + try { + const data = await getCachedTraceServerMetrics(id); + if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 }); + return cachedJson(data); + } catch (error) { + console.error('Error fetching trace server metrics:', error); + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }); + } +} diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 62ae64ff..771d680e 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -12,6 +12,8 @@ import { useState, } from 'react'; +import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants'; + // useLayoutEffect warns during SSR; alias to useEffect on the server (no-op there anyway). const useIsomorphicLayoutEffect = typeof window === 'undefined' ? 
useEffect : useLayoutEffect; @@ -22,8 +24,6 @@ function isEnumValue>(e: T, v: string): v is T[ const RUNDATE_RE = /^\d{4}-\d{2}-\d{2}$/u; const RUNID_RE = /^[A-Za-z0-9_-]{1,64}$/u; -import { DISPLAY_MODEL_TO_DB, islOslToSequence } from '@semianalysisai/inferencex-constants'; - import { useAvailability } from '@/hooks/api/use-availability'; import { useWorkflowInfo } from '@/hooks/api/use-workflow-info'; import { useUrlState } from '@/hooks/useUrlState'; @@ -99,7 +99,9 @@ function buildRunInfo(data: WorkflowInfoResponse): Record { const runs: Record = {}; for (const run of data.runs) { const runId = String(run.github_run_id); - const runChangelogs = data.changelogs.filter((c) => c.workflow_run_id === run.github_run_id); + const runChangelogs = data.changelogs.filter( + (c) => String(c.workflow_run_id) === String(run.github_run_id), + ); runs[runId] = { runId, runDate: run.created_at, @@ -146,7 +148,11 @@ export function GlobalFilterProvider({ const [selectedSequence, setSelectedSequence] = useState(() => { if (initialSequence) return initialSequence; - return Sequence.EightK_OneK; + const urlSeq = getUrlParam('i_seq'); + if (urlSeq && Object.values(Sequence).includes(urlSeq as Sequence)) return urlSeq as Sequence; + // Prefer Agentic Traces by default when the selected model has it; the + // effectiveSequence fallback below handles models without agentic data. + return Sequence.AgenticTraces; }); const [selectedPrecisions, setSelectedPrecisionsRaw] = useState(() => { @@ -267,9 +273,7 @@ export function GlobalFilterProvider({ if (!availabilityRows) { return unofficialSeqs.length > 0 ? [...new Set(unofficialSeqs)] : SEQUENCE_OPTIONS; } - const dbSeqs = modelRows - .map((r) => islOslToSequence(r.isl, r.osl)) - .filter((s): s is Sequence => s !== null); + const dbSeqs = modelRows.map((r) => rowToSequence(r)).filter((s): s is Sequence => s !== null); const merged = [...new Set([...dbSeqs, ...unofficialSeqs])]; return merged.length > 0 ? 
merged : SEQUENCE_OPTIONS; }, [availabilityRows, modelRows, unofficialAvailable, selectedModel]); @@ -288,7 +292,7 @@ export function GlobalFilterProvider({ if (!availabilityRows) { return unofficialPrecs.length > 0 ? [...new Set(unofficialPrecs)].toSorted() : ['fp4']; } - const rows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence); + const rows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence); const dbPrecs = rows.map((r) => r.precision); const merged = [...new Set([...dbPrecs, ...unofficialPrecs])].toSorted(); return merged.length > 0 ? merged : ['fp4']; @@ -304,7 +308,7 @@ export function GlobalFilterProvider({ // Dates available for selected model + sequence + precisions const availableDates = useMemo(() => { if (!availabilityRows) return []; - const seqRows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence); + const seqRows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence); const rows = seqRows.filter((r) => effectivePrecisions.includes(r.precision)); if (rows.length === 0) { return [...new Set(seqRows.map((r) => r.date))].toSorted(); diff --git a/packages/app/src/components/evaluation/ui/BarChartD3.tsx b/packages/app/src/components/evaluation/ui/BarChartD3.tsx index 808e233d..b86aa465 100644 --- a/packages/app/src/components/evaluation/ui/BarChartD3.tsx +++ b/packages/app/src/components/evaluation/ui/BarChartD3.tsx @@ -180,7 +180,7 @@ export default function EvalBarChartD3({ caption }: { caption?: ReactNode }) { if (url) { const direct = runIndexByUrl[url]; if (direct !== undefined) return unofficialRunInfos[direct]?.branch; - const idMatch = url.match(/\/runs\/(\d+)/u); + const idMatch = url.match(/\/runs\/(?\d+)/u); if (idMatch) { const viaId = runIndexByUrl[idMatch[1]]; if (viaId !== undefined) return unofficialRunInfos[viaId]?.branch; diff --git a/packages/app/src/components/evaluation/ui/EvalSamplesDrawer.tsx 
b/packages/app/src/components/evaluation/ui/EvalSamplesDrawer.tsx index 8bef7b03..d28f2728 100644 --- a/packages/app/src/components/evaluation/ui/EvalSamplesDrawer.tsx +++ b/packages/app/src/components/evaluation/ui/EvalSamplesDrawer.tsx @@ -338,7 +338,7 @@ interface FilterChipProps { */ function extractRunIdFromUrl(url: string | undefined): string | null { if (!url) return null; - const m = url.match(/\/actions\/runs\/(\d+)/u); + const m = url.match(/\/actions\/runs\/(?\d+)/u); return m ? m[1] : null; } diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index fc992ee4..5d165e60 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -11,7 +11,7 @@ import { useState, } from 'react'; -import { DISPLAY_MODEL_TO_DB, islOslToSequence } from '@semianalysisai/inferencex-constants'; +import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants'; import { track } from '@/lib/analytics'; import { FAVORITE_PRESETS, @@ -43,7 +43,7 @@ import { import { useUrlState } from '@/hooks/useUrlState'; import { buildAvailabilityHwKey } from '@/lib/chart-utils'; import { getHardwareConfig, getModelSortIndex, isKnownGpu, TABLEAU_10 } from '@/lib/constants'; -import { getModelExclusion, MODEL_PREFIX_MAPPING } from '@/lib/data-mappings'; +import { getModelExclusion, MODEL_PREFIX_MAPPING, sequenceKind } from '@/lib/data-mappings'; import { MtpEngineConflictToast, type MtpEngineConflictDetail, @@ -142,10 +142,51 @@ export function InferenceProvider({ () => getUrlParam('i_metric') || initialYAxisMetric || 'y_tpPerGpu', ); const [selectedXAxisMetric, setSelectedXAxisMetric] = useState( - () => getUrlParam('i_xmetric') || 'p99_ttft', + () => getUrlParam('i_xmetric') || 'p90_ttft', ); const [selectedE2eXAxisMetric, setSelectedE2eXAxisMetric] = useState( - () => getUrlParam('i_e2e_xmetric') || null, + () 
=> getUrlParam('i_e2e_xmetric') || 'p90_ttft', + ); + // Selected chart variant. Initialize from URL only — SSR cannot read URL, so + // computing a kind-based default here would diverge between server and client + // and cause a hydration mismatch. The scenario-kind default is applied in a + // post-mount effect below (and a ref tracks whether the user has overridden). + type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps'; + const VALID_X_MODES: XAxisMode[] = [ + 'ttft', + 'e2e', + 'interactivity', + 'session-time', + 'prefill-tps', + ]; + // SSR has no URL access, so seed with a fixed default and apply the URL + // value (if any) in a post-mount effect — keeps server + client first render + // identical and avoids "didn't match" hydration warnings when the URL holds + // a non-default mode. + const [selectedXAxisMode, setSelectedXAxisMode] = useState('ttft'); + const xAxisModeFromUrlRef = useRef(false); + useEffect(() => { + if (xAxisModeFromUrlRef.current) return; + const v = getUrlParam('i_xmode'); + if (v && (VALID_X_MODES as string[]).includes(v)) { + xAxisModeFromUrlRef.current = true; + setSelectedXAxisMode(v as XAxisMode); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + // Wrap the setter so a button click also aligns selectedE2eXAxisMetric — the + // existing useChartData pipeline keys off that flag for the e2e chart's x-axis. + const handleSetXAxisMode = useCallback((mode: XAxisMode) => { + xAxisModeFromUrlRef.current = true; + setSelectedXAxisMode(mode); + // The e2e chart's x-axis metric is reconciled in a separate effect below, + // because it depends on sequence kind (fixed-seq has no p90_* metrics) and + // the agentic percentile, both of which can change independently. + }, []); + // Latency percentile applied to the chart x-axis for agentic scenarios. + // Values: 'p90' | 'p99'. Non-agentic charts ignore. 
+ const [selectedPercentile, setSelectedPercentile] = useState( + () => getUrlParam('i_pctl') || 'p90', ); const [scaleType, setScaleType] = useState<'auto' | 'linear' | 'log'>( () => (getUrlParam('i_scale') as 'auto' | 'linear' | 'log') || 'auto', @@ -202,6 +243,60 @@ export function InferenceProvider({ // ── Data fetching (gated by isActive) ────────────────────────────────────── const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined; + // Run-selector scoping: only constrain benchmark data to a specific run when + // there's actually a disambiguation to make for the CURRENT model. The + // raw `availableRuns` is across ALL models on the date, so the picker may + // auto-select a run that produced nothing for the current model — passing + // that runId would return zero rows and hide the chart entirely. + // Compute the set of runs whose CHANGELOG explicitly mentions this model + + // precision. We can't reuse `filterRunsByModel` here because it has a + // fallback that returns all runs when nothing matches (so the picker still + // renders) — which would make us pass a runId that produced no rows for + // the current model, hiding the chart. + const modelPrefixesEarly = Object.entries(MODEL_PREFIX_MAPPING) + .filter(([, model]) => model === selectedModel) + .map(([prefix]) => prefix); + // Map each FULL config_key (model-precision-hardware-framework) a run's + // changelog claims to the set of runs claiming it. Single-run scoping should + // only kick in when two runs contest the SAME full key — e.g. a same-day + // re-run of one hardware — because then a DISTINCT ON merge could mix them + // and the user needs to pick which run wins. Runs covering DIFFERENT hardware + // of the same model (e.g. a B300 run and a B200 run on the same date) are + // complementary: both must render via carry-forward. 
Matching on model+ + // precision alone (the old behavior) wrongly treated those as alternatives + // and scoped the chart to one run, hiding the other GPU's curve. + const runsByConfigKey = new Map>(); + if (availableRuns) { + for (const [runId, runInfo] of Object.entries(availableRuns)) { + if (!runInfo.changelog) continue; + for (const entry of runInfo.changelog.entries) { + for (const key of entry.config_keys) { + const parts = key.split('-'); + if (modelPrefixesEarly.includes(parts[0]!) && effectivePrecisions.includes(parts[1]!)) { + let runs = runsByConfigKey.get(key); + if (!runs) { + runs = new Set(); + runsByConfigKey.set(key, runs); + } + runs.add(runId); + } + } + } + } + } + // A run is "contested" only if some full config_key it claims is also claimed + // by another run. Only then does picking a run disambiguate anything. + // Downstream (useChartData / mergeRunScopedRows) this no longer scopes the + // WHOLE chart to the run: only the configs the run actually produced are + // pinned to it, and every other config (e.g. another framework's same-day + // run) still carries forward from the normal latest-per-config rows. + const contestedRunIds = new Set(); + for (const runs of runsByConfigKey.values()) { + if (runs.size > 1) for (const r of runs) contestedRunIds.add(r); + } + const benchmarkRunId = + selectedRunId && contestedRunIds.has(String(selectedRunId)) ? String(selectedRunId) : undefined; + const { graphs, loading: chartDataLoading, @@ -222,7 +317,10 @@ export function InferenceProvider({ effectiveRunDate, isActive, latestDate, + selectedPercentile, compareGpuPair ?? 
null, + benchmarkRunId, + selectedXAxisMode, ); // For GPU comparison date picker — use shared availability data from global filters @@ -236,7 +334,7 @@ export function InferenceProvider({ if (!availabilityRows) return availableDates; const rows = availabilityRows.filter((r) => { if (!dbModelKeys.includes(r.model)) return false; - if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) return false; + if (rowToSequence(r) !== effectiveSequence) return false; if (!effectivePrecisions.includes(r.precision)) return false; if (!r.hardware) return false; const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg); @@ -261,7 +359,7 @@ export function InferenceProvider({ const hwKeys = new Set(); for (const r of availabilityRows) { if (!dbModelKeys.includes(r.model)) continue; - if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) continue; + if (rowToSequence(r) !== effectiveSequence) continue; if (!effectivePrecisions.includes(r.precision)) continue; if (!r.hardware) continue; const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg); @@ -333,6 +431,61 @@ export function InferenceProvider({ setTrackedConfigs((prev) => (prev.length > 0 ? [] : prev)); }, [selectedModel, effectiveSequence, effectivePrecisions, selectedYAxisMetric]); + // Reconcile the x-axis mode with the scenario kind: + // - On mount with no `i_xmode` URL param: snap to the kind's natural default + // (agentic → ttft, fixed → interactivity). The state itself was initialized + // to a SSR-stable constant so server and client render the same DOM; this + // effect fixes it up after hydration. + // - When the user later switches sequence kinds: snap to the new kind's + // natural default (the prior selection was for a different kind, so it + // doesn't carry over). 
+ const lastSeqKindRef = useRef | null>(null); + useEffect(() => { + const kind = sequenceKind(effectiveSequence); + const isInitialMount = lastSeqKindRef.current === null; + const isAgenticOnlyMode = + selectedXAxisMode === 'session-time' || selectedXAxisMode === 'prefill-tps'; + // On a stale render where kind hasn't changed, bail unless the current + // mode is agentic-only and we just landed on a fixed-seq scenario — in + // that case force the snap so the chart doesn't try to plot trace-derived + // metrics against rows that have no trace_replay. + if (!isInitialMount && lastSeqKindRef.current === kind) { + if (kind === 'fixed-seq' && isAgenticOnlyMode) { + handleSetXAxisMode('interactivity'); + } + return; + } + lastSeqKindRef.current = kind; + if ( + isInitialMount && + xAxisModeFromUrlRef.current && + !(kind === 'fixed-seq' && isAgenticOnlyMode) + ) { + // Initial mount with a URL-restored mode: keep it and skip the default + // snap. The one case excluded by the condition above — an agentic-only + // mode on a fixed-seq sequence — makes no sense, so it does NOT return + // here and falls through to the default snap below. + return; + } + handleSetXAxisMode(kind === 'agentic' ? 'ttft' : 'interactivity'); + }, [effectiveSequence, selectedXAxisMode, handleSetXAxisMode]); + + // Reconcile selectedE2eXAxisMetric whenever the mode, sequence kind, or + // agentic percentile changes. For fixed-seq the JSONB only carries + // median_* / p99_* (no p90_*), so the TTFT button there has to point at + // median_ttft — otherwise the chart goes blank. For agentic, we point at + // the user's chosen percentile so the dropdown actually drives the axis. + useEffect(() => { + const isAgentic = sequenceKind(effectiveSequence) === 'agentic'; + if (selectedXAxisMode === 'ttft') { + setSelectedE2eXAxisMetric(isAgentic ? 
`${selectedPercentile}_ttft` : 'median_ttft'); + } else if (selectedXAxisMode === 'e2e') { + // null = use the chart-config natural x (median_e2el), which useChartData + // rewrites to <percentile>_e2el for agentic via withPercentile().
+ setSelectedE2eXAxisMetric(null); + } + // 'interactivity' mode renders the interactivity chart, which keys off + // selectedXAxisMetric (not the e2e one), so nothing to do here. + }, [selectedXAxisMode, effectiveSequence, selectedPercentile]); + // Ref guard: when true, filter changes don't clear the active preset. // FavoritePresetsDropdown sets this while applying a preset so its own // programmatic setter calls don't accidentally deactivate it. @@ -785,6 +938,7 @@ export function InferenceProvider({ useUrlStateSync( { i_metric: selectedYAxisMetric, + i_pctl: selectedPercentile, i_gpus: selectedGPUs.join(','), i_dates: selectedDates.join(','), i_dstart: selectedDateRange.startDate, @@ -795,6 +949,7 @@ export function InferenceProvider({ i_log: logScale ? '1' : '', i_xmetric: selectedXAxisMetric || '', i_e2e_xmetric: selectedE2eXAxisMetric || '', + i_xmode: selectedXAxisMode, i_scale: scaleType, i_legend: isLegendExpanded ? '' : '0', i_advlabel: useAdvancedLabels ? '1' : '', @@ -808,6 +963,7 @@ export function InferenceProvider({ selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, + selectedXAxisMode, scaleType, selectedGPUs, selectedDates, @@ -978,6 +1134,8 @@ export function InferenceProvider({ setSelectedXAxisMetric, selectedE2eXAxisMetric, setSelectedE2eXAxisMetric, + selectedXAxisMode, + setSelectedXAxisMode: handleSetXAxisMode, scaleType, setScaleType, loading, @@ -985,6 +1143,8 @@ export function InferenceProvider({ workflowInfo, selectedYAxisMetric, setSelectedYAxisMetric: setSelectedYAxisMetricAndClear, + selectedPercentile, + setSelectedPercentile, selectedGPUs, setSelectedGPUs: setSelectedGPUsAndClear, availableGPUs, @@ -1049,6 +1209,7 @@ export function InferenceProvider({ selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, + selectedXAxisMode, scaleType, selectedGPUs, selectedDates, diff --git a/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx 
b/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx new file mode 100644 index 00000000..1ce321ee --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/agentic-point-detail.tsx @@ -0,0 +1,621 @@ +'use client'; + +import Link from 'next/link'; +import { useRouter } from 'next/navigation'; +import { useState } from 'react'; +import { ArrowLeft } from 'lucide-react'; + +import { useAgenticAggregates, type AgenticAggregateMap } from '@/hooks/api/use-agentic-aggregates'; +import { useRequestTimeline } from '@/hooks/api/use-request-timeline'; +import { useTraceHistograms } from '@/hooks/api/use-trace-histograms'; +import { + useTraceServerMetrics, + type PointMeta, + type QueueDepthPoint, + type TimeSeriesPoint, +} from '@/hooks/api/use-trace-server-metrics'; +import { useBenchmarkSiblings } from '@/hooks/api/use-benchmark-siblings'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; + +import { AggregateChart, type AggregatePoint, type PercentileKey } from './aggregate-chart'; +import { Distribution } from './distribution'; +import { ExpandableChart } from './expandable-chart'; +import { RequestTimelineView } from './request-timeline'; +import { SiblingNav, chipLabel } from './sibling-nav'; +import { + StackedAreaChart, + TimeSeriesChart, + cumulativeAverage, + cumulativeDifferenceMonotonic, + inflightUniqueTokens, + rollingAverage, + sumSeries, + timeRollingAverage, +} from './time-series-chart'; + +interface Props { + id: number; +} + +const fmtPct = (v: number | null | undefined): string => + v === null || v === undefined || Number.isNaN(v) ? '—' : `${(v * 100).toFixed(2)}%`; + +function MetaLine({ label, value }: { label: string; value: React.ReactNode }) { + return ( +
+ {label} + {value} +
+ ); +} + +function PointSummary({ meta }: { meta: PointMeta }) { + return ( +
+
+

+ Selected point + {meta.disagg ? ' · disagg' : ''} + {meta.spec_method && meta.spec_method !== 'none' ? ` · spec=${meta.spec_method}` : ''} +

+ {meta.run_url && ( + + GitHub Actions run → + + )} +
+
+ + + + + {meta.isl !== null && } + {meta.osl !== null && } +
+
+ ); +} + +/** Sizes passed to charts for the inline (small) vs expanded (dialog) render. */ +const CHART_SIZES = { + inline: { width: 720, height: 260 }, + expanded: { width: 1300, height: 520 }, +}; + +// Per-DP-rank color palette for DEP runs (one distinct color per rank in +// the KV cache utilization overlay). Mirrors the request-timeline row +// palette so the same DP index reads as the same color across both views. +// Wraps mod-N if more than 12 ranks ever land. +const DP_RANK_PALETTE = [ + '#3b82f6', + '#ef4444', + '#10b981', + '#f59e0b', + '#a855f7', + '#06b6d4', + '#f97316', + '#84cc16', + '#ec4899', + '#14b8a6', + '#8b5cf6', + '#eab308', +]; + +type DetailView = 'point' | 'timeline' | 'aggregates'; +const VIEW_OPTIONS: SegmentedToggleOption[] = [ + { value: 'point', label: 'Per-point', testId: 'detail-view-point' }, + { value: 'timeline', label: 'Request timeline', testId: 'detail-view-timeline' }, + { value: 'aggregates', label: 'Aggregates across configs', testId: 'detail-view-aggregates' }, +]; + +/** Bundle per-percentile values for one sibling into the shape AggregateChart wants. 
*/ +function toAggPoint( + sibling: { id: number; label: string }, + pct: { mean: number; p50: number; p75: number; p90: number; p99: number } | null | undefined, +): AggregatePoint { + const values: Partial> = {}; + if (pct) { + values.mean = pct.mean; + values.p50 = pct.p50; + values.p75 = pct.p75; + values.p90 = pct.p90; + values.p99 = pct.p99; + } + return { id: sibling.id, label: sibling.label, values }; +} + +export function AgenticPointDetail({ id }: Props) { + const router = useRouter(); + const histQuery = useTraceHistograms([id], true); + const metricsQuery = useTraceServerMetrics(id, true); + const siblingsQuery = useBenchmarkSiblings(id); + + const hist = histQuery.data?.[id]; + const metrics = metricsQuery.data; + const siblingsData = siblingsQuery.data; + + const [view, setView] = useState('point'); + // Fetch aggregates only when the aggregates view is active. Uses the full + // sibling set (across parallelism + concurrency configs) so each chart + // shows how the metric varies across the SKU. + const siblingIds = siblingsData?.siblings.map((s) => s.id) ?? []; + const aggregatesQuery = useAgenticAggregates(siblingIds, view === 'aggregates'); + // Per-request timeline used by both the timeline view AND the per-point + // "Unique input tokens in flight" chart, so fetch whenever we're on + // either view. + const timelineQuery = useRequestTimeline(id, view === 'timeline' || view === 'point'); + + return ( +
+
+ + · + + Inference chart + +
+ + {siblingsData ? ( + + ) : siblingsQuery.isLoading ? ( +
Loading SKU navigator…
+ ) : null} + + {metrics ? ( + + ) : metricsQuery.isLoading ? ( +
Loading point metadata…
+ ) : null} + + {metricsQuery.isError && ( +
+ Failed to load trace data for benchmark point #{id}. +
+ )} + {metricsQuery.data === null && !metricsQuery.isLoading && ( +
+ No stored trace_replay blob for benchmark point #{id}. This point predates the aiperf + time-series capture, or its source artifacts have expired on GitHub. +
+ )} + +
+ + {view === 'aggregates' && ( + + {siblingIds.length} configs in SKU + {aggregatesQuery.isLoading ? ' · loading…' : ''} + + )} + {view === 'timeline' && timelineQuery.data && ( + + {timelineQuery.data.requests.length} requests + + )} +
+ + {view === 'aggregates' ? ( + + ) : view === 'timeline' ? ( + timelineQuery.isLoading ? ( +
+ Loading request timeline… +
+ ) : timelineQuery.data ? ( + + ) : ( +
+ No per-request timeline for benchmark point #{id} — the profile_export.jsonl artifact + isn't stored for this row. +
+ ) + ) : ( +
+ { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (hist) return ; + return histQuery.isLoading ? : ; + }} + /> + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (hist) return ; + return histQuery.isLoading ? : ; + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + // For SGLang hicache rows we have both GPU (HBM) util and + // host (CPU offload pool) util — overlay them as two lines. + const hasHost = metrics.hostKvCacheUsage.length > 0; + // DEP runs report one series per engine. When there's more + // than one, draw one line per rank in distinct colors so + // load skew is visible at a glance; cluster-average sits on + // top as a bold dark-red (#dc2626) line so it stands out. + const perEngine = metrics.kvCacheUsageByEngine ?? []; + const hasPerEngine = perEngine.length > 1; + // Render order matters: per-engine first → average drawn on top. + const series = [ + ...(hasPerEngine + ? perEngine.map((e, i) => ({ + name: `DP ${e.engineLabel}`, + data: rollingAverage(e.points, 50), + color: DP_RANK_PALETTE[i % DP_RANK_PALETTE.length]!, + // Thin + translucent so the Avg line on top reads as + // the headline number, not just one more series. + strokeWidth: 1, + strokeOpacity: 0.5, + })) + : []), + { + name: hasHost + ? 'GPU HBM (avg n=50)' + : hasPerEngine + ? 'Avg' + : 'GPU KV cache (avg n=50)', + data: rollingAverage(metrics.kvCacheUsage, 50), + // Skip raw scatter when per-engine overlay is on — the + // DP-rank lines already convey the spread, dots would be noise. + rawData: hasPerEngine ? undefined : metrics.kvCacheUsage, + // Bold red Avg sits on top of the translucent per-DP lines. + // DP 1 in the palette is #ef4444 (lighter red); the darker + // #dc2626 here plus the heavier stroke keeps it distinct. + color: hasPerEngine ? '#dc2626' : '#3b82f6', + strokeWidth: hasPerEngine ? 3.5 : 2, + }, + ...(hasHost + ?
[ + { + name: 'CPU offload pool (avg n=50)', + data: rollingAverage(metrics.hostKvCacheUsage, 50), + rawData: metrics.hostKvCacheUsage, + color: '#f97316', + strokeWidth: 2, + }, + ] + : []), + ]; + return ( + `${(v * 100).toFixed(0)}%`} + yAxisLabel="KV cache (%)" + {...size} + /> + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + return ( + ({ + t: p.t, + value: p.running, + })), + 50, + ), + color: '#22c55e', + strokeWidth: 2, + }, + { + name: 'Waiting (avg n=50)', + data: rollingAverage( + metrics.queueDepth.map((p: QueueDepthPoint) => ({ + t: p.t, + value: p.waiting, + })), + 50, + ), + color: '#ef4444', + strokeWidth: 2, + }, + { + name: 'Total (avg n=50)', + data: rollingAverage( + metrics.queueDepth.map((p: QueueDepthPoint) => ({ + t: p.t, + value: p.total, + })), + 50, + ), + color: '#3b82f6', + strokeWidth: 2, + }, + ]} + durationS={metrics.durationS} + yAxisLabel="Requests" + {...size} + /> + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + return ( + `${(v * 100).toFixed(0)}%`} + yAxisLabel="Hit rate (%)" + {...size} + /> + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + const total = sumSeries(metrics.prefillTps, metrics.decodeTps); + return ( + + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + return ( + + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!metrics) return ; + // Unique = total prompt tokens received minus tokens served + // from the prefix cache. Equivalent to cumsum of + // vllm:request_prefill_kv_computed_tokens. We compute it as + // monotonic-non-decreasing cumulative-diff so per-scrape + // timing skew between the prompt_tokens and prefix_cache_hits + // counters can't make the line dip negative. 
+ return ( + + ); + }} + /> + + { + const size = expanded ? CHART_SIZES.expanded : CHART_SIZES.inline; + if (!timelineQuery.data) { + return timelineQuery.isLoading ? : ; + } + // Step function: at each request start/end, sum the ISLs of + // currently-active requests across distinct cids. Within one + // cid turns are sequential so each cid contributes at most + // one in-flight ISL; across cids we treat content as + // independent (cross-conv prefix sharing adds <1pp in + // practice). Smooth with a 30s time-weighted rolling average + // so brief turn-handoff dips don't dominate the chart. + const raw = inflightUniqueTokens(timelineQuery.data.requests); + const smoothed = timeRollingAverage(raw, 30); + return ( + + ); + }} + /> +
+ )} +
+ ); +} + +function AggregatesGrid({ + siblings, + aggregates, + isLoading, +}: { + siblings: { + id: number; + conc: number; + decode_tp: number; + decode_ep: number; + disagg: boolean; + num_prefill_gpu: number; + num_decode_gpu: number; + offload_mode?: string | null; + }[]; + aggregates: AgenticAggregateMap | undefined; + isLoading: boolean; +}) { + if (siblings.length === 0) { + return ( +
+ SKU sibling list not loaded yet — open a point to populate. +
+ ); + } + if (isLoading && !aggregates) { + return ( +
+ Computing aggregates across {siblings.length} configs… (parsing trace blobs) +
+ ); + } + const labeled = siblings.map((s) => ({ id: s.id, label: chipLabel(s as any) })); + const islPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.isl)); + const oslPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.osl)); + const kvPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.kvCacheUtil)); + const prefixPoints = labeled.map((s) => toAggPoint(s, aggregates?.[s.id]?.prefixCacheHitRate)); + return ( +
+ ( + + )} + /> + ( + + )} + /> + ( + `${(v * 100).toFixed(0)}%`} + {...(expanded ? CHART_SIZES.expanded : CHART_SIZES.inline)} + /> + )} + /> + ( + `${(v * 100).toFixed(0)}%`} + {...(expanded ? CHART_SIZES.expanded : CHART_SIZES.inline)} + /> + )} + /> +
+ ); +} + +function Skeleton() { + return
; +} + +function Empty() { + return ( +
No data
+ ); +} + +// Re-export type for use by sub-components +export type { TimeSeriesPoint, QueueDepthPoint }; diff --git a/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx b/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx new file mode 100644 index 00000000..55ac8061 --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/aggregate-chart.tsx @@ -0,0 +1,286 @@ +'use client'; + +import { useMemo } from 'react'; + +import { ChartHover, type HoverItem } from './chart-hover'; + +export type PercentileKey = 'mean' | 'p50' | 'p75' | 'p90' | 'p99'; + +interface PercentileLine { + key: PercentileKey; + /** Display label in legend / tooltip. */ + label: string; + color: string; +} + +const PERCENTILE_LINES: PercentileLine[] = [ + { key: 'mean', label: 'Mean', color: '#ef4444' }, + { key: 'p50', label: 'P50', color: '#3b82f6' }, + { key: 'p75', label: 'P75', color: '#22c55e' }, + { key: 'p90', label: 'P90', color: '#f59e0b' }, + { key: 'p99', label: 'P99', color: '#a855f7' }, +]; + +export interface AggregatePoint { + /** Sibling label rendered on x-axis (e.g. "TP8 • c=8"). */ + label: string; + /** Per-percentile value; missing percentiles are dropped from the plot. */ + values: Partial>; + /** Sibling id — purely informational, used in the tooltip title. */ + id?: number; +} + +/** + * Multi-line chart: one x-position per sibling config, one line per + * percentile (mean/p50/p75/p90/p99). Designed for the "Aggregates across + * configs" view on the agentic detail page. + */ +export function AggregateChart({ + points, + unit, + yMax, + yFmt, + width = 720, + height = 320, +}: { + points: readonly AggregatePoint[]; + unit: string; + /** Optional fixed y-axis upper bound (e.g. 1 for percentages). */ + yMax?: number; + /** Optional value formatter (e.g. percentage → "30%"). 
*/ + yFmt?: (v: number) => string; + width?: number; + height?: number; +}) { + const W = width; + const H = height; + const PAD = { top: 16, right: 16, bottom: 90, left: 64 }; + const fmt = (v: number) => + yFmt + ? yFmt(v) + : v >= 10000 + ? new Intl.NumberFormat('en-US').format(Math.round(v)) + : v.toFixed(v < 10 ? 2 : 0); + + const computed = useMemo(() => { + if (points.length === 0) return null; + let yMaxComputed = 0; + for (const p of points) { + for (const line of PERCENTILE_LINES) { + const v = p.values[line.key]; + if (typeof v === 'number' && Number.isFinite(v) && v > yMaxComputed) yMaxComputed = v; + } + } + const yTop = yMax ?? (yMaxComputed === 0 ? 1 : yMaxComputed * 1.05); + const innerW = W - PAD.left - PAD.right; + const innerH = H - PAD.top - PAD.bottom; + return { yTop, innerW, innerH }; + }, [points, W, H, PAD.left, PAD.right, PAD.top, PAD.bottom, yMax]); + + if (!computed) { + return ( +
+ No data +
+ ); + } + const { yTop, innerW, innerH } = computed; + + // X positions: evenly spaced across the inner width. + const xOf = (i: number) => + points.length === 1 ? PAD.left + innerW / 2 : PAD.left + (i / (points.length - 1)) * innerW; + const yOf = (v: number) => PAD.top + (1 - v / yTop) * innerH; + + // 5 y-axis ticks evenly between 0 and yTop. + const yTicks = Array.from({ length: 5 }, (_, i) => (yTop * i) / 4); + + // Resolve hover: snap to nearest sibling index and emit all percentiles + // that have data at that x. + const resolve = (fraction: number) => { + const idx = Math.round(fraction * (points.length - 1)); + const p = points[Math.max(0, Math.min(points.length - 1, idx))]; + if (!p) return null; + const items: HoverItem[] = []; + for (const line of PERCENTILE_LINES) { + const v = p.values[line.key]; + if (typeof v !== 'number' || !Number.isFinite(v)) continue; + items.push({ color: line.color, label: line.label, value: fmt(v) }); + } + return { items, title: p.label }; + }; + + return ( +
+
+ {PERCENTILE_LINES.map((line) => ( +
+ + {line.label} +
+ ))} + + {points.length} configs · units: {unit} + +
+ + {/* y-axis ticks + gridlines */} + {yTicks.map((v, i) => { + const y = yOf(v); + return ( + + + + {fmt(v)} + + + ); + })} + + {/* X-axis tick labels — one per sibling, rotated 30° to fit. */} + {points.map((p, i) => { + const x = xOf(i); + return ( + + + + {p.label} + + + ); + })} + + {/* X axis baseline */} + + + {/* Horizontal connecting lines per percentile — faint backdrop so the + eye can follow how each percentile changes across configs. */} + {PERCENTILE_LINES.map((line) => { + const segments: { x1: number; y1: number; x2: number; y2: number }[] = []; + let prev: { x: number; y: number } | null = null; + for (let i = 0; i < points.length; i++) { + const v = points[i]!.values[line.key]; + if (typeof v !== 'number' || !Number.isFinite(v)) { + prev = null; + continue; + } + const x = xOf(i); + const y = yOf(v); + if (prev) segments.push({ x1: prev.x, y1: prev.y, x2: x, y2: y }); + prev = { x, y }; + } + return ( + + {segments.map((s, j) => ( + + ))} + + ); + })} + + {/* Per-sibling vertical bar spanning the percentile range, with a + colored tick at each percentile level. Mean rendered as a small + diamond to distinguish from the percentile ticks. */} + {points.map((p, i) => { + const x = xOf(i); + // Collect percentile values present for this sibling. + const present = PERCENTILE_LINES.filter( + (line) => + typeof p.values[line.key] === 'number' && Number.isFinite(p.values[line.key]!), + ).map((line) => ({ ...line, value: p.values[line.key]! })); + if (present.length === 0) return null; + // Only the *percentile* values define the bar extent; mean might be + // outside the percentile span on weird distributions. + const pctlOnly = present.filter((p2) => p2.key !== 'mean'); + const bandValues = pctlOnly.length > 0 ? 
pctlOnly : present; + const bandYs = bandValues.map((b) => yOf(b.value)); + const yLo = Math.min(...bandYs); + const yHi = Math.max(...bandYs); + return ( + + + {present.map((b) => { + const ty = yOf(b.value); + if (b.key === 'mean') { + // Diamond marker for mean. + const s = 4; + return ( + + ); + } + // Horizontal tick at each percentile. + return ( + + ); + })} + + ); + })} + +
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/chart-hover.tsx b/packages/app/src/components/inference/agentic-point/chart-hover.tsx new file mode 100644 index 00000000..24270122 --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/chart-hover.tsx @@ -0,0 +1,148 @@ +'use client'; + +import { useState, type ReactNode } from 'react'; + +/** Vertical crosshair + floating value tooltip overlay shared by every chart. */ +export interface HoverItem { + /** Color swatch to render next to the label. */ + color: string; + label: string; + value: string; + /** Optional faint secondary line (e.g. timestamp under main values). */ + hint?: string; +} + +interface ChartHoverProps { + /** Padding inside the SVG; matches the chart's CHART_PAD. */ + pad: { top: number; right: number; bottom: number; left: number }; + /** SVG viewBox dimensions used to render the chart. */ + width: number; + height: number; + /** + * Called with the cursor's normalized x in [0..1] across the plot area. + * Returns `null` to hide the tooltip (e.g. cursor outside data range). + */ + resolve: (xFraction: number) => { items: HoverItem[]; title?: string } | null; + children: ReactNode; +} + +/** + * Wrap a chart's render to add mouse-driven crosshair + tooltip. + * + * The chart owner renders its bars / lines / axes via `children`; this wrapper + * adds an invisible SVG hit-area element across the plot area to capture + * pointer events, a + * vertical line that follows the cursor, and a floating tooltip on the right + * of the cursor (auto-flipping to the left when it would overflow).
+ */ +export function ChartHover({ pad, width, height, resolve, children }: ChartHoverProps) { + const [hover, setHover] = useState<{ + xPx: number; + yPx: number; + fraction: number; + items: HoverItem[]; + title?: string; + } | null>(null); + + const innerW = width - pad.left - pad.right; + const innerH = height - pad.top - pad.bottom; + + const onMove = (e: React.MouseEvent) => { + const svg = e.currentTarget.ownerSVGElement; + if (!svg) return; + const rect = svg.getBoundingClientRect(); + // Convert client coords → SVG viewBox coords. + const sx = ((e.clientX - rect.left) * width) / rect.width; + const sy = ((e.clientY - rect.top) * height) / rect.height; + const fraction = Math.max(0, Math.min(1, (sx - pad.left) / innerW)); + const resolved = resolve(fraction); + if (!resolved) { + setHover(null); + return; + } + setHover({ xPx: sx, yPx: sy, fraction, items: resolved.items, title: resolved.title }); + }; + + const onLeave = () => setHover(null); + + return ( +
+ + {children} + {hover && ( + + )} + + + {hover && hover.items.length > 0 && ( + + )} +
+ ); +} + +function HoverTooltip({ + xFraction, + containerWidth, + padLeft, + innerW, + title, + items, +}: { + xFraction: number; + containerWidth: number; + padLeft: number; + innerW: number; + title?: string; + items: HoverItem[]; +}) { + // Position tooltip near the crosshair as a % of the container. + // We flip to the cursor's left side when it would overflow the right edge. + const xPx = padLeft + xFraction * innerW; + const onRight = xPx < containerWidth * 0.55; + const left = onRight ? `${(xPx / containerWidth) * 100}%` : 'auto'; + const right = onRight ? 'auto' : `${((containerWidth - xPx) / containerWidth) * 100}%`; + return ( +
+ {title &&
{title}
} + {items.map((it, i) => ( +
+ + {it.label} + {it.value} +
+ ))} +
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/distribution.tsx b/packages/app/src/components/inference/agentic-point/distribution.tsx new file mode 100644 index 00000000..685b73f3 --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/distribution.tsx @@ -0,0 +1,242 @@ +'use client'; + +import { useMemo } from 'react'; + +import { ChartHover, type HoverItem } from './chart-hover'; + +const fmtNum = (n: number) => + n >= 10000 ? new Intl.NumberFormat('en-US').format(Math.round(n)) : String(Math.round(n)); + +/** + * Bar histogram with vertical p50/p75/p90/p95 guide lines. Designed for the + * detail-page card — fills its container width via `viewBox` + 100% width. + * Hover shows the bin range + count + cumulative percentile. + */ +export function Distribution({ + values, + unit, + width = 720, + height = 260, +}: { + values: readonly number[]; + unit: string; + width?: number; + height?: number; +}) { + const W = width; + const H = height; + const PAD = { top: 12, right: 16, bottom: 56, left: 60 }; + + const computed = useMemo(() => { + if (values.length === 0) return null; + const sorted = [...values].toSorted((a, b) => a - b); + const min = sorted[0]!; + const max = sorted.at(-1)!; + const range = Math.max(1e-9, max - min); + const innerW = W - PAD.left - PAD.right; + const innerH = H - PAD.top - PAD.bottom; + const nBins = Math.min(50, Math.max(15, Math.ceil(Math.sqrt(values.length)))); + const counts: number[] = Array.from({ length: nBins }, () => 0); + for (const v of values) { + const i = Math.min(nBins - 1, Math.floor(((v - min) / range) * nBins)); + counts[i]!++; + } + return { sorted, min, max, range, innerW, innerH, nBins, counts }; + }, [values, W, H, PAD.bottom, PAD.left, PAD.right, PAD.top]); + + if (!computed) { + return ( +
No data
+ ); + } + const { sorted, min, max, range, innerW, innerH, nBins, counts } = computed; + const maxCount = Math.max(...counts, 1); + const xScale = (v: number) => PAD.left + ((v - min) / range) * innerW; + const yScale = (c: number) => PAD.top + (1 - c / maxCount) * innerH; + const barW = innerW / nBins; + + const fmt = fmtNum; + + const quantile = (q: number): number => { + const pos = (sorted.length - 1) * q; + const lo = Math.floor(pos); + const hi = Math.ceil(pos); + return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (pos - lo); + }; + + const GUIDES = [ + { label: 'p50', q: 0.5, color: '#3b82f6' }, + { label: 'p75', q: 0.75, color: '#22c55e' }, + { label: 'p90', q: 0.9, color: '#f59e0b' }, + { label: 'p95', q: 0.95, color: '#ef4444' }, + ] as const; + + // Hover: report the bin range under cursor, its count, and the cumulative + // share of samples up to and including that bin (i.e. at its right edge). + const resolve = (fraction: number) => { + const v = min + fraction * range; + const binIdx = Math.min(nBins - 1, Math.floor(((v - min) / range) * nBins)); + const binLo = min + (binIdx * range) / nBins; + const binHi = min + ((binIdx + 1) * range) / nBins; + const count = counts[binIdx] ?? 0; + // Cumulative % at the bin's right edge. + let cumCount = 0; + for (let i = 0; i <= binIdx; i++) cumCount += counts[i] ?? 0; + const cumPct = (cumCount / values.length) * 100; + const items: HoverItem[] = [ + { color: 'currentColor', label: 'Bin', value: `${fmt(binLo)}–${fmt(binHi)} ${unit}` }, + { color: 'currentColor', label: 'Count', value: count.toLocaleString() }, + { color: 'currentColor', label: 'Cumulative', value: `${cumPct.toFixed(1)}%` }, + ]; + return { items }; + }; + + const xTickVals = [min, min + range / 3, min + (2 * range) / 3, max]; + const yTickVals = Array.from({ length: 5 }, (_, i) => (maxCount * i) / 4); + + return ( +
+
+ {values.length.toLocaleString()} requests · range {fmt(min)}–{fmt(max)} {unit} +
+ + {/* y-axis gridlines + labels */} + {yTickVals.map((v, i) => { + const y = yScale(v); + return ( + + + + {fmt(v)} + + + ); + })} + + {/* Bars */} + {counts.map((c, i) => { + const h = (c / maxCount) * innerH; + const x = PAD.left + i * barW; + const y = PAD.top + (innerH - h); + return ( + + ); + })} + + {/* Percentile guide lines */} + {GUIDES.map(({ q, color }) => { + const v = quantile(q); + const x = xScale(v); + return ( + + ); + })} + + {/* X axis */} + + {xTickVals.map((v, i) => { + const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 'end' : 'middle'; + return ( + + {fmt(v)} + + ); + })} + + value ({unit}) + + + count + + + {/* Percentile legend chips */} + {(() => { + const chipY = H - 8; + const chipW = innerW / GUIDES.length; + return GUIDES.map(({ label: ql, q, color }, i) => { + const v = quantile(q); + const x = PAD.left + i * chipW; + return ( + + + + {ql} {fmt(v)} + + + ); + }); + })()} + +
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/expandable-chart.tsx b/packages/app/src/components/inference/agentic-point/expandable-chart.tsx new file mode 100644 index 00000000..7c8e4538 --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/expandable-chart.tsx @@ -0,0 +1,46 @@ +'use client'; + +import { useState, type ReactNode } from 'react'; +import { Maximize2 } from 'lucide-react'; + +import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog'; + +/** + * Wraps a chart in a card with a header + expand button. Click the button to + * open the chart in a large dialog. The `render` prop receives `expanded:true` + * inside the dialog so charts can pick larger width/height. + */ +export function ExpandableChart({ + title, + render, +}: { + title: string; + render: (expanded: boolean) => ReactNode; +}) { + const [open, setOpen] = useState(false); + + return ( +
+
+

{title}

+ +
+ {render(false)} + + + + {title} + +
{render(true)}
+
+
+
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/request-timeline.tsx b/packages/app/src/components/inference/agentic-point/request-timeline.tsx new file mode 100644 index 00000000..3c032fdd --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/request-timeline.tsx @@ -0,0 +1,948 @@ +'use client'; + +import { useCallback, useMemo, useRef, useState } from 'react'; + +import { type RequestRecord, type RequestTimeline } from '@/hooks/api/use-request-timeline'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; + +/** + * Gantt-style request timeline for one agentic benchmark point. + * + * Rows are conversations (or workers — toggle in the header). Bars are + * individual HTTP requests, drawn from request_start to request_end with a + * thin lead-in segment from credit_issued (load gen queue). Scroll-wheel + * zooms, drag pans, hover shows per-request stats. + * + * The reference for this layout is the agent-timeline in semianalysis-claude-code-proxy. + */ + +type RowMode = 'conversation' | 'worker'; + +const ROW_MODE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'conversation', label: 'By conversation', testId: 'timeline-mode-conversation' }, + { value: 'worker', label: 'By worker', testId: 'timeline-mode-worker' }, +]; + +type PhaseFilter = 'all' | 'profiling'; + +const PHASE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'profiling', label: 'Profiling', testId: 'timeline-phase-profiling' }, + { value: 'all', label: 'All (incl. warmup)', testId: 'timeline-phase-all' }, +]; + +/** A stable color palette indexed by row-key hash. */ +const ROW_COLORS = [ + '#3b82f6', + '#ef4444', + '#10b981', + '#f59e0b', + '#a855f7', + '#06b6d4', + '#f97316', + '#84cc16', + '#ec4899', + '#14b8a6', + '#8b5cf6', + '#eab308', +]; + +/** Phase color overlay drawn as a thin strip at the bottom of each bar. 
/* Keyed by request phase name; `unknown` is the fallback entry for unrecognised phases. */
const PHASE_COLORS: Record<string, string> = {
  profiling: '#22c55e',
  warmup: '#94a3b8',
  unknown: '#64748b',
};

/**
 * Row kinds:
 *   parent   — top-level conversation (depth 0)
 *   worker   — worker swimlane (depth 0, worker mode)
 *   subagent — a subagent invocation (depth 1). Either a single
 *              stream (renders its own bars), or a multi-stream
 *              container whose bars are the union of its streams
 *              when collapsed.
 *   stream   — one :sN stream of a multi-stream subagent (depth 2).
 *              Hidden by default; toggled in via the parent's chevron.
 */
type RowKind = 'parent' | 'worker' | 'subagent' | 'stream';

/** One horizontal lane of the timeline together with the requests drawn on it. */
interface Row {
  key: string;
  label: string;
  color: string;
  requests: RequestRecord[];
  depth: number;
  kind: RowKind;
  /** Number of streams under this subagent (>=1). Only set for subagent rows. */
  streamCount?: number;
  /** For stream rows: the parent subagent's row key (drives expand/collapse). */
  parentRowKey?: string;
}

/**
 * Conversation ids for subagent calls look like
 *   {parent_cid}::sa:{agent_id}[:s{N}]
 * The optional `:s{N}` suffix is set when the harness fans a single
 * subagent into multiple parallel "streams" (interval-graph
 * decomposition in weka_trace._pack_into_streams). We split it off so
 * we can group all streams of one subagent under a single header row.
 *
 * @param cid Conversation id of a request.
 * @returns `parent` is everything before the first `::sa:` (the whole id when
 *   there is none); `subagentBase` is the agent id without the stream suffix,
 *   or `null` for a plain conversation; `stream` is the numeric `N`, or `null`
 *   when no `:s{N}` suffix is present.
 */
function splitCid(cid: string): {
  parent: string;
  subagentBase: string | null;
  stream: number | null;
} {
  const marker = '::sa:';
  const sep = cid.indexOf(marker);
  if (sep === -1) return { parent: cid, subagentBase: null, stream: null };

  const parent = cid.slice(0, sep);
  const raw = cid.slice(sep + marker.length);
  // Positional captures only: group 1 = agent id (greedy, so only the LAST
  // `:s{N}` is peeled off), group 2 = stream index.
  const match = /^(.*):s(\d+)$/.exec(raw);
  if (match === null) return { parent, subagentBase: raw, stream: null };

  const [, base = raw, streamIdx] = match;
  return { parent, subagentBase: base, stream: Number(streamIdx) };
}

/**
 * Group requests into rows. In conversation mode, output order is:
 *   parent_conv
 *     subagent_001          (collapsed by default, container)
 *       :s0                 (hidden unless expanded)
 *       :s1
 *     subagent_002
 *   ...
+ * + * `expandedSubagents` controls which subagent containers reveal their + * stream children. Bars on a collapsed subagent are the UNION of all its + * streams' requests — overlapping bars visually communicate the + * stream-level parallelism without expanding. + */ +function buildRows( + requests: RequestRecord[], + mode: RowMode, + expandedSubagents: ReadonlySet, +): Row[] { + if (mode !== 'conversation') { + // Worker mode: flat rows, sorted by first activity. + const groups = new Map(); + for (const r of requests) { + let list = groups.get(r.wid); + if (!list) { + list = []; + groups.set(r.wid, list); + } + list.push(r); + } + const rows: Row[] = []; + let i = 0; + for (const [key, list] of groups) { + list.sort((a, b) => a.start - b.start); + rows.push({ + key, + label: shortenWid(key), + color: ROW_COLORS[i % ROW_COLORS.length]!, + requests: list, + depth: 0, + kind: 'worker', + }); + i++; + } + rows.sort((a, b) => a.requests[0]!.start - b.requests[0]!.start); + return rows; + } + + // Conversation mode — tree: parent → subagent → stream. 
+ interface Tree { + parentCid: string; + parentReqs: RequestRecord[]; + // subagentBase → (streamIndex|null → requests) + subagents: Map>; + firstStart: number; + } + const trees = new Map(); + for (const r of requests) { + const { parent, subagentBase, stream } = splitCid(r.cid); + let tree = trees.get(parent); + if (!tree) { + tree = { + parentCid: parent, + parentReqs: [], + subagents: new Map(), + firstStart: Number.POSITIVE_INFINITY, + }; + trees.set(parent, tree); + } + if (subagentBase === null) { + tree.parentReqs.push(r); + } else { + let saMap = tree.subagents.get(subagentBase); + if (!saMap) { + saMap = new Map(); + tree.subagents.set(subagentBase, saMap); + } + const list = saMap.get(stream); + if (list) list.push(r); + else saMap.set(stream, [r]); + } + if (r.start < tree.firstStart) tree.firstStart = r.start; + } + + const sortedTrees = [...trees.values()].toSorted((a, b) => a.firstStart - b.firstStart); + const rows: Row[] = []; + let colorIdx = 0; + for (const tree of sortedTrees) { + const color = ROW_COLORS[colorIdx % ROW_COLORS.length]!; + colorIdx++; + // Parent row (use a placeholder key if the parent itself wasn't replayed). + tree.parentReqs.sort((a, b) => a.start - b.start); + rows.push({ + key: tree.parentReqs.length > 0 ? tree.parentCid : `__parent_${tree.parentCid}`, + label: tree.parentCid, + color, + requests: tree.parentReqs, + depth: 0, + kind: 'parent', + }); + + // One subagent row per base (which may contain N streams). + const subagentEntries = [...tree.subagents.entries()].toSorted((a, b) => { + const aStart = Math.min( + ...[...a[1].values()].map((reqs) => reqs[0]?.start ?? Number.POSITIVE_INFINITY), + ); + const bStart = Math.min( + ...[...b[1].values()].map((reqs) => reqs[0]?.start ?? Number.POSITIVE_INFINITY), + ); + return aStart - bStart; + }); + for (const [saBase, streams] of subagentEntries) { + const subagentKey = `${tree.parentCid}::sa:${saBase}`; + // Union of all stream requests for collapsed-view bars. 
+ const allReqs: RequestRecord[] = []; + for (const reqs of streams.values()) allReqs.push(...reqs); + allReqs.sort((a, b) => a.start - b.start); + const streamCount = streams.size; + rows.push({ + key: subagentKey, + label: `↳ ${formatSubagentLabel(saBase)}`, + color, + requests: allReqs, + depth: 1, + kind: 'subagent', + streamCount, + }); + + // Stream children only when expanded AND there's more than one + // stream (a single-stream subagent has nothing extra to show). + if (streamCount > 1 && expandedSubagents.has(subagentKey)) { + const streamEntries = [...streams.entries()].toSorted((a, b) => { + // Sort by stream index (null first as the "default" stream) + const ai = a[0] ?? -1; + const bi = b[0] ?? -1; + return ai - bi; + }); + for (const [streamIdx, reqs] of streamEntries) { + reqs.sort((a, b) => a.start - b.start); + rows.push({ + key: `${subagentKey}:s${streamIdx ?? '∅'}`, + label: `stream ${streamIdx ?? '∅'}`, + color, + requests: reqs, + depth: 2, + kind: 'stream', + parentRowKey: subagentKey, + }); + } + } + } + } + return rows; +} + +/** `subagent_001_bf1c5c16` → `subagent 001 · bf1c` (compact, readable). */ +function formatSubagentLabel(raw: string): string { + const m = /^subagent_(?\d+)_(?[0-9a-f]+)$/iu.exec(raw); + if (!m) return raw; + return `subagent ${m[1]} · ${m[2]!.slice(0, 4)}`; +} + +function shortenWid(wid: string): string { + // worker_4ae87bea → w_4ae8 + return wid.replace(/^worker_/, 'w_').slice(0, 12); +} + +/** Format ns offset → "+12.3s" / "+1.2m". */ +function formatTickLabel(ns: number): string { + const ms = ns / 1e6; + if (ms < 1000) return `+${ms.toFixed(0)}ms`; + if (ms < 60_000) return `+${(ms / 1000).toFixed(ms < 10_000 ? 
1 : 0)}s`; + return `+${(ms / 60_000).toFixed(1)}m`; +} + +function formatDuration(ms: number): string { + if (ms < 1000) return `${ms.toFixed(0)}ms`; + if (ms < 60_000) return `${(ms / 1000).toFixed(2)}s`; + return `${(ms / 60_000).toFixed(2)}m`; +} + +/** Number of values in a sorted ascending array that are <= target. */ +function countLeq(sorted: number[], target: number): number { + let lo = 0; + let hi = sorted.length; + while (lo < hi) { + const mid = (lo + hi) >>> 1; + if (sorted[mid]! <= target) lo = mid + 1; + else hi = mid; + } + return lo; +} +/** Number of values in a sorted ascending array that are < target. */ +function countLt(sorted: number[], target: number): number { + let lo = 0; + let hi = sorted.length; + while (lo < hi) { + const mid = (lo + hi) >>> 1; + if (sorted[mid]! < target) lo = mid + 1; + else hi = mid; + } + return lo; +} + +interface TooltipData { + x: number; + y: number; + row: Row; + req: RequestRecord; +} + +function Tooltip({ data }: { data: TooltipData }) { + const { row, req } = data; + const totalMs = (req.end - req.start) / 1e6; + const queueMs = (req.start - req.credit) / 1e6; + return ( +
+
+ + {row.label} + · turn {req.ti} + {req.cancelled && · cancelled} +
+
+ Total + {formatDuration(totalMs)} + Queue wait + + {queueMs > 0.5 ? formatDuration(queueMs) : '—'} + + {req.ttftMs !== null && ( + <> + TTFT + + {formatDuration(req.ttftMs)} + + + )} + {req.isl !== null && ( + <> + ISL + + {req.isl.toLocaleString()} + + + )} + {req.osl !== null && ( + <> + OSL + + {req.osl.toLocaleString()} + + + )} + Phase + {req.phase} + {req.ad > 0 && ( + <> + Agent depth + {req.ad} + + )} + Worker + {shortenWid(req.wid)} +
+
+ Started at {formatTickLabel(req.start)} +
+
+ ); +} + +export function RequestTimelineView({ data }: { data: RequestTimeline }) { + const [rowMode, setRowMode] = useState('conversation'); + const [phaseFilter, setPhaseFilter] = useState('profiling'); + const [tooltip, setTooltip] = useState(null); + // Which multi-stream subagents currently have their per-stream rows + // expanded. Key is the subagent row's `key` (parent_cid::sa:agent_id). + const [expandedSubagents, setExpandedSubagents] = useState>(() => new Set()); + const toggleSubagent = useCallback((key: string) => { + setExpandedSubagents((prev) => { + const next = new Set(prev); + if (next.has(key)) next.delete(key); + else next.add(key); + return next; + }); + }, []); + const dragRef = useRef<{ startX: number; vs: number; ve: number } | null>(null); + + // Apply phase filter, then group into rows. + const filtered = useMemo( + () => + phaseFilter === 'all' ? data.requests : data.requests.filter((r) => r.phase === 'profiling'), + [data.requests, phaseFilter], + ); + const rows = useMemo( + () => buildRows(filtered, rowMode, expandedSubagents), + [filtered, rowMode, expandedSubagents], + ); + + // Pre-sort the timestamp columns so the cursor-time stats popover can + // count "running / waiting at time t" in O(log n). With a few hundred + // requests this is overkill — but it stays smooth on huge runs too. + const sortedTimes = useMemo(() => { + const credits = filtered.map((r) => r.credit).toSorted((a, b) => a - b); + const starts = filtered.map((r) => r.start).toSorted((a, b) => a - b); + const ends = filtered.map((r) => r.end).toSorted((a, b) => a - b); + return { credits, starts, ends }; + }, [filtered]); + + // Cursor state (vertical line + stats popover). null when the mouse + // isn't over the chart. xPx is svg-local; tNs is the ns offset from + // dataStart that the cursor is pointing at. 
+ const [cursor, setCursor] = useState<{ + xPx: number; + tNs: number; + clientX: number; + clientY: number; + } | null>(null); + + // Timeline extent (clamped to actual data — if we filtered out warmup + // the visible window should shrink to just the profiling phase). + const dataStart = filtered.length === 0 ? 0 : Math.min(...filtered.map((r) => r.credit)); + const dataEnd = filtered.length === 0 ? 1 : Math.max(...filtered.map((r) => r.end)); + const totalNs = Math.max(dataEnd - dataStart, 1); + + // Visible window state (ns offsets, relative to dataStart). + const [viewStart, setViewStart] = useState(0); + const [viewEnd, setViewEnd] = useState(null); + const vStart = viewStart; + const vEnd = viewEnd ?? totalNs; + const visibleDur = Math.max(vEnd - vStart, 1); + const isZoomed = viewEnd !== null; + + // Layout + // Wide enough for a full 36-char conversation id at 10px font, plus the + // indent + color stripe + count badge. Subagent rows inherit the same + // width but truncate the longer "↳ subagent N · hash" tail with ellipsis. + const LABEL_WIDTH = 360; + const ROW_HEIGHT = 22; + const ROW_GAP = 3; + const HEADER_HEIGHT = 24; + const PADDING_RIGHT = 12; + const chartWidth = 920; + const svgHeight = HEADER_HEIGHT + rows.length * (ROW_HEIGHT + ROW_GAP) + 6; + const scale = (chartWidth - PADDING_RIGHT) / visibleDur; + // Local coords: convert ns offset from dataStart to x px. + const xOf = (ns: number) => (ns - dataStart - vStart) * scale; + + // Time-axis ticks (~8 across visible window, snapped to nice second multiples). + const niceMs = [ + 100, 250, 500, 1000, 2000, 5000, 10_000, 30_000, 60_000, 120_000, 300_000, 600_000, 1_800_000, + ]; + const targetMs = visibleDur / 1e6 / 8; + const tickMs = niceMs.find((n) => n >= targetMs) ?? 
targetMs; + const tickNs = tickMs * 1e6; + const ticks: number[] = []; + const tickStart = Math.floor(vStart / tickNs) * tickNs; + for (let t = tickStart; t <= vEnd + tickNs; t += tickNs) { + if (t >= vStart && t <= vEnd) ticks.push(t); + } + + const handleWheel = useCallback( + (e: React.WheelEvent) => { + e.preventDefault(); + const rect = e.currentTarget.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseRatio = Math.max(0, Math.min(1, mouseX / (chartWidth - PADDING_RIGHT))); + const curStart = vStart; + const curEnd = vEnd; + const curDur = curEnd - curStart; + const factor = e.deltaY > 0 ? 1.2 : 1 / 1.2; + const newDur = Math.min(Math.max(curDur * factor, totalNs * 0.001), totalNs); + const pivot = curStart + mouseRatio * curDur; + let newStart = pivot - mouseRatio * newDur; + let newEnd = pivot + (1 - mouseRatio) * newDur; + if (newStart < 0) { + newEnd -= newStart; + newStart = 0; + } + if (newEnd > totalNs) { + newStart -= newEnd - totalNs; + newEnd = totalNs; + if (newStart < 0) newStart = 0; + } + if (newEnd - newStart >= totalNs * 0.99) { + setViewStart(0); + setViewEnd(null); + } else { + setViewStart(newStart); + setViewEnd(newEnd); + } + }, + [vStart, vEnd, totalNs, chartWidth], + ); + + const handleMouseDown = useCallback( + (e: React.MouseEvent) => { + if (e.button !== 0) return; + dragRef.current = { startX: e.clientX, vs: vStart, ve: vEnd }; + }, + [vStart, vEnd], + ); + + const handleMouseMove = useCallback( + (e: React.MouseEvent) => { + // Dragging takes precedence over cursor tracking — panning the view. 
+ if (dragRef.current) { + const dx = e.clientX - dragRef.current.startX; + const nsPerPx = visibleDur / (chartWidth - PADDING_RIGHT); + const delta = -dx * nsPerPx; + let ns = dragRef.current.vs + delta; + let ne = dragRef.current.ve + delta; + const dur = ne - ns; + if (ns < 0) { + ns = 0; + ne = dur; + } + if (ne > totalNs) { + ne = totalNs; + ns = totalNs - dur; + if (ns < 0) ns = 0; + } + setViewStart(ns); + setViewEnd(ne); + setTooltip(null); + setCursor(null); + return; + } + // Track the cursor position in svg-local px and the matching ns offset + // so the crosshair + stats popover can render. Clamped to the chart + // plot area (don't show a cursor on the axis labels gutter). + const rect = e.currentTarget.getBoundingClientRect(); + const xPx = Math.max(0, Math.min(chartWidth - PADDING_RIGHT, e.clientX - rect.left)); + const nsPerPx = visibleDur / (chartWidth - PADDING_RIGHT); + const tNs = vStart + xPx * nsPerPx; + setCursor({ xPx, tNs, clientX: e.clientX, clientY: e.clientY }); + }, + [visibleDur, chartWidth, totalNs, vStart], + ); + + const handleMouseUp = useCallback(() => { + dragRef.current = null; + }, []); + + const handleMouseLeave = useCallback(() => { + dragRef.current = null; + setCursor(null); + }, []); + + const resetZoom = useCallback(() => { + setViewStart(0); + setViewEnd(null); + }, []); + + if (rows.length === 0) { + return ( +
+ No requests in the current filter. +
+ ); + } + + const totalRequests = filtered.length; + + return ( +
+ {/* Controls */} +
+ + + + {totalRequests} request{totalRequests === 1 ? '' : 's'} · {rows.length}{' '} + {rowMode === 'conversation' ? 'conversations' : 'workers'} · span{' '} + {formatDuration((dataEnd - dataStart) / 1e6)} + {isZoomed && ( + <> + {' · '} + + + )} + +
+ + {/* Chart container */} +
+
+ {/* Label column — sticky, doesn't scroll horizontally with the chart. */} +
+
+ + {rowMode === 'conversation' ? 'Conversation' : 'Worker'} + +
+ {rows.map((row) => { + const isSubagentRow = row.kind === 'subagent'; + const isStreamRow = row.kind === 'stream'; + const isExpandable = isSubagentRow && (row.streamCount ?? 1) > 1; + const isExpanded = isExpandable && expandedSubagents.has(row.key); + return ( +
+ {isExpandable ? ( + + ) : ( + + )} + + + {row.label} + {isExpandable && ( + ×{row.streamCount} + )} + + + {row.requests.length > 0 ? row.requests.length : '—'} + +
+ ); + })} +
+ + {/* Scrollable SVG */} +
+ + {/* Header / time-axis baseline */} + + + {/* Time axis ticks */} + {ticks.map((t) => { + // Convert visible-window ns offset → x px (the tick array + // is already in dataStart-relative coords). + const x = (t - vStart) * scale; + return ( + + + + {formatTickLabel(t)} + + + ); + })} + + {/* Row separators */} + {rows.map((row, idx) => ( + + ))} + + {/* Request bars */} + {rows.map((row, rowIdx) => { + const yTop = HEADER_HEIGHT + rowIdx * (ROW_HEIGHT + ROW_GAP) + 2; + const barH = ROW_HEIGHT - 4; + // For multi-stream subagent containers, suppress the union + // bars when expanded — the child stream rows draw them + // individually instead, so we'd double-draw otherwise. + if ( + row.kind === 'subagent' && + (row.streamCount ?? 1) > 1 && + expandedSubagents.has(row.key) + ) { + return null; + } + return row.requests.map((req) => { + const xCredit = xOf(req.credit); + const xStart = xOf(req.start); + const xEnd = xOf(req.end); + // Cull bars entirely outside the visible window so big + // benchmarks don't render thousands of zero-width rects. + if (xEnd < -2 || xCredit > chartWidth + 2) return null; + const runW = Math.max(xEnd - xStart, 1); + const queueW = Math.max(xStart - xCredit, 0); + const phaseColor = PHASE_COLORS[req.phase] ?? PHASE_COLORS.unknown!; + return ( + setTooltip({ x: e.clientX, y: e.clientY, row, req })} + onMouseLeave={() => setTooltip(null)} + > + {/* Queue lead-in (faint) — only drawn when noticeable. */} + {queueW >= 1 && ( + + )} + {/* Main bar — opacity stepped down with depth so + parent > subagent > stream reads visually. */} + + {/* Phase strip at bottom */} + + {/* Cancelled X overlay */} + {req.cancelled && runW > 6 && ( + + )} + + ); + }); + })} + + {/* Cursor crosshair — drawn on top of bars so it stays visible + through dense rows. Stats popover is rendered as fixed + HTML below the SVG block. */} + {cursor && ( + + )} + +
+
+
+ + {/* Footer / legend */} +
+ + + queue wait + + + + profiling + + + + warmup + + scroll to zoom · drag to pan +
+ + {/* Cursor stats popover: count of in-flight / waiting at the cursor's + ns offset. Hidden when the user is hovering an individual bar + (per-request tooltip wins). */} + {cursor && !tooltip && ( + + )} + + {/* Tooltip */} + {tooltip && } +
+ ); +} + +function CursorPopover({ + cursor, + dataStart, + startTimes, + endTimes, + creditTimes, +}: { + cursor: { xPx: number; tNs: number; clientX: number; clientY: number }; + dataStart: number; + startTimes: number[]; + endTimes: number[]; + creditTimes: number[]; +}) { + // At time t (ns from dataStart, here represented as t = tNs): + // running = #(start <= t) - #(end < t) + // waiting = #(credit <= t) - #(start <= t) + // completed= #(end <= t) + const t = cursor.tNs; + const startsLeq = countLeq(startTimes, t); + const endsLt = countLt(endTimes, t); + const creditsLeq = countLeq(creditTimes, t); + const endsLeq = countLeq(endTimes, t); + const running = Math.max(0, startsLeq - endsLt); + const waiting = Math.max(0, creditsLeq - startsLeq); + const completed = endsLeq; + const inflight = running + waiting; + // Absolute wall-clock seconds since the timeline origin (dataStart). + const tSec = t / 1e9; + // Position the popover near the cursor without overflowing the viewport. + // 200 px wide; flip to the left of the cursor if it would clip the right. + const wantLeft = cursor.clientX + 14; + const left = + typeof window === 'undefined' || wantLeft + 220 < window.innerWidth + ? wantLeft + : cursor.clientX - 220; + return ( +
+
+ t = + + {tSec < 60 ? `${tSec.toFixed(3)} s` : `${(tSec / 60).toFixed(3)} m`} + +
+
+ In flight + {inflight} + running + {running} + waiting + {waiting} + Completed + {completed} +
+ {/* dataStart is informational — the displayed t is relative to it. */} +
+ relative to t₀ ({(dataStart / 1e9).toFixed(0)}s wall-clock) +
+
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/sibling-nav.tsx b/packages/app/src/components/inference/agentic-point/sibling-nav.tsx new file mode 100644 index 00000000..aa727fdc --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/sibling-nav.tsx @@ -0,0 +1,118 @@ +'use client'; + +import { useRouter } from 'next/navigation'; +import { ChevronLeft, ChevronRight } from 'lucide-react'; + +import type { BenchmarkSibling, BenchmarkSku } from '@/hooks/api/use-benchmark-siblings'; + +const HW_LABELS: Record = { + b200: 'B200', + b300: 'B300', + gb200: 'GB200', + gb300: 'GB300', + h100: 'H100', + h200: 'H200', + mi300x: 'MI300X', + mi325x: 'MI325X', + mi355x: 'MI355X', +}; + +const MODEL_LABELS: Record = { + dsr1: 'DeepSeek R1', + dsv4: 'DeepSeek V4 Pro', + glm5: 'GLM-5', + 'glm5.1': 'GLM-5.1', + gptoss120b: 'gpt-oss 120B', + kimik2: 'Kimi K2', + 'kimik2.5': 'Kimi K2.5', + 'kimik2.6': 'Kimi K2.6', + llama70b: 'Llama 3.3 70B', + 'minimaxm2.5': 'MiniMax M2.5', + 'minimaxm2.7': 'MiniMax M2.7', + 'qwen3.5': 'Qwen 3.5', +}; + +function hwLabel(hw: string) { + return HW_LABELS[hw] ?? hw.toUpperCase(); +} +function modelLabel(m: string) { + return MODEL_LABELS[m] ?? m; +} +function frameworkLabel(fw: string) { + if (fw === 'vllm') return 'vLLM'; + if (fw === 'sglang') return 'SGLang'; + if (fw === 'trt') return 'TRT'; + if (fw === 'mori-sglang') return 'Mori-SGLang'; + if (fw.startsWith('dynamo-')) return `Dynamo ${fw.slice('dynamo-'.length).toUpperCase()}`; + return fw; +} + +/** Short label for a sibling chip: parallelism + concurrency. */ +export function chipLabel(s: BenchmarkSibling): string { + const parallel = s.disagg + ? `${s.num_prefill_gpu}P+${s.num_decode_gpu}D` + : `TP${s.decode_tp}${s.decode_ep > 1 ? `EP${s.decode_ep}` : ''}`; + const offload = s.offload_mode === 'on' ? 
' • off=ON' : ''; + return `${parallel} • c=${s.conc}${offload}`; +} + +export function SiblingNav({ sku, siblings }: { sku: BenchmarkSku; siblings: BenchmarkSibling[] }) { + const router = useRouter(); + const currentIdx = siblings.findIndex((s) => s.is_current); + const prev = currentIdx > 0 ? siblings[currentIdx - 1] : null; + const next = + currentIdx !== -1 && currentIdx < siblings.length - 1 ? siblings[currentIdx + 1] : null; + + const skuLabel = `${hwLabel(sku.hardware)} · ${modelLabel(sku.model)} · ${sku.precision.toUpperCase()} · ${frameworkLabel(sku.framework)}`; + + return ( +
+
+

{skuLabel}

+ + {siblings.length} point{siblings.length === 1 ? '' : 's'} in this run · {sku.date} + +
+
+ +
+ {siblings.map((s) => { + const active = s.is_current; + return ( + + ); + })} +
+ +
+
+ ); +} diff --git a/packages/app/src/components/inference/agentic-point/time-series-chart.tsx b/packages/app/src/components/inference/agentic-point/time-series-chart.tsx new file mode 100644 index 00000000..399f965d --- /dev/null +++ b/packages/app/src/components/inference/agentic-point/time-series-chart.tsx @@ -0,0 +1,702 @@ +'use client'; + +import { useMemo } from 'react'; + +import type { TimeSeriesPoint } from '@/hooks/api/use-trace-server-metrics'; + +import { ChartHover, type HoverItem } from './chart-hover'; + +interface Series { + name: string; + /** The line to draw (caller pre-smooths if desired). */ + data: TimeSeriesPoint[]; + /** Optional raw per-scrape values; rendered as low-opacity scatter behind the line. */ + rawData?: TimeSeriesPoint[]; + color: string; + /** Override default stroke width (1.8). Use higher values for emphasis lines. */ + strokeWidth?: number; + /** Stroke opacity (0..1). Use < 1 for background/underlay lines. */ + strokeOpacity?: number; + /** Hide from the hover legend (e.g. per-engine underlay lines that + * would clutter the tooltip). The path still renders. */ + hideFromHover?: boolean; +} + +interface TimeSeriesChartProps { + series: Series[]; + durationS: number; + yMax?: number; + yFmt?: (v: number) => string; + yAxisLabel?: string; + width?: number; + height?: number; +} + +/** + * Time-weighted rolling average over a `windowS`-second trailing window. + * Treats the input as a step function (value held constant between + * samples) and integrates over the trailing window, dividing by the + * window length. Good for smoothing irregularly-sampled event series + * (e.g. request start/end events) where the regular sample-count + * `rollingAverage` would over-weight bursts of close-together events. 
/* Returns `data` itself (same reference) when it is empty or `windowS <= 0`.
 * NOTE(review): the scan below presumes `data` is ordered by ascending `t` — confirm with callers. */
export function timeRollingAverage(data: TimeSeriesPoint[], windowS: number): TimeSeriesPoint[] {
  if (windowS <= 0 || data.length === 0) return data;

  return data.map(({ t: windowEnd, value: fallback }, idx) => {
    // The window is clamped at t = 0, so early samples average over a shorter span.
    const windowStart = Math.max(0, windowEnd - windowS);

    // Index of the first sample that is not strictly before the window start.
    let first = 0;
    while (first < data.length && data[first]!.t < windowStart) first += 1;

    // Step value in force at `windowStart`: the sample just before the window,
    // or the very first sample when nothing precedes it.
    let heldValue = (first > 0 ? data[first - 1]! : data[0]!).value;
    let heldSince = windowStart;
    let integral = 0;
    for (const sample of data.slice(first, idx + 1)) {
      integral += heldValue * (sample.t - heldSince);
      heldSince = sample.t;
      heldValue = sample.value;
    }

    // A zero-length window (e.g. the sample at t = 0) has no area to average.
    const span = windowEnd - windowStart;
    return { t: windowEnd, value: span > 0 ? integral / span : fallback };
  });
}

/**
 * Centered rolling average: each output value is the mean of the samples
 * within `floor(windowSize / 2)` positions on either side (truncated at the
 * array ends), so an even `windowSize` averages `windowSize + 1` samples.
 * Returns `data` itself when it is empty or `windowSize <= 1`.
 */
export function rollingAverage(data: TimeSeriesPoint[], windowSize: number): TimeSeriesPoint[] {
  if (windowSize <= 1 || data.length === 0) return data;

  const reach = Math.floor(windowSize / 2);
  return data.map((point, idx) => {
    const neighbours = data.slice(Math.max(0, idx - reach), Math.min(data.length, idx + reach + 1));
    const total = neighbours.reduce((acc, p) => acc + p.value, 0);
    return { t: point.t, value: neighbours.length > 0 ? total / neighbours.length : 0 };
  });
}

/** Mean of all samples from the first one up to and including each index. */
export function cumulativeAverage(data: TimeSeriesPoint[]): TimeSeriesPoint[] {
  if (data.length === 0) return data;

  let runningTotal = 0;
  return data.map(({ t, value }, idx) => {
    runningTotal += value;
    return { t, value: runningTotal / (idx + 1) };
  });
}

/**
 * Running cumulative sum of a per-interval rate series.
/* Each output point
 * is the integral of the rate from start to that point, assuming the rate
 * applies over a 1-second window (aiperf's scrape interval). Use for
 * "total tokens served so far" from a tokens-per-second series.
 * Returns `data` itself when it is empty.
 */
export function cumulativeSum(data: TimeSeriesPoint[]): TimeSeriesPoint[] {
  if (data.length === 0) return data;
  const out: TimeSeriesPoint[] = [];
  let sum = 0;
  for (const { t, value } of data) {
    sum += value;
    out.push({ t, value: sum });
  }
  return out;
}

/**
 * Per-event step series: at each request start/end, sum the ISLs of
 * currently-active requests across distinct `cid`s. Within a single
 * `cid` aiperf dispatches turns sequentially (turn N+1 waits for N),
 * so each cid contributes at most one in-flight ISL at a time. Across
 * different cids we assume content is independent (parent ↔ subagent
 * and conv ↔ conv share negligible prefix in practice — cross-conv
 * dedup added ~0.25 pp to theoretical hit rate, so treating them as
 * independent is a tight approximation of the true in-flight unique
 * token count).
 *
 * Output is a step function: one point per event, value held constant
 * until the next event. The series opens with a synthetic
 * `{ t: 0, value: 0 }` point; every later `t` is the event timestamp
 * divided by 1e9 (ns → s). Timestamps are NOT re-based to the earliest
 * event in `requests`.
 *
 * @param requests Request records; entries whose `isl` is null or <= 0 are ignored.
 * @returns `[]` when there are no usable requests, otherwise the step series
 *   (1 leading point + 2 points per usable request).
 */
export function inflightUniqueTokens(
  requests: readonly { cid: string; start: number; end: number; isl: number | null }[],
): TimeSeriesPoint[] {
  if (requests.length === 0) return [];

  // The request_timeline timestamps are ns-relative to its own origin.
  // Convert events to seconds and emit a step series.
  interface TokenEvent {
    tNs: number;
    kind: 'start' | 'end';
    cid: string;
    isl: number;
  }
  const events: TokenEvent[] = [];
  for (const r of requests) {
    const isl = r.isl ?? 0;
    if (isl <= 0) continue;
    events.push({ tNs: r.start, kind: 'start', cid: r.cid, isl });
    events.push({ tNs: r.end, kind: 'end', cid: r.cid, isl });
  }
  if (events.length === 0) return [];

  // Sort by time; on ties, process 'end' before 'start' so a same-instant
  // turn handoff within one cid doesn't transiently double-count. The
  // comparator returns 0 for same-time, same-kind events so it stays a
  // consistent ordering and the (stable) sort keeps them in input order.
  const kindRank = { end: 0, start: 1 } as const;
  events.sort((a, b) => a.tNs - b.tNs || kindRank[a.kind] - kindRank[b.kind]);

  // Active ISL per cid (max in case the same cid somehow has overlapping
  // events; in practice it's always 0 or 1 request at a time per cid).
  const activeByCid = new Map<string, number>();
  let total = 0;
  const out: TimeSeriesPoint[] = [{ t: 0, value: 0 }];
  for (const e of events) {
    if (e.kind === 'start') {
      const prev = activeByCid.get(e.cid) ?? 0;
      const next = Math.max(prev, e.isl);
      activeByCid.set(e.cid, next);
      total += next - prev;
    } else {
      // The first 'end' seen for a cid clears its whole contribution; any
      // further 'end' for the same cid is then a no-op.
      const cur = activeByCid.get(e.cid) ?? 0;
      if (cur > 0) {
        total -= cur;
        activeByCid.delete(e.cid);
      }
    }
    out.push({ t: e.tNs / 1e9, value: Math.max(0, total) });
  }
  return out;
}

/**
 * Monotonic-non-decreasing cumulative difference of two rate series:
 * for each unique timestamp, compute Σa[0..t] − Σb[0..t], then enforce
 * a running max so the curve never dips below its prior value.
 *
 * Use this to plot things like "cumulative cache-missed tokens" where the
 * true value can only ever grow, but the underlying per-tick rates can
 * temporarily look negative due to counter timing skew between scrapes
 * (vllm's `prefix_cache_hits` and `prompt_tokens` counters can lag each
 * other by ~5-10 s in our data even though their lifetime totals agree).
 *
 * `a` and `b` may have different (or overlapping) timestamp sets — both
 * are unioned and walked in time order. Output has one point per unique
 * timestamp present in either input.
/* The running max starts at 0, so the output is never negative. Duplicate
 * timestamps within one input collapse to the last value given for them. */
export function cumulativeDifferenceMonotonic(
  a: TimeSeriesPoint[],
  b: TimeSeriesPoint[],
): TimeSeriesPoint[] {
  const aByT = new Map<number, number>(a.map((p): [number, number] => [p.t, p.value]));
  const bByT = new Map<number, number>(b.map((p): [number, number] => [p.t, p.value]));
  // Union of both timestamp sets, ascending (sorting a fresh array, inputs untouched).
  const allT = [...new Set([...aByT.keys(), ...bByT.keys()])].sort((x, y) => x - y);

  let cumA = 0;
  let cumB = 0;
  let runningMax = 0;
  return allT.map((t) => {
    cumA += aByT.get(t) ?? 0;
    cumB += bByT.get(t) ?? 0;
    runningMax = Math.max(runningMax, cumA - cumB);
    return { t, value: runningMax };
  });
}

/**
 * Pointwise sum of two arrays sharing the same t index. The result is as long
 * as the shorter input and takes its `t` values from `a`.
 */
export function sumSeries(a: TimeSeriesPoint[], b: TimeSeriesPoint[]): TimeSeriesPoint[] {
  const n = Math.min(a.length, b.length);
  return Array.from({ length: n }, (_, i) => ({ t: a[i]!.t, value: a[i]!.value + b[i]!.value }));
}

/** Shared formatter so `fmtIntDefault` does not build a new `Intl.NumberFormat` per call. */
const groupedIntFormat = new Intl.NumberFormat('en-US');

/**
 * Default y-axis formatter: rounds to an integer and adds en-US thousands
 * separators from 10,000 upwards. Rounding happens BEFORE the threshold check
 * so 9999.6 renders as "10,000" rather than an ungrouped "10000".
 */
const fmtIntDefault = (n: number) => {
  const rounded = Math.round(n);
  return rounded >= 10000 ? groupedIntFormat.format(rounded) : String(rounded);
};

/**
 * Formats a duration in seconds as "42s" or "3m 7s". The value is rounded to
 * whole seconds first and only then split into minutes and seconds, so the
 * seconds part can never show as 60 (119.6 → "2m 0s", not "1m 60s").
 */
const fmtSeconds = (s: number) => {
  const total = Math.round(s);
  if (total < 60) return `${total}s`;
  return `${Math.floor(total / 60)}m ${total % 60}s`;
};

/** Linear-interpolated value at time `t` from a time-sorted series.
*/ +function interpAt(data: TimeSeriesPoint[], t: number): number | null { + if (data.length === 0) return null; + if (t <= data[0]!.t) return data[0]!.value; + if (t >= data.at(-1)!.t) return data.at(-1)!.value; + // Binary search + let lo = 0; + let hi = data.length - 1; + while (hi - lo > 1) { + const mid = (lo + hi) >> 1; + if (data[mid]!.t <= t) lo = mid; + else hi = mid; + } + const a = data[lo]!; + const b = data[hi]!; + if (b.t === a.t) return a.value; + const frac = (t - a.t) / (b.t - a.t); + return a.value + (b.value - a.value) * frac; +} + +export function TimeSeriesChart({ + series, + durationS, + yMax: yMaxOpt, + yFmt = fmtIntDefault, + yAxisLabel, + width = 720, + height = 260, +}: TimeSeriesChartProps) { + const W = width; + const H = height; + const PAD = { top: 12, right: 16, bottom: 56, left: 60 }; + + const layout = useMemo(() => { + const innerW = W - PAD.left - PAD.right; + const innerH = H - PAD.top - PAD.bottom; + const xMax = Math.max(durationS, 1); + const yMax = yMaxOpt ?? Math.max(1e-9, ...series.flatMap((s) => s.data.map((d) => d.value))); + const xScale = (t: number) => PAD.left + (t / xMax) * innerW; + const yScale = (v: number) => PAD.top + (1 - v / yMax) * innerH; + return { innerW, innerH, xMax, yMax, xScale, yScale }; + }, [series, durationS, yMaxOpt, W, H, PAD.bottom, PAD.left, PAD.right, PAD.top]); + + const { innerW, innerH, xMax, yMax, xScale, yScale } = layout; + + const subsample = (arr: TimeSeriesPoint[]) => { + if (arr.length === 0) return arr; + const stride = Math.max(1, Math.floor(arr.length / innerW)); + return stride > 1 ? arr.filter((_, i) => i % stride === 0) : arr; + }; + + // Pre-format axis ticks. 
+ const xTickVals = Array.from({ length: 5 }, (_, i) => (xMax * i) / 4); + const yTickVals = Array.from({ length: 5 }, (_, i) => (yMax * i) / 4); + + const resolve = (fraction: number) => { + const t = fraction * xMax; + const items: HoverItem[] = []; + for (const s of series) { + if (s.hideFromHover) continue; + const v = interpAt(s.data, t); + if (v === null || !Number.isFinite(v)) continue; + items.push({ color: s.color, label: s.name, value: yFmt(v) }); + } + if (items.length === 0) return null; + return { items, title: fmtSeconds(t) }; + }; + + if (series.every((s) => s.data.length === 0)) { + return ( +
No data
+ ); + } + + return ( + + {/* y-axis gridlines + labels */} + {yTickVals.map((v, i) => { + const y = yScale(v); + return ( + + + + {yFmt(v)} + + + ); + })} + + {/* Raw scatter underlay */} + {series + .filter((s) => s.rawData && s.rawData.length > 0) + .map((s, si) => + subsample(s.rawData!).map((d, i) => ( + + )), + )} + + {/* Lines */} + {series.map((s, si) => { + if (s.data.length === 0) return null; + const sampled = subsample(s.data); + const path = sampled + .map( + (d, i) => + `${i === 0 ? 'M' : 'L'}${xScale(d.t).toFixed(2)},${yScale(d.value).toFixed(2)}`, + ) + .join(' '); + return ( + + ); + })} + + {/* X-axis */} + + {xTickVals.map((v, i) => { + const x = xScale(v); + const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 'end' : 'middle'; + return ( + + {fmtSeconds(v)} + + ); + })} + + time + + + {yAxisLabel && ( + + {yAxisLabel} + + )} + + {/* Legend — skip series flagged hideFromHover so per-engine + underlays don't clutter the chip row. */} + {(() => { + const visible = series.filter((s) => !s.hideFromHover); + const chipY = H - 8; + const chipW = innerW / Math.max(1, visible.length); + return visible.map((s, i) => { + const x = PAD.left + i * chipW; + return ( + + + + {s.name} + + + ); + }); + })()} + + ); +} + +/** Stacked-area chart for token-source share over time. 
*/ +export function StackedAreaChart({ + sourceSeries, + durationS, + width = 720, + height = 260, +}: { + sourceSeries: Record; + durationS: number; + width?: number; + height?: number; +}) { + const W = width; + const H = height; + const PAD = { top: 12, right: 16, bottom: 56, left: 60 }; + + const computed = useMemo(() => { + const entries = Object.entries(sourceSeries).filter(([, v]) => v.length > 0); + if (entries.length === 0) return null; + + // Different sources can land on different scrape timestamps + // (SGLang's hits/misses fire on alternating ticks), so we MUST + // align across all sources before computing shares — otherwise the + // share calculation indexes into each source's own time axis and + // mixes values from different moments. + // + // Approach: union all timestamps across sources, then for each + // unique timestamp carry forward the cumulative sum for every + // source (a source that didn't report at time t holds its previous + // cumulative value rather than dropping to 0). + const tValues = [...new Set(entries.flatMap(([, arr]) => arr.map((p) => p.t)))].toSorted( + (a, b) => a - b, + ); + + // For each source, walk its (sorted) array and produce a parallel + // cumulative-sum array indexed against `tValues` via carry-forward. + const cum: Record = {}; + for (const [name, arr] of entries) { + const valByT = new Map(arr.map((p) => [p.t, p.value])); + const out: number[] = Array.from({ length: tValues.length }); + let acc = 0; + for (let i = 0; i < tValues.length; i++) { + const v = valByT.get(tValues[i]!); + if (v !== undefined) acc += v; + out[i] = acc; + } + cum[name] = out; + } + + const shares: Record = {}; + for (const name of Object.keys(cum)) shares[name] = []; + for (let i = 0; i < tValues.length; i++) { + const total = entries.reduce((s, [name]) => s + (cum[name]?.[i] ?? 0), 0); + for (const [name] of entries) { + shares[name]!.push(total > 0 ? (cum[name]?.[i] ?? 
0) / total : 0); + } + } + return { tValues, shares }; + }, [sourceSeries]); + + const colors: Record = { + // vLLM source names + local_compute: '#f97316', + local_cache_hit: '#3b82f6', + external_kv_transfer: '#22c55e', + miss: '#f97316', + // SGLang source names (set by compute-chart-series for sglang rows) + 'cache hit (HBM)': '#3b82f6', + 'cache hit (CPU offload)': '#22c55e', + 'cache hit': '#3b82f6', + 'compute (miss)': '#f97316', + }; + const labelFor: Record = { + local_compute: 'Prefill', + local_cache_hit: 'HBM Cache Hit', + external_kv_transfer: 'Offload Cache Hit', + miss: 'Miss', + }; + // Fallback palette for any source name not in `colors` so we never + // emit two layers in the same shade. Cycles by insertion order. + const fallbackPalette = [ + '#3b82f6', + '#f97316', + '#22c55e', + '#a855f7', + '#ef4444', + '#06b6d4', + '#f59e0b', + '#ec4899', + ]; + let fallbackIdx = 0; + const colorFor = (name: string): string => { + if (colors[name]) return colors[name]!; + const c = fallbackPalette[fallbackIdx % fallbackPalette.length]!; + fallbackIdx++; + colors[name] = c; // memoize so the SAME unknown name always gets the same color + return c; + }; + + if (!computed) { + return ( +
No data
+ ); + } + const { tValues, shares } = computed; + + const innerW = W - PAD.left - PAD.right; + const innerH = H - PAD.top - PAD.bottom; + const xMax = Math.max(durationS, 1); + const xScale = (t: number) => PAD.left + (t / xMax) * innerW; + const yScale = (v: number) => PAD.top + (1 - v) * innerH; + + const stackOrder = Object.keys(shares); + const lower: number[] = Array.from({ length: tValues.length }, () => 0); + const layers = stackOrder.map((name) => { + const upper = shares[name]!.map((v, i) => lower[i]! + v); + const top = upper.map((v, i) => [xScale(tValues[i]!), yScale(v)] as [number, number]); + const bottom = lower.map((v, i) => [xScale(tValues[i]!), yScale(v)] as [number, number]); + const d = `${top + .map(([x, y], i) => `${i === 0 ? 'M' : 'L'}${x.toFixed(2)},${y.toFixed(2)}`) + .join(' ')} ${[...bottom] + .toReversed() + .map(([x, y]) => `L${x.toFixed(2)},${y.toFixed(2)}`) + .join(' ')} Z`; + const color = colorFor(name); + for (let i = 0; i < tValues.length; i++) lower[i] = upper[i]!; + return { name, color, d }; + }); + + const resolve = (fraction: number) => { + const t = fraction * xMax; + // Find the closest tValue index. + let idx = 0; + let bestDist = Infinity; + for (let i = 0; i < tValues.length; i++) { + const d = Math.abs(tValues[i]! - t); + if (d < bestDist) { + bestDist = d; + idx = i; + } + } + const items: HoverItem[] = stackOrder.map((name) => ({ + color: colorFor(name), + label: labelFor[name] ?? name, + value: `${((shares[name]?.[idx] ?? 0) * 100).toFixed(1)}%`, + })); + return { items, title: fmtSeconds(t) }; + }; + + const xTickVals = Array.from({ length: 5 }, (_, i) => (xMax * i) / 4); + const yTickVals = [0, 0.25, 0.5, 0.75, 1]; + + return ( + + {yTickVals.map((v, i) => { + const y = yScale(v); + return ( + + + + {(v * 100).toFixed(0)}% + + + ); + })} + {layers.map((l, i) => ( + + ))} + + {xTickVals.map((v, i) => { + const x = xScale(v); + const anchor = i === 0 ? 'start' : i === xTickVals.length - 1 ? 
'end' : 'middle'; + return ( + + {fmtSeconds(v)} + + ); + })} + + time + + + % of prefill tokens + + {(() => { + const chipY = H - 8; + const chipW = innerW / Math.max(1, layers.length); + return layers.map((l, i) => { + const x = PAD.left + i * chipW; + return ( + + + + {labelFor[l.name] ?? l.name} + + + ); + }); + })()} + + ); +} diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index 13b22951..e76c3123 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -1,7 +1,7 @@ import { useMemo, useRef } from 'react'; import { useQueries } from '@tanstack/react-query'; -import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants'; +import { rowToSequence } from '@semianalysisai/inferencex-constants'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import type { @@ -19,9 +19,91 @@ import { getModelSortIndex, hardwareKeyMatchesAnyBase, } from '@/lib/constants'; -import { transformBenchmarkRows } from '@/lib/benchmark-transform'; -import type { Model, Sequence } from '@/lib/data-mappings'; +import { + mergeRunScopedRows, + transformBenchmarkRows, + withPercentile, +} from '@/lib/benchmark-transform'; +import { Sequence, type Model } from '@/lib/data-mappings'; import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils'; +import { + paretoFrontLowerLeft, + paretoFrontLowerRight, + paretoFrontUpperLeft, + paretoFrontUpperRight, +} from '@/lib/chart-utils'; + +type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps'; + +/** + * Resolve the percentile-prefixed e2e-latency field name for the given + * sequence + percentile combo (e.g. 'median_e2el', 'p90_e2el'). 
+ */ +function e2elFieldFor(percentile: string): string { + return withPercentile('median_e2el', percentile); +} + +/** + * Compute the set of benchmark_results.id values that sit on the + * (e2e_latency, y) Pareto frontier within each (hwKey, precision, date) + * group. Used to restrict the non-e2e xmode charts (ttft, interactivity, + * session-time, prefill-tps) so they show *only* the points that win on + * end-to-end latency — preventing benchmark-hacking where a config tops + * one axis while tanking the other. + * + * Returns null when the y-metric has no roofline direction declared on + * the e2e chart (caller falls back to no filtering in that case). + */ +function e2eParetoIds( + points: InferenceData[], + selectedYAxisMetric: string, + percentile: string, +): Set | null { + const e2eChartDef = (chartDefinitions as ChartDefinition[]).find((c) => c.chartType === 'e2e'); + if (!e2eChartDef) return null; + const dir = e2eChartDef[`${selectedYAxisMetric}_roofline` as keyof ChartDefinition] as + | 'upper_right' + | 'upper_left' + | 'lower_left' + | 'lower_right' + | undefined; + if (!dir) return null; + const frontierFn = + dir === 'upper_right' + ? paretoFrontUpperRight + : dir === 'upper_left' + ? paretoFrontUpperLeft + : dir === 'lower_left' + ? paretoFrontLowerLeft + : paretoFrontLowerRight; + const e2elField = e2elFieldFor(percentile); + const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey; + + // Re-frame each candidate point in (e2el, y) space, then compute the + // pareto per (hwKey, precision, date) bucket — frontiers don't span dates + // (a May 17 point can't dominate a May 15 plot). 
+ const byGroup = new Map(); + for (const p of points) { + const yValue = (p[metricKey] as { y?: number } | undefined)?.y; + const xValue = (p as unknown as Record)[e2elField]; + if (typeof xValue !== 'number' || !Number.isFinite(xValue)) continue; + if (typeof yValue !== 'number' || !Number.isFinite(yValue)) continue; + const key = `${p.hwKey}|${p.precision}|${p.date}`; + let bucket = byGroup.get(key); + if (!bucket) { + bucket = []; + byGroup.set(key, bucket); + } + bucket.push({ ...p, x: xValue, y: yValue }); + } + const ids = new Set(); + for (const bucket of byGroup.values()) { + for (const f of frontierFn(bucket)) { + if (typeof f.id === 'number') ids.add(f.id); + } + } + return ids; +} /** Build deduplicated comparison dates, excluding the main run date. */ export function buildComparisonDates( @@ -83,8 +165,24 @@ export function useChartData( selectedRunDate?: string, enabled = true, latestAvailableDate?: string, + selectedPercentile = 'p90', /** When set, only series for these two registry GPU keys are shown (compare pages). */ compareGpuPair?: readonly [string, string] | null, + /** + * GitHub run id (g_runid) from the run picker. When set, the benchmarks API + * scopes results to that workflow run instead of returning the latest per + * config — disambiguates when two runs land on the same date. + */ + selectedRunId?: string, + /** + * Current x-axis mode. When set to anything other than 'e2e', the displayed + * data is filtered to the (e2e-latency, y) Pareto frontier so the ttft / + * interactivity / session-time / prefill-tps charts show only points that + * also win on end-to-end latency — preventing benchmark-hacking where a + * config tops one metric while tanking the other. The 'e2e' mode is the + * source of truth and keeps the full point set. 
+ */ + selectedXAxisMode: XAxisMode = 'e2e', ) { // When the selected date is the latest available, use '' (empty string) to match // the initial no-date query key, reusing the eagerly-fetched benchmarks from the @@ -94,11 +192,35 @@ export function useChartData( ? '' : selectedRunDate; + // Two queries: the normal latest-per-config view (always), plus the + // run-scoped rows when a specific workflow run is selected. The merged + // result pins ONLY the configs the selected run produced to that run, and + // carries every other config forward from the base rows — selecting one of + // two same-day vLLM runs must not hide the day's SGLang curve just because + // it lives in a different workflow run. The base query is the default view + // query, so it's almost always already in the React Query cache. const { - data: allRows, - isLoading: queryLoading, - error: queryError, + data: baseRows, + isLoading: baseLoading, + error: baseError, } = useBenchmarks(selectedModel, queryDate, enabled); + const { + data: runRows, + isLoading: runLoading, + error: runError, + } = useBenchmarks(selectedModel, queryDate, enabled && Boolean(selectedRunId), selectedRunId); + + const allRows = useMemo(() => { + if (!selectedRunId) return baseRows; + // Wait for the run rows before rendering a scoped view — rendering base + // rows first would flash the un-scoped chart, then swap contested points. + if (!runRows) return undefined; + if (!baseRows) return runRows; + return mergeRunScopedRows(runRows, baseRows); + }, [selectedRunId, runRows, baseRows]); + + const queryLoading = baseLoading || (Boolean(selectedRunId) && runLoading); + const queryError = baseError ?? (selectedRunId ? runError : null); // GPU comparison: fetch data for each additional comparison date const comparisonDates = useMemo( @@ -125,11 +247,13 @@ export function useChartData( // Merge main rows with comparison date rows. 
// Stamp each row with the *requested* date (not the actual DB date) so that // GPUGraph's activeDates filter (keyed by user-selected date) matches the points. - const sequenceIslOsl = useMemo(() => sequenceToIslOsl(selectedSequence), [selectedSequence]); + // + // rowToSequence handles both fixed-seq (via isl/osl) and agentic (via + // benchmark_type), so one filter covers every scenario. const rows = useMemo(() => { - if (!allRows || !sequenceIslOsl) return []; - const seqFilter = (r: { isl: number; osl: number }) => - r.isl === sequenceIslOsl.isl && r.osl === sequenceIslOsl.osl; + if (!allRows) return []; + const seqFilter = (r: { isl: number | null; osl: number | null; benchmark_type: string }) => + rowToSequence(r) === selectedSequence; const seqFiltered = allRows.filter(seqFilter); // For each (hw, framework, spec_method, disagg, precision) group, keep only @@ -156,14 +280,14 @@ export function useChartData( .map((r) => ({ ...r, date: comparisonDates[i], actualDate: r.date })), ); return [...mainRows, ...extraRows]; - }, [allRows, sequenceIslOsl, comparisonDates, comparisonDataKey, selectedRunDate]); + }, [allRows, selectedSequence, comparisonDates, comparisonDataKey, selectedRunDate]); // Transform filtered rows into chart data const { chartData, hardwareConfig: rawHardwareConfig } = useMemo(() => { if (rows.length === 0) return { chartData: [] as InferenceData[][], hardwareConfig: {} as HardwareConfig }; - return transformBenchmarkRows(rows); - }, [rows]); + return transformBenchmarkRows(rows, selectedPercentile); + }, [rows, selectedPercentile]); // Sort hardware config — stabilize reference when keys haven't changed. 
// Different sequences for the same model often have the same GPU configs, @@ -198,8 +322,11 @@ export function useChartData( (chartDefinitions as ChartDefinition[]).map((chartDef) => { const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey; - // Determine dynamic x-axis - let xAxisField: keyof AggDataEntry = chartDef.x; + // Default x-axis = chart's natural latency metric, percentile-adjusted + // for the agentic case (median_e2el → p99_e2el etc.). For non-agentic + // scenarios `withPercentile` is a no-op when percentile === 'median'. + const naturalX = withPercentile(chartDef.x, selectedPercentile) as keyof AggDataEntry; + let xAxisField: keyof AggDataEntry = naturalX; let xAxisLabel = chartDef.x_label; const metricTitle = @@ -209,14 +336,25 @@ export function useChartData( // Resolve the effective x-axis override per chart type const effectiveXMetric = chartDef.chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric; + // The TTFT override is now any *_ttft metric (not just p90_ttft) — the + // x-axis-mode picker reconciles the percentile prefix based on sequence + // kind (fixed-seq → median, agentic → user-picked percentile). const isTtftOverride = - effectiveXMetric === 'p99_ttft' || effectiveXMetric === 'median_ttft'; - const ttftLabel = - effectiveXMetric === 'p99_ttft' - ? 'P99 Time To First Token (s)' - : 'Median Time To First Token (s)'; - - if (effectiveXMetric && chartDef.chartType === 'interactivity' && isInputMetric) { + typeof effectiveXMetric === 'string' && effectiveXMetric.endsWith('_ttft'); + const ttftPctl = isTtftOverride + ? (effectiveXMetric as string).replace(/_ttft$/u, '') + : 'p90'; + const ttftPctlWord = ttftPctl === 'median' ? 
'Median' : ttftPctl.toUpperCase(); + const ttftLabel = `${ttftPctlWord} Time To First Token (s)`; + + const isAgentic = selectedSequence === Sequence.AgenticTraces; + + if ( + effectiveXMetric && + chartDef.chartType === 'interactivity' && + isInputMetric && + !isAgentic + ) { xAxisField = effectiveXMetric as keyof AggDataEntry; const labelKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition; if (effectiveXMetric === chartDef[`${selectedYAxisMetric}_x` as keyof ChartDefinition]) { @@ -225,6 +363,10 @@ export function useChartData( xAxisLabel = isTtftOverride ? ttftLabel : chartDef.x_label; } } else if (chartDef.chartType === 'interactivity' && isInputMetric) { + // Agentic falls through here too — the manual X-axis dropdown is + // hidden in agentic mode (would double up with the percentile + // selector), so the config default + percentile post-processing + // below drives the x axis. const xOverrideKey = `${selectedYAxisMetric}_x` as keyof ChartDefinition; const xLabelOverrideKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition; xAxisField = (chartDef[xOverrideKey] as keyof AggDataEntry) || chartDef.x; @@ -234,12 +376,35 @@ export function useChartData( xAxisLabel = ttftLabel; } + // Agentic: rewrite the resolved x metric to the chosen percentile, + // and relabel accordingly. Both have to be updated unconditionally — + // xAxisField may already be percentile-adjusted (via naturalX) while + // xAxisLabel still carries the raw chartDef.x_label prefix. + // The chart heading ("vs. ") is also rewritten to include + // the percentile so the title above the plot reflects what's drawn. 
+ const headingKey = `${selectedYAxisMetric}_heading` as keyof ChartDefinition; + let chartHeading = (chartDef[headingKey] as string) || chartDef.heading; + if (isAgentic) { + xAxisField = withPercentile( + xAxisField as string, + selectedPercentile, + ) as keyof AggDataEntry; + const pctlWord = selectedPercentile.toUpperCase(); + xAxisLabel = xAxisLabel.replace(/^(?:Median|Mean|P75|P90|P95|P99(?:\.9)?)\b/iu, pctlWord); + chartHeading = chartHeading.replace( + /^(?vs\.\s+)(?:(?:Median|Mean|P75|P90|P95|P99(?:\.9)?)\s+)?/iu, + `$1${pctlWord} `, + ); + } + // The x-axis is "flipped" only when the good-direction reverses // (e.g. interactivity → TTFT: "higher is better" → "lower is better"). // E2EL → TTFT keeps the same direction ("lower is better" for both), // so no roofline flip is needed for the e2e chart. + // Compare against `naturalX` (percentile-adjusted) — switching the + // percentile of the same logical metric is NOT a flip. const xAxisFlipped = - xAxisField !== chartDef.x && !(chartDef.chartType === 'e2e' && isTtftOverride); + xAxisField !== naturalX && !(chartDef.chartType === 'e2e' && isTtftOverride); const yLabelKey = `${selectedYAxisMetric}_label` as keyof ChartDefinition; const dynamicYLabel = chartDef[yLabelKey]; @@ -260,6 +425,7 @@ export function useChartData( chartDefinition: { ...chartDef, ...rooflineOverrides, + heading: chartHeading, x_label: xAxisLabel, y_label: dynamicYLabel === null ? 
undefined : String(dynamicYLabel), }, @@ -267,7 +433,13 @@ export function useChartData( xAxisField, }; }), - [selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric], + [ + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + selectedPercentile, + selectedSequence, + ], ); // Build renderable graphs (data processing + stable chart definitions) @@ -297,9 +469,30 @@ export function useChartData( filteredData = filterDataByCostLimit(filteredData, chartDefinition, selectedYAxisMetric); + // For AGENTIC workloads only: when the user is NOT viewing the + // e2e latency chart, mark each point with whether it sits on the + // (e2e_latency, y) Pareto frontier for its (hwKey, precision, + // date) group. The chart still renders every point as scatter — + // only e2e-Pareto winners feed the roofline (ScatterGraph honors + // the flag). Prevents benchmark-hacking the TTFT / interactivity + // line by tanking decode (or vice versa) without hiding the + // non-optimal configs from view. + // + // Fixed-seq workloads keep the existing per-axis Pareto since + // there's no separate "session-time" notion of total latency — + // their e2e IS the request latency, so a TTFT hack there reads + // honestly on e2e too. The anti-hack constraint is specifically + // about multi-turn agentic where TTFT measures a tiny fraction + // of the user-visible session time. + const isAgentic = selectedSequence === Sequence.AgenticTraces; + const e2eParetoSet = + isAgentic && selectedXAxisMode !== 'e2e' + ? e2eParetoIds(filteredData, selectedYAxisMetric, selectedPercentile) + : null; + // Filter to points that have the selected metric, then remap x/y const hasMetric = filteredData.some((d) => metricKey in d); - const isTtftX = xAxisField === 'p99_ttft' || xAxisField === 'median_ttft'; + const isTtftX = typeof xAxisField === 'string' && xAxisField.endsWith('_ttft'); const processedData = hasMetric ? 
filteredData .filter((d) => metricKey in d) @@ -312,18 +505,26 @@ export function useChartData( // d.x would otherwise mask the regression). const xCandidate = (d as Partial)[xAxisField]; const xValue = typeof xCandidate === 'number' ? xCandidate : d.x; + const isOnE2eFrontier = + e2eParetoSet === null + ? undefined + : typeof d.id === 'number' && e2eParetoSet.has(d.id); return { ...d, x: xValue, y: yValue, roof, + isOnE2eFrontier, }; }) - // When TTFT is on the x-axis, apply the latency limit to filter overload outliers - // (e.g. conc=2048 rows with TTFT > 60s that compress all real data to the far left) + // When TTFT is on the x-axis, apply the latency limit to filter + // overload outliers (fixed-seq conc=2048 rows with TTFT > 60s that + // compress all real data to the far left). Skip for agentic — long + // TTFTs there reflect real workloads (multi-turn, big prompts). .filter( (d) => !isTtftX || + isAgentic || !chartDefinition.y_latency_limit || d.x <= chartDefinition.y_latency_limit, ) @@ -348,6 +549,8 @@ export function useChartData( userPowers, stableChartDefinitions, compareGpuPair, + selectedXAxisMode, + selectedPercentile, ]); return { graphs, loading, error, hardwareConfig }; diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json index d9a29181..9617638f 100644 --- a/packages/app/src/components/inference/inference-chart-config.json +++ b/packages/app/src/components/inference/inference-chart-config.json @@ -13,9 +13,9 @@ "y_inputTputPerGpu_label": "Input Token Throughput per GPU (tok/s/gpu)", "y_inputTputPerGpu_title": "Input Token Throughput per GPU", "y_inputTputPerGpu_roofline": "upper_left", - "y_inputTputPerGpu_x": "p99_ttft", - "y_inputTputPerGpu_x_label": "P99 Time To First Token (s)", - "y_inputTputPerGpu_heading": "vs. 
P99 Time To First Token", + "y_inputTputPerGpu_x": "p90_ttft", + "y_inputTputPerGpu_x_label": "P90 Time To First Token (s)", + "y_inputTputPerGpu_heading": "vs. P90 Time To First Token", "y_outputTputPerGpu": "outputTputPerGpu.y", "y_outputTputPerGpu_label": "Output Token Throughput per GPU (tok/s/gpu)", "y_outputTputPerGpu_title": "Output Token Throughput per GPU", @@ -126,8 +126,8 @@ "y_inputTputPerGpu_label": "Input Token Throughput per GPU (tok/s/gpu)", "y_inputTputPerGpu_title": "Input Token Throughput per GPU", "y_inputTputPerGpu_roofline": "upper_right", - "y_inputTputPerGpu_x": "p99_ttft", - "y_inputTputPerGpu_x_label": "P99 Time To First Token (s)", + "y_inputTputPerGpu_x": "p90_ttft", + "y_inputTputPerGpu_x_label": "P90 Time To First Token (s)", "y_outputTputPerGpu": "outputTputPerGpu.y", "y_outputTputPerGpu_label": "Output Token Throughput per GPU (tok/s/gpu)", "y_outputTputPerGpu_title": "Output Token Throughput per GPU", diff --git a/packages/app/src/components/inference/replay/buildReplayTimeline.ts b/packages/app/src/components/inference/replay/buildReplayTimeline.ts index be076418..b0eb1446 100644 --- a/packages/app/src/components/inference/replay/buildReplayTimeline.ts +++ b/packages/app/src/components/inference/replay/buildReplayTimeline.ts @@ -82,8 +82,7 @@ function resolveXAxisField( const metricTitle = (chartDef[`${selectedYAxisMetric}_title` as keyof ChartDefinition] as string) || ''; const isInputMetric = metricTitle.toLowerCase().includes('input'); - const isTtftOverride = - selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft'; + const isTtftOverride = selectedXAxisMetric === 'p90_ttft'; if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) { return selectedXAxisMetric; diff --git a/packages/app/src/components/inference/replay/exportMp4.ts b/packages/app/src/components/inference/replay/exportMp4.ts index 676c30ff..af0e55a9 100644 --- 
a/packages/app/src/components/inference/replay/exportMp4.ts +++ b/packages/app/src/components/inference/replay/exportMp4.ts @@ -59,7 +59,7 @@ interface ExportOptions { signal?: AbortSignal; } -const CSS_VAR_RE = /var\(--([^)]+)\)/u; +const CSS_VAR_RE = /var\(--(?[^)]+)\)/u; const WATERMARK_HEIGHT = 48; const WATERMARK_TEXT = 'InferenceX — github.com/SemiAnalysisAI/InferenceX'; diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index cbf64787..50728a06 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -80,6 +80,8 @@ export interface WorkerPower { * @property {number} p99_e2el - 99th percentile of End-to-End Latency. */ export interface AggDataEntry { + /** Stable per-point id from benchmark_results — for trace_replay lookups. */ + id?: number; hw: string; mtp?: string; hwKey: string; @@ -94,23 +96,43 @@ export interface AggDataEntry { mean_ttft: number; median_ttft: number; std_ttft: number; + p75_ttft: number; + p90_ttft: number; + p95_ttft: number; p99_ttft: number; + 'p99.9_ttft': number; mean_tpot: number; mean_intvty: number; median_tpot: number; median_intvty: number; std_tpot: number; std_intvty: number; + p75_tpot: number; + p75_intvty: number; + p90_tpot: number; + p90_intvty: number; + p95_tpot: number; + p95_intvty: number; p99_tpot: number; p99_intvty: number; + 'p99.9_tpot': number; + 'p99.9_intvty': number; mean_itl: number; median_itl: number; std_itl: number; + p75_itl: number; + p90_itl: number; + p95_itl: number; p99_itl: number; + 'p99.9_itl': number; mean_e2el: number; median_e2el: number; std_e2el: number; + p75_e2el: number; + p90_e2el: number; + p95_e2el: number; p99_e2el: number; + 'p99.9_e2el': number; // Measured GPU telemetry (emitted by runner's aggregate_power.py). // Optional because historical runs predate the fields. 
avg_power_w?: number; @@ -162,6 +184,29 @@ export interface AggDataEntry { actualDate?: string; /** URL to the GitHub Actions workflow run that produced this data point. */ run_url?: string; + /** Benchmark scenario: `single_turn` (fixed-seq isl/osl) or `agentic_traces`. */ + benchmark_type?: string; + /** ISL in tokens — null for agentic_traces. */ + isl?: number | null; + /** OSL in tokens — null for agentic_traces. */ + osl?: number | null; + // ── Agentic-only fields (populated from metrics JSONB for `agentic_traces` rows) ── + /** "on" | "off" — whether KV cache offload to CPU was enabled. */ + offload_mode?: string; + /** Actual server-observed GPU prefix-cache hit rate (0..1). */ + server_gpu_cache_hit_rate?: number; + /** Actual server-observed CPU prefix-cache hit rate (0..1). */ + server_cpu_cache_hit_rate?: number; + /** Infinite-cache theoretical hit rate (0..1) computed from trace. */ + theoretical_cache_hit_rate?: number; + /** Total requests attempted during the window. */ + num_requests_total?: number; + /** Requests that completed successfully. */ + num_requests_successful?: number; + /** Total prompt tokens served. */ + total_prompt_tokens?: number; + /** Total generated (output) tokens. */ + total_generation_tokens?: number; } /** @@ -187,6 +232,17 @@ export interface InferenceData extends Partial void; + /** Latency percentile for the x-axis under agentic scenarios (median/p90/p99/p99.9). */ + selectedPercentile: string; + setSelectedPercentile: (p: string) => void; selectedXAxisMetric: string | null; setSelectedXAxisMetric: (metric: string | null) => void; selectedE2eXAxisMetric: string | null; setSelectedE2eXAxisMetric: (metric: string | null) => void; + /** + * Which chart variant the user wants to see — the inference card shows one chart + * at a time, picked by the big buttons above the chart. 
+ * - 'ttft' → e2e chartType with x-axis forced to p90_ttft + * - 'e2e' → e2e chartType with the chart-config default x-axis (median_e2el / p90_e2el) + * - 'interactivity' → interactivity chartType (x = median_intvty / p90_intvty) + * - 'session-time' → agentic-only; x = mean-normalized session time (live-computed from trace blobs) + * - 'prefill-tps' → agentic-only; x = mean of P90 prefill TPS/user per session + */ + selectedXAxisMode: 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps'; + setSelectedXAxisMode: ( + mode: 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps', + ) => void; scaleType: 'auto' | 'linear' | 'log'; setScaleType: (type: 'auto' | 'linear' | 'log') => void; setIsLegendExpanded: (metric: boolean) => void; diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx index 27274f02..c969f60e 100644 --- a/packages/app/src/components/inference/ui/ChartControls.tsx +++ b/packages/app/src/components/inference/ui/ChartControls.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useMemo, useState } from 'react'; +import { useEffect, useMemo, useState } from 'react'; import { track } from '@/lib/analytics'; import { useFeatureGate } from '@/lib/use-feature-gate'; @@ -8,7 +8,8 @@ import { useFeatureGate } from '@/lib/use-feature-gate'; import { useInference } from '@/components/inference/InferenceContext'; import { ModelSelector, - SequenceSelector, + ScenarioSelector, + PercentileSelector, PrecisionSelector, } from '@/components/ui/chart-selectors'; import { DateRangePicker } from '@/components/ui/date-range-picker'; @@ -25,7 +26,7 @@ import { SearchableSelect } from '@/components/ui/searchable-select'; import { TooltipProvider } from '@/components/ui/tooltip'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import type { ChartDefinition } from '@/components/inference/types'; -import type { Model, Sequence } from 
'@/lib/data-mappings'; +import { Sequence, type Model, type Percentile } from '@/lib/data-mappings'; /** * Y-axis metric options from static chart config JSON — available immediately, no API wait. @@ -87,6 +88,13 @@ interface ChartControlsProps { } export default function ChartControls({ hideGpuComparison = false }: ChartControlsProps) { + // The percentile selector is rendered conditionally on `selectedSequence`, + // which on the client is hydrated from URL params. SSR doesn't see the URL, + // so deferring the conditional until after mount keeps the initial DOM + // identical between server and client (avoids hydration warnings). + const [mounted, setMounted] = useState(false); + useEffect(() => setMounted(true), []); + const [openDropdown, setOpenDropdown] = useState(null); const handleDropdownOpenChange = (dropdownKey: string) => (open: boolean) => { if (open) { @@ -95,6 +103,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro } setOpenDropdown((current) => (current === dropdownKey ? null : current)); }; + const { selectedModel, setSelectedModel, @@ -104,6 +113,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setSelectedPrecisions, selectedYAxisMetric, setSelectedYAxisMetric, + selectedPercentile, + setSelectedPercentile, graphs, selectedGPUs, setSelectedGPUs, @@ -246,14 +257,21 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro availableModels={availableModels} data-testid="model-selector" /> - + {mounted && selectedSequence === Sequence.AgenticTraces && ( + setSelectedPercentile(p)} + data-testid="percentile-selector" + /> + )} {graphs.some((g) => g.chartDefinition?.chartType === 'interactivity') && - isInputMetric && ( + isInputMetric && + selectedSequence !== Sequence.AgenticTraces && (
diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index 32590464..26bcb0eb 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -1,8 +1,8 @@ 'use client'; import { track } from '@/lib/analytics'; import dynamic from 'next/dynamic'; -import { useMemo, useRef, useState } from 'react'; -import { BarChart3, ChevronDown, Table2, X } from 'lucide-react'; +import { useEffect, useMemo, useRef, useState } from 'react'; +import { BarChart3, Table2, X } from 'lucide-react'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import { useInference } from '@/components/inference/InferenceContext'; @@ -30,7 +30,6 @@ import { DialogHeader, DialogTitle, } from '@/components/ui/dialog'; -import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { Skeleton } from '@/components/ui/skeleton'; import { useUnofficialRun } from '@/components/unofficial-run-provider'; import { @@ -40,8 +39,10 @@ import { getModelLabel, getPrecisionLabel, getSequenceLabel, + sequenceKind, } from '@/lib/data-mappings'; import { useComparisonChangelogs } from '@/hooks/api/use-comparison-changelogs'; +import { useDerivedAgenticMetrics } from '@/hooks/api/use-derived-agentic-metrics'; import { useTrendData } from '@/components/inference/hooks/useTrendData'; import { hardwareKeyMatchesAnyBase } from '@/lib/constants'; @@ -59,56 +60,30 @@ const ModelArchitectureDiagram = dynamic(() => import('./ModelArchitectureDiagra }); import WorkflowInfoDisplay from './WorkflowInfoDisplay'; -/** Controlled popover dropdown for the e2e chart x-axis toggle. 
*/ -function E2eXAxisDropdown({ - xAxisLabel, - xAxisOptions, - selectedValue, - onSelect, -}: { - xAxisLabel: string; - xAxisOptions: { value: string | null; label: string }[]; - selectedValue: string | null; - onSelect: (value: string | null) => void; -}) { - const [open, setOpen] = useState(false); - return ( - - - - - - {xAxisOptions.map((opt) => ( - - ))} - - - ); -} - type InferenceViewMode = 'chart' | 'table'; +/** + * The chart variants the user can choose with the big buttons above the chart + * card. The first three map to entries in `inference-chart-config.json` plus a + * forced x-axis override for the E2E chartType; the last two are agentic-only + * derived metrics computed live from the stored trace_replay blobs. + */ +type XAxisMode = 'ttft' | 'e2e' | 'interactivity' | 'session-time' | 'prefill-tps'; + +interface XAxisModeButton { + value: XAxisMode; + label: string; + /** When true, the button is only shown on agentic scenarios. */ + agenticOnly?: boolean; +} +const X_AXIS_MODE_BUTTONS: XAxisModeButton[] = [ + { value: 'ttft', label: 'TTFT' }, + { value: 'e2e', label: 'E2E Latency' }, + { value: 'interactivity', label: 'Interactivity' }, + { value: 'session-time', label: 'Session Time', agenticOnly: true }, + { value: 'prefill-tps', label: 'Prefill TPS / user', agenticOnly: true }, +]; + const VIEW_MODE_OPTIONS: SegmentedToggleOption[] = [ { value: 'chart', @@ -153,8 +128,10 @@ export default function ChartDisplay() { logScale, activeHwTypes, activeDates, - setSelectedE2eXAxisMetric, + selectedPercentile, compareGpuPair, + selectedXAxisMode, + setSelectedXAxisMode, } = useInference(); const { @@ -163,6 +140,13 @@ export default function ChartDisplay() { totalDatesQueried, } = useComparisonChangelogs(selectedGPUs, selectedDateRange, dateRangeAvailableDates); + // SSR has no URL access and `selectedSequence` defaults to agentic on the + // server even when the URL says fixed-seq — so any conditional rendering + // that keys off 
`sequenceKind(selectedSequence)` would diverge between + // server and client first render. Defer agentic-only UI until after mount. + const [mounted, setMounted] = useState(false); + useEffect(() => setMounted(true), []); + const [viewModes, setViewModes] = useState>({}); const replayHandlesRef = useRef>({}); const getViewMode = (index: number): InferenceViewMode => viewModes[index] ?? 'chart'; @@ -192,7 +176,7 @@ export default function ChartDisplay() { const info = unofficialRunInfos[runIndexByUrl[url]]; return info ? { branch: info.branch, url: info.url } : undefined; } - const idMatch = url.match(/\/runs\/(\d+)/u); + const idMatch = url.match(/\/runs\/(?\d+)/u); if (idMatch && idMatch[1] in runIndexByUrl) { const info = unofficialRunInfos[runIndexByUrl[idMatch[1]]]; return info ? { branch: info.branch, url: info.url } : undefined; @@ -212,6 +196,7 @@ export default function ChartDisplay() { chartType, selectedYAxisMetric, effectiveXMetric, + { isAgentic: sequenceKind(selectedSequence) === 'agentic' }, ); let overlayPoints = processed; @@ -329,214 +314,258 @@ export default function ChartDisplay() { })); }, [graphs, overlayDataByChartType, selectedModel, selectedSequence]); - const displayGraphs = isFirstLoad - ? Array.from({ length: 2 }).map((_, index) => ( - - - - - - )) - : effectiveGraphs.length === 0 - ? [] - : effectiveGraphs.map((graph, graphIndex) => { - const isTimelineMode = Boolean( - selectedDateRange.startDate && selectedDateRange.endDate && selectedGPUs.length > 0, - ); - const replayAvailable = getViewMode(graphIndex) === 'chart' && !isTimelineMode; - return ( -
-
- handleViewModeChange(graphIndex, v)} - ariaLabel="View mode" - testId={`inference-view-toggle-${graphIndex}`} - /> - } - hideImageExport={getViewMode(graphIndex) === 'table'} - setIsLegendExpanded={setIsLegendExpanded} - exportFileName={`InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`} - onExportMp4={ - replayAvailable ? () => replayHandlesRef.current[graphIndex]?.open() : undefined - } - onExportCsv={() => { - const visibleData = graph.data.filter((d) => + // Show one chart at a time, picked by the buttons above the chart. + // - 'interactivity' renders the interactivity chartType. + // - 'ttft' / 'e2e' render the e2e chartType (x swap via selectedE2eXAxisMetric). + // - 'session-time' / 'prefill-tps' render the e2e chartType too; the x-axis + // is overridden below from live-computed derived metrics. + const visibleGraphs = useMemo(() => { + const wantedType = selectedXAxisMode === 'interactivity' ? 'interactivity' : 'e2e'; + const filtered = effectiveGraphs.filter((g) => g.chartDefinition.chartType === wantedType); + return filtered.length > 0 ? filtered : effectiveGraphs; + }, [effectiveGraphs, selectedXAxisMode]); + + // Derived-metric path: fetch live-computed values from the trace_replay blobs + // and override scatter data.x. Only fires for the two agentic-only modes. 
+ const useDerived = + sequenceKind(selectedSequence) === 'agentic' && + (selectedXAxisMode === 'session-time' || selectedXAxisMode === 'prefill-tps'); + const derivedTargetIds = useMemo(() => { + if (!useDerived) return [] as number[]; + const ids = new Set(); + for (const g of visibleGraphs) { + for (const d of g.data) { + if (d.benchmark_type === 'agentic_traces' && typeof d.id === 'number') { + ids.add(d.id); + } + } + } + return [...ids]; + }, [useDerived, visibleGraphs]); + const derivedQuery = useDerivedAgenticMetrics(derivedTargetIds, useDerived); + const derivedMetrics = derivedQuery.data; + // Show skeleton (not "No data available") while the derived-metrics query + // is in flight. Without this gate, every flip to session-time / prefill-tps + // briefly blanks the chart and surfaces a misleading empty-state. + const isDerivedLoading = + useDerived && + derivedTargetIds.length > 0 && + (derivedQuery.isPending || derivedQuery.isFetching) && + !derivedMetrics; + + const renderableGraphs = useMemo(() => { + if (!useDerived) return visibleGraphs; + if (!derivedMetrics) return visibleGraphs.map((g) => ({ ...g, data: [] })); + const isSession = selectedXAxisMode === 'session-time'; + const xLabel = isSession + ? 'Mean Normalized Session Time (min)' + : 'P90 Prefill TPS per user (tok/s)'; + // Roofline corner = which corner the curve sweeps from / toward, matching + // existing chart-config convention: + // - session-time: as concurrency rises, session time AND throughput both + // grow → curve goes bottom-left → top-right → upper_right. + // - prefill-tps: as concurrency rises, per-user prefill TPS falls while + // total throughput rises → curve goes top-left → bottom-right → + // upper_left. + const rooflineCorner = isSession ? 
'upper_right' : 'upper_left'; + return visibleGraphs.map((g) => { + const overriddenChartDef = { + ...g.chartDefinition, + x_label: xLabel, + // y_latency_limit was meant to suppress fixed-seq overload outliers on + // the TTFT axis — irrelevant for these derived axes. + y_latency_limit: undefined, + [`${selectedYAxisMetric}_roofline` as keyof typeof g.chartDefinition]: rooflineCorner, + }; + const data = g.data + .map((d) => { + if (typeof d.id !== 'number') return null; + const m = derivedMetrics[d.id]; + const raw = isSession ? m?.normalized_session_time_s : m?.p90_prefill_tps_per_user; + if (raw === null || raw === undefined || !Number.isFinite(raw)) return null; + const v = isSession ? raw / 60 : raw; + return { ...d, x: v }; + }) + .filter((d): d is NonNullable => d !== null); + return { ...g, chartDefinition: overriddenChartDef, data }; + }); + }, [useDerived, visibleGraphs, derivedMetrics, selectedXAxisMode, selectedYAxisMetric]); + + const displayGraphs = + isFirstLoad || isDerivedLoading + ? [ + + + + + , + ] + : renderableGraphs.length === 0 + ? [] + : renderableGraphs.map((graph, graphIndex) => { + const isTimelineMode = Boolean( + selectedDateRange.startDate && selectedDateRange.endDate && selectedGPUs.length > 0, + ); + const replayAvailable = getViewMode(graphIndex) === 'chart' && !isTimelineMode; + return ( +
+
+ - - {(() => { - const chartCaption = ( - <> -

- { - graph.chartDefinition[ - `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition - ] - }{' '} - {(() => { - // For Input metrics with dynamic x-axis, use dynamic heading - const metricTitle = - (graph.chartDefinition[ + ? 'gpu_timeseries' + : graph.chartDefinition.chartType === 'e2e' + ? 'latency' + : 'interactivity' + } + leadingControls={ + handleViewModeChange(graphIndex, v)} + ariaLabel="View mode" + testId={`inference-view-toggle-${graphIndex}`} + /> + } + hideImageExport={getViewMode(graphIndex) === 'table'} + setIsLegendExpanded={setIsLegendExpanded} + exportFileName={`InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`} + onExportMp4={ + replayAvailable + ? () => replayHandlesRef.current[graphIndex]?.open() + : undefined + } + onExportCsv={() => { + const visibleData = graph.data.filter((d) => + isTimelineMode + ? activeDates.has(`${d.date}_${d.hwKey}`) + : activeHwTypes.has(d.hwKey as string) && + selectedPrecisions.includes(d.precision), + ); + const { headers, rows } = inferenceChartToCsv( + visibleData, + graph.model, + graph.sequence, + ); + exportToCsv( + `InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`, + headers, + rows, + ); + }} + /> + + {(() => { + const chartCaption = ( + <> +

+ { + graph.chartDefinition[ `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition - ] as string) || ''; - const isInputMetric = metricTitle.toLowerCase().includes('input'); - if ( - graph.chartDefinition.chartType === 'interactivity' && - isInputMetric && - selectedXAxisMetric - ) { - if (selectedXAxisMetric === 'p99_ttft') { - return 'vs. P99 Time To First Token'; - } else if (selectedXAxisMetric === 'median_ttft') { - return 'vs. Median Time To First Token'; + ] + }{' '} + {(() => { + // For Input metrics with dynamic x-axis, use dynamic heading + const metricTitle = + (graph.chartDefinition[ + `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition + ] as string) || ''; + const isInputMetric = metricTitle.toLowerCase().includes('input'); + if ( + graph.chartDefinition.chartType === 'interactivity' && + isInputMetric && + selectedXAxisMetric === 'p90_ttft' + ) { + return 'vs. P90 Time To First Token'; } - } - // For e2e chart: render clickable inline dropdown for x-axis - if (graph.chartDefinition.chartType === 'e2e') { - const xAxisLabel = - selectedE2eXAxisMetric === 'p99_ttft' - ? 'P99 TTFT' - : selectedE2eXAxisMetric === 'median_ttft' - ? 'Median TTFT' - : 'End-to-end Latency'; - const xAxisOptions = [ - { value: null, label: 'End-to-end Latency' }, - { value: 'p99_ttft', label: 'P99 TTFT' }, - { value: 'median_ttft', label: 'Median TTFT' }, - ]; - const zoomPrefix = - selectedDateRange.startDate && - selectedDateRange.endDate && - selectedGPUs.length > 0 - ? 'gpu_timeseries' - : 'latency'; - return ( - { - setSelectedE2eXAxisMetric(value); - track('latency_x_axis_metric_selected', { - metric: value ?? 'median_e2el', - }); - window.dispatchEvent( - new CustomEvent( - `${zoomPrefix}_zoom_reset_chart-${graphIndex}`, - ), - ); - }} - /> - ); - } + // For e2e chart: heading is driven by the buttons above the + // card. Derived-metric modes win first; otherwise the metric + // carries the percentile prefix (e.g. p90_ttft, median_ttft). 
+ if (graph.chartDefinition.chartType === 'e2e') { + if (selectedXAxisMode === 'session-time') { + return 'vs. Mean Normalized Session Time'; + } + if (selectedXAxisMode === 'prefill-tps') { + return 'vs. P90 Prefill TPS / user'; + } + const isAgentic = sequenceKind(selectedSequence) === 'agentic'; + if (selectedE2eXAxisMetric?.endsWith('_ttft')) { + const pctl = selectedE2eXAxisMetric.replace(/_ttft$/u, ''); + const word = pctl === 'median' ? 'Median' : pctl.toUpperCase(); + return `vs. ${word} Time To First Token`; + } + const pctlWord = selectedPercentile.toUpperCase(); + return isAgentic + ? `vs. ${pctlWord} End-to-end Latency` + : 'vs. End-to-end Latency'; + } - // Fall back to configured heading - return ( - graph.chartDefinition[ - `${selectedYAxisMetric}_heading` as keyof typeof graph.chartDefinition - ] || graph.chartDefinition.heading - ); - })()} -

-

- {getModelLabel(graph.model as Model)} •{' '} - {selectedPrecisions - .map((prec) => getPrecisionLabel(prec as Precision)) - .join(', ')}{' '} - • {getSequenceLabel(graph.sequence as Sequence)} •{' '} - {isUnofficialRun - ? 'Source: UNOFFICIAL' - : 'Source: SemiAnalysis InferenceX™'} - {selectedRunDate && ( - <> - {' '} - • Updated:{' '} - {new Date(`${selectedRunDate}T00:00:00Z`).toLocaleDateString( - 'en-US', - { - year: 'numeric', - month: '2-digit', - day: '2-digit', - timeZone: 'UTC', - }, - )} - - )} -

- - - - ); - - if (getViewMode(graphIndex) === 'table') { - const overlay = - graph.chartDefinition.chartType === 'e2e' - ? overlayDataByChartType.e2e - : overlayDataByChartType.interactivity; - const overlayRows = (overlay?.data ?? []).filter((p) => - selectedPrecisions.includes(p.precision), - ); - return ( - <> - {chartCaption} - 0 ? [...graph.data, ...overlayRows] : graph.data - } - chartDefinition={graph.chartDefinition} - selectedYAxisMetric={selectedYAxisMetric} - /> + // Fall back to the heading baked into chartDefinition + // by useChartData (already resolves per-metric overrides + // and applies the agentic percentile rewrite). + return graph.chartDefinition.heading; + })()} +

+

+ {getModelLabel(graph.model as Model)} •{' '} + {selectedPrecisions + .map((prec) => getPrecisionLabel(prec as Precision)) + .join(', ')}{' '} + • {getSequenceLabel(graph.sequence as Sequence)} •{' '} + {isUnofficialRun + ? 'Source: UNOFFICIAL' + : 'Source: SemiAnalysis InferenceX™'} + {selectedRunDate && ( + <> + {' '} + • Updated:{' '} + {new Date(`${selectedRunDate}T00:00:00Z`).toLocaleDateString( + 'en-US', + { + year: 'numeric', + month: '2-digit', + day: '2-digit', + timeZone: 'UTC', + }, + )} + + )} +

+ + ); - } - return selectedDateRange.startDate && - selectedDateRange.endDate && - selectedGPUs.length > 0 ? ( - - ) : ( -
- + selectedPrecisions.includes(p.precision), + ); + return ( + <> + {chartCaption} + 0 + ? [...graph.data, ...overlayRows] + : graph.data + } + chartDefinition={graph.chartDefinition} + selectedYAxisMetric={selectedYAxisMetric} + /> + + ); + } + + return selectedDateRange.startDate && + selectedDateRange.endDate && + selectedGPUs.length > 0 ? ( + - {selectedGPUs.length > 0 && - (!selectedDateRange.startDate || !selectedDateRange.endDate) && ( -
-

- Select a date range to view GPU comparison -

-
- )} -
- ); - })()} - {replayAvailable && ( - { - replayHandlesRef.current[graphIndex] = handle; - }} - parentChartId={`chart-${graphIndex}`} - chartDefinition={graph.chartDefinition} - yLabel={`${ - graph.chartDefinition[ - `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition - ] - }`} - xLabel={graph.chartDefinition.x_label} - /> - )} -
-
-
- ); - }); + ) : ( +
+ + {selectedGPUs.length > 0 && + (!selectedDateRange.startDate || !selectedDateRange.endDate) && ( +
+

+ Select a date range to view GPU comparison +

+
+ )} +
+ ); + })()} + {replayAvailable && ( + { + replayHandlesRef.current[graphIndex] = handle; + }} + parentChartId={`chart-${graphIndex}`} + chartDefinition={graph.chartDefinition} + yLabel={`${ + graph.chartDefinition[ + `${selectedYAxisMetric}_label` as keyof typeof graph.chartDefinition + ] + }`} + xLabel={graph.chartDefinition.x_label} + /> + )} + +
+
+ ); + }); return (
@@ -642,6 +686,43 @@ export default function ChartDisplay() { )} +
+ {X_AXIS_MODE_BUTTONS.filter(({ agenticOnly }) => { + if (!agenticOnly) return true; + // Before client mount, conditionalize on the server-default kind + // (agentic) so SSR + first client render produce identical DOM. After + // mount, hide the agentic-only buttons on fixed-seq sequences. + if (!mounted) return true; + return sequenceKind(selectedSequence) === 'agentic'; + }).map(({ value, label }) => { + const isActive = selectedXAxisMode === value; + return ( + + ); + })} +
{displayGraphs}
{/* Performance Over Time — Modal Drill-Down */} diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 56e0088e..c85ec9c0 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -6,11 +6,18 @@ import React, { useCallback, useEffect, useMemo, useRef } from 'react'; import { GRADIENT_NUDGE_EVENT } from '@/lib/nudges/registry'; import { useInference } from '@/components/inference/InferenceContext'; +import { useTraceAvailability } from '@/hooks/api/use-trace-availability'; +import { useRouter } from 'next/navigation'; import ChartLegend from '@/components/ui/chart-legend'; import { useUnofficialRun } from '@/components/unofficial-run-provider'; import { computeToggle } from '@/hooks/useTogglableSet'; import { getHardwareConfig, getModelSortIndex } from '@/lib/constants'; -import { getChartWatermark, getPrecisionLabel, type Precision } from '@/lib/data-mappings'; +import { + getChartWatermark, + getPrecisionLabel, + type Precision, + Sequence, +} from '@/lib/data-mappings'; import { formatNumber, getDisplayLabel, updateRepoUrl } from '@/lib/utils'; import { D3Chart } from '@/lib/d3-chart/D3Chart'; import type { @@ -63,6 +70,96 @@ import { buildGradientColorMap, } from '@/components/inference/utils/paretoLabels'; +// Greedy label-collision avoidance. +// Each candidate is the y-position of the FIRST baseline (relative to point +// center) which we apply via the first tspan's `dy` — later tspans cascade +// down by 1.1em. We try above/below at primary and secondary offsets, and +// hide the label if all four positions collide. 
+function avoidLabelCollisions( + zoomGroup: d3.Selection, +): void { + interface LabelInfo { + el: SVGTextElement; + firstTspan: SVGTSpanElement; + cx: number; + cy: number; + w: number; + nLines: number; + defaultFirstY: number; + } + const labels: LabelInfo[] = []; + const ASCENT = 9; + const DESCENT = 3; + const LINE_H = 11; + + zoomGroup.selectAll('.dot-group').each(function () { + const labelEl = this.querySelector('.point-label'); + if (!labelEl) return; + if ((this as SVGGElement).style.opacity === '0') return; + const tspans = labelEl.querySelectorAll('tspan'); + if (tspans.length === 0) return; + const transform = (this as SVGGElement).getAttribute('transform') ?? ''; + const m = transform.match(/translate\((?[^,]+),(?[^)]+)\)/u); + if (!m) return; + const cx = parseFloat(m[1]); + const cy = parseFloat(m[2]); + const nLines = tspans.length; + const defaultFirstY = -(8 + (nLines - 1) * LINE_H); // last baseline 8px above point + // Reset to default before measuring so prior positioning doesn't bias bbox + tspans[0].setAttribute('dy', `${defaultFirstY}px`); + labelEl.style.opacity = '1'; + const bbox = labelEl.getBBox(); + labels.push({ + el: labelEl, + firstTspan: tspans[0], + cx, + cy, + w: bbox.width, + nLines, + defaultFirstY, + }); + }); + + labels.sort((a, b) => a.cx - b.cx); + const placed: { left: number; right: number; top: number; bottom: number }[] = []; + const pad = 2; + + for (const lab of labels) { + const blockH = (lab.nLines - 1) * LINE_H + ASCENT + DESCENT; + const aboveFirstY = lab.defaultFirstY; + const belowFirstY = 14; // first baseline 14px below point center + const candidates = [ + aboveFirstY, + belowFirstY, + aboveFirstY - blockH - 2, + belowFirstY + blockH + 2, + ]; + let chosenY: number | null = null; + let chosenBox: { left: number; right: number; top: number; bottom: number } | null = null; + for (const firstY of candidates) { + const top = lab.cy + firstY - ASCENT - pad; + const bottom = lab.cy + firstY + (lab.nLines - 1) * 
LINE_H + DESCENT + pad; + const left = lab.cx - lab.w / 2 - pad; + const right = lab.cx + lab.w / 2 + pad; + const collides = placed.some( + (p) => !(right < p.left || left > p.right || bottom < p.top || top > p.bottom), + ); + if (!collides) { + chosenY = firstY; + chosenBox = { left, right, top, bottom }; + break; + } + } + if (chosenY !== null && chosenBox) { + lab.firstTspan.setAttribute('dy', `${chosenY}px`); + lab.el.style.opacity = '1'; + placed.push(chosenBox); + } else { + lab.el.style.opacity = '0'; + } + } +} + // X-shape path for overlay (unofficial) data points const X_SIZE = 5; const X_HOVER_SIZE = 7; @@ -158,6 +255,8 @@ const ScatterGraph = React.memo( trackedConfigs, addTrackedConfig, removeTrackedConfig, + selectedXAxisMode, + selectedSequence, } = useInference(); const { @@ -266,6 +365,10 @@ const ScatterGraph = React.memo( ); const rooflines = useMemo(() => { + // Frontier scope is (hw, precision, date) — points from different dates + // can never share a frontier (a May 15 point can't dominate a May 17 plot). + // The legend grouping is still by (hw, precision); we just split the + // pareto compute per date and re-merge into the legend bucket. const result: Record = {}; const rooflineKey = `${selectedYAxisMetric}_roofline` as keyof ChartDefinition; const dir = chartDefinition[rooflineKey] as @@ -274,17 +377,43 @@ const ScatterGraph = React.memo( | 'lower_left' | 'lower_right' | undefined; - for (const hw of Object.keys(groupedData)) { - const front = - dir === 'upper_right' - ? paretoFrontUpperRight(groupedData[hw]) - : dir === 'upper_left' - ? paretoFrontUpperLeft(groupedData[hw]) - : dir === 'lower_left' - ? paretoFrontLowerLeft(groupedData[hw]) - : paretoFrontLowerRight(groupedData[hw]); - front.sort((a, b) => a.x - b.x); - result[hw] = front; + const frontierFn = + dir === 'upper_right' + ? paretoFrontUpperRight + : dir === 'upper_left' + ? paretoFrontUpperLeft + : dir === 'lower_left' + ? 
paretoFrontLowerLeft + : paretoFrontLowerRight; + for (const hwKey of Object.keys(groupedData)) { + const byDate = new Map(); + for (const p of groupedData[hwKey]) { + const d = p.date; + let bucket = byDate.get(d); + if (!bucket) { + bucket = []; + byDate.set(d, bucket); + } + bucket.push(p); + } + const combined: InferenceData[] = []; + for (const datePoints of byDate.values()) { + // In non-e2e xmodes, useChartData stamps every point with an + // `isOnE2eFrontier` flag so the line is restricted to the + // e2e-Pareto winners — same set of points across every chart, + // just re-plotted at the chosen x metric. When the flag is + // present on ANY point in the bucket, narrow to the winners + // before paretoing (otherwise we'd recompute a fresh frontier + // on the swapped x axis and reintroduce the benchmark hack). + const flagged = datePoints.some((p) => p.isOnE2eFrontier !== undefined); + const seedPoints = flagged + ? datePoints.filter((p) => p.isOnE2eFrontier === true) + : datePoints; + if (seedPoints.length === 0) continue; + combined.push(...frontierFn(seedPoints)); + } + combined.sort((a, b) => a.x - b.x); + result[hwKey] = combined; } return result; }, [groupedData, selectedYAxisMetric, chartDefinition]); @@ -292,7 +421,7 @@ const ScatterGraph = React.memo( const optimalPointKeys = useMemo(() => { const keys = new Set(); Object.values(rooflines).forEach((pts) => - pts.forEach((p) => keys.add(`${p.hwKey}_${p.precision}-${p.x}-${p.y}`)), + pts.forEach((p) => keys.add(`${p.hwKey}_${p.precision}_${p.date}-${p.x}-${p.y}`)), ); return keys; }, [rooflines]); @@ -319,6 +448,10 @@ const ScatterGraph = React.memo( const buildPointConfigId = useCallback((point: InferenceData): string => { let key = `${point.hwKey}|${point.precision}|${point.tp}|${point.conc}|${point.decode_ep ?? 0}|${point.prefill_tp ?? 0}|${point.prefill_ep ?? 0}`; if (point.disagg) key += `|disagg|${point.num_prefill_gpu ?? 0}|${point.num_decode_gpu ?? 
0}`; + // Agentic runs emit two rows per (config, conc) — one offload=on, one off. + // Without this suffix, d3's data join treats them as the same point and + // drops one variant (along with its halo). + if (point.offload_mode) key += `|offload-${point.offload_mode}`; return key; }, []); @@ -391,6 +524,21 @@ const ScatterGraph = React.memo( // All official points for rendering (unfiltered — visibility via opacity) const pointsData = useMemo(() => Object.values(groupedData).flat(), [groupedData]); + // Bulk presence lookup for agentic points: which ids have a stored + // trace_replay blob → controls the "View charts" button in the pinned + // tooltip. We deliberately don't fetch the histograms themselves here; + // a 95-point dsv4-b300 dashboard would pull GB of profile blobs through + // Neon's HTTP API and trip its 64 MB per-response cap. + const agenticIds = useMemo(() => { + const ids: number[] = []; + for (const p of pointsData) { + if (p.benchmark_type === 'agentic_traces' && typeof p.id === 'number') ids.push(p.id); + } + return ids; + }, [pointsData]); + const { data: traceAvailability } = useTraceAvailability(agenticIds); + const router = useRouter(); + // Gradient label data const allPointLabelsByKey = useMemo(() => { const globalLabelColorMap = new Map(); @@ -430,7 +578,9 @@ const ScatterGraph = React.memo( const visiblePoints = useMemo(() => { let pts = filteredData; if (hideNonOptimal) { - pts = pts.filter((d) => optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`)); + pts = pts.filter((d) => + optimalPointKeys.has(`${d.hwKey}_${d.precision}_${d.date}-${d.x}-${d.y}`), + ); } return processedOverlayData.length > 0 ? 
[...pts, ...processedOverlayData] : pts; }, [filteredData, processedOverlayData, hideNonOptimal, optimalPointKeys]); @@ -515,7 +665,8 @@ const ScatterGraph = React.memo( (d: InferenceData) => effectiveActiveHwTypes.has(d.hwKey as string) && selectedPrecisions.includes(d.precision) && - (!hideNonOptimal || optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`)), + (!hideNonOptimal || + optimalPointKeys.has(`${d.hwKey}_${d.precision}_${d.date}-${d.x}-${d.y}`)), [effectiveActiveHwTypes, selectedPrecisions, hideNonOptimal, optimalPointKeys], ); @@ -633,6 +784,7 @@ const ScatterGraph = React.memo( d3.axisLeft(newYS).ticks(10).tickFormat(logTickFormat(newYS)) as any, ); } + avoidLabelCollisions(ctx.layout.zoomGroup); }, }), [zoomResetEventName, eventPrefix, xScaleConfig._isLog, yScaleConfig.type], @@ -652,6 +804,7 @@ const ScatterGraph = React.memo( hardwareConfig, isTracked: trackedConfigIdsRef.current.has(buildPointConfigId(d)), runUrl: d.run_url ? updateRepoUrl(d.run_url) : undefined, + hasTrace: typeof d.id === 'number' ? 
traceAvailability?.[d.id] === true : false, }), getRulerX: (d: InferenceData, xScale: any) => (xScale as ContinuousScale)(d.x), getRulerY: (d: InferenceData, yScale: any) => (yScale as ContinuousScale)(d.y), @@ -667,26 +820,43 @@ const ScatterGraph = React.memo( ), onPointClick: (d: InferenceData) => { track('latency_data_point_clicked', { hw: String(d.hwKey), x: d.x, y: d.y }); - // Attach track-over-time button handler in the tooltip const tooltipEl = chartRef.current?.getTooltipElement(); - if (tooltipEl) { - const btn = tooltipEl.querySelector('[data-action="track-over-time"]'); - if (btn) { - btn.addEventListener('click', (btnEvent) => { - btnEvent.stopPropagation(); - const configId = buildPointConfigId(d); - if (trackedConfigIdsRef.current.has(configId)) removeTrackedConfig(configId); - else addTrackedConfig(d, chartDefinition.chartType); - chartRef.current?.dismissTooltip(); - chartRef.current?.hideTooltip(); - track('latency_point_tracked_via_tooltip', { - hwKey: String(d.hwKey), - tp: d.tp, - conc: d.conc, - precision: d.precision, - }); + if (!tooltipEl) return; + + // ── Summary-page actions ────────────────────────────────────────── + const trackBtn = tooltipEl.querySelector('[data-action="track-over-time"]'); + if (trackBtn) { + trackBtn.addEventListener('click', (btnEvent) => { + btnEvent.stopPropagation(); + const configId = buildPointConfigId(d); + if (trackedConfigIdsRef.current.has(configId)) removeTrackedConfig(configId); + else addTrackedConfig(d, chartDefinition.chartType); + chartRef.current?.dismissTooltip(); + chartRef.current?.hideTooltip(); + track('latency_point_tracked_via_tooltip', { + hwKey: String(d.hwKey), + tp: d.tp, + conc: d.conc, + precision: d.precision, }); - } + }); + } + + // ── "View charts" → navigate to dedicated detail page ──────────── + const viewBtn = tooltipEl.querySelector('[data-action="view-charts"]'); + if (viewBtn && typeof d.id === 'number') { + const pointId = d.id; + viewBtn.addEventListener('click', 
(btnEvent) => { + btnEvent.stopPropagation(); + track('latency_view_charts_opened', { + id: pointId, + hwKey: String(d.hwKey), + conc: d.conc, + }); + chartRef.current?.dismissTooltip(); + chartRef.current?.hideTooltip(); + router.push(`/inference/agentic/${pointId}`); + }); } }, attachToLayer: 1, // scatter layer is index 1 (after rooflines at 0) @@ -701,6 +871,12 @@ const ScatterGraph = React.memo( removeTrackedConfig, chartDefinition.chartType, selectedPrecisions, + // Tooltip content closure reads traceAvailability to decide whether + // to render the "View charts" button — rebuild config when the + // presence fetch resolves so the button appears for points that + // have a trace_replay blob. + traceAvailability, + router, ], ); @@ -751,35 +927,64 @@ const ScatterGraph = React.memo( const precision = key.split('_').pop()!; const visible = effectiveActiveHwTypes.has(hw) && selectedPrecisions.includes(precision); - let stroke = getCssColor(resolveColor(hw)); - - if (showGradientLabels) { - const pointLabels = allPointLabelsByKey[key]; - if (pointLabels) { - const stops = computeGradientStops(pointLabels, xScale); - if (stops) { - const gid = `roofline-gradient-${chartId}-${key}`; - activeGradientIds.add(gid); - let gradient = defs.select(`#${CSS.escape(gid)}`); - if (gradient.empty()) gradient = defs.append('linearGradient').attr('id', gid); - gradient - .attr('gradientUnits', 'userSpaceOnUse') - .attr('x1', xScale(pts[0].x)) - .attr('y1', 0) - .attr('x2', xScale(pts.at(-1)!.x)) - .attr('y2', 0); - gradient - .selectAll('stop') - .data(stops) - .join('stop') - .attr('offset', (s) => `${(s.offset * 100).toFixed(2)}%`) - .attr('stop-color', (s) => s.color); - stroke = `url(#${gid})`; - } + const baseStroke = getCssColor(resolveColor(hw)); + + // Split into per-date sub-paths so the line never crosses dates. + // (When only one date is present the loop runs once with the full set.) 
+ const byDate = new Map(); + for (const p of pts) { + let bucket = byDate.get(p.date); + if (!bucket) { + bucket = []; + byDate.set(p.date, bucket); } + bucket.push(p); } + const singleDate = byDate.size === 1; + + for (const [date, datePoints] of byDate) { + if (datePoints.length <= 1) continue; + const entryKey = singleDate ? key : `${key}__${date}`; + let stroke = baseStroke; + + // Gradient labels only apply in the single-date case; mapping the + // (key-wide) ParetoPointLabel array onto per-date sub-segments is + // ambiguous and the comparison-date overlay is a rare combo. + if (singleDate && showGradientLabels) { + const pointLabels = allPointLabelsByKey[key]; + if (pointLabels) { + const stops = computeGradientStops(pointLabels, xScale); + if (stops) { + const gid = `roofline-gradient-${chartId}-${entryKey}`; + activeGradientIds.add(gid); + let gradient = defs.select(`#${CSS.escape(gid)}`); + if (gradient.empty()) gradient = defs.append('linearGradient').attr('id', gid); + gradient + .attr('gradientUnits', 'userSpaceOnUse') + .attr('x1', xScale(datePoints[0].x)) + .attr('y1', 0) + .attr('x2', xScale(datePoints.at(-1)!.x)) + .attr('y2', 0); + gradient + .selectAll('stop') + .data(stops) + .join('stop') + .attr('offset', (s) => `${(s.offset * 100).toFixed(2)}%`) + .attr('stop-color', (s) => s.color); + stroke = `url(#${gid})`; + } + } + } - entries.push({ key, hw, precision, points: pts, stroke, visible }); + entries.push({ + key: entryKey, + hw, + precision, + points: datePoints, + stroke, + visible, + }); + } }); // Remove stale gradients @@ -1206,11 +1411,26 @@ const ScatterGraph = React.memo( .y((d) => newYScale(d.y)) .curve(d3.curveMonotoneX); - // Update roofline paths + // Update roofline paths — must split per-date so the zoom redraw + // matches the per-date sub-paths created in the initial render. 
Object.entries(rooflines).forEach(([key, pts]) => { if (pts.length < 2) return; - const sel = zoomGroup.select(`.roofline-${key}`); - if (!sel.empty()) sel.attr('d', lineGen(pts) as string); + const byDate = new Map(); + for (const p of pts) { + let bucket = byDate.get(p.date); + if (!bucket) { + bucket = []; + byDate.set(p.date, bucket); + } + bucket.push(p); + } + const singleDate = byDate.size === 1; + for (const [date, datePoints] of byDate) { + if (datePoints.length < 2) continue; + const cls = singleDate ? `roofline-${key}` : `roofline-${key}__${date}`; + const sel = zoomGroup.select(`.${CSS.escape(cls)}`); + if (!sel.empty()) sel.attr('d', lineGen(datePoints) as string); + } }); // Update gradient coordinates @@ -1442,7 +1662,8 @@ const ScatterGraph = React.memo( getOpacity: (d) => (isPointVisible(d) ? 1 : 0), getPointerEvents: (d) => (isPointVisible(d) ? 'auto' : 'none'), hideLabels: hidePointLabels || showGradientLabels, - getLabelText: (d) => (useAdvancedLabels ? getPointLabel(d) : String(d.tp)), + getLabelText: (d) => + useAdvancedLabels ? `${getPointLabel(d)}\nC=${d.conc}` : `${d.tp}\nC=${d.conc}`, foreground: 'var(--foreground)', dataAttrs: { 'hw-key': (d) => String(d.hwKey), @@ -1542,17 +1763,31 @@ const ScatterGraph = React.memo( // Labels const showLabels = !hidePointLabels && !showGradientLabels; overlayPoints.each(function (d) { - d3.select(this) + const lines = showLabels + ? (useAdvancedLabels + ? `${getPointLabel(d)}\nC=${d.conc}` + : `${d.tp}\nC=${d.conc}` + ).split('\n') + : []; + const text = d3 + .select(this) .selectAll('.overlay-label') .data(showLabels ? [true] : []) .join('text') .attr('class', 'overlay-label') - .attr('dy', -10) .attr('text-anchor', 'middle') .style('fill', 'var(--foreground)') .attr('font-size', '10px') - .attr('pointer-events', 'none') - .text(useAdvancedLabels ? 
getPointLabel(d) : String(d.tp)); + .attr('font-weight', '700') + .attr('pointer-events', 'none'); + const firstDy = -(1 + (lines.length - 1) * 1.1); + text + .selectAll('tspan') + .data(lines) + .join('tspan') + .attr('x', 0) + .attr('dy', (_l, i) => (i === 0 ? `${firstDy}em` : '1.1em')) + .text((l) => l); }); // Overlay tooltip handlers @@ -1820,6 +2055,23 @@ const ScatterGraph = React.memo( .attr('pointer-events', 'none'); }); + // Offload halo: dashed ring on every point that used KV offload (Pareto or not) + zoomGroup.selectAll('.dot-group').each(function (d) { + const showHalo = d.offload_mode === 'on'; + d3.select(this) + .selectAll('.offload-halo') + .data(showHalo ? [true] : []) + .join('circle') + .attr('class', 'offload-halo') + .attr('r', POINT_SIZE + 4) + .attr('fill', 'none') + .attr('stroke', 'var(--foreground)') + .attr('stroke-width', 1.5) + .attr('stroke-dasharray', '3 2') + .attr('opacity', 0.9) + .attr('pointer-events', 'none'); + }); + // Double-click to track/untrack zoomGroup .selectAll('.dot-group') @@ -1853,6 +2105,8 @@ const ScatterGraph = React.memo( }); }); + avoidLabelCollisions(zoomGroup); + // Log tick formatting on initial render if (xScaleConfig._isLog) { const xScale = ctx.xScale as d3.ScaleLogarithmic; @@ -1875,6 +2129,9 @@ const ScatterGraph = React.memo( chartDefinition.chartType, xScaleConfig._isLog, yScaleConfig.type, + optimalPointKeys, + getCssColor, + resolveColor, ], ); @@ -2067,6 +2324,17 @@ const ScatterGraph = React.memo( setHideNonOptimal(checked); track('latency_hide_non_optimal_toggled', { enabled: checked }); }, + // On agentic + non-e2e chart, "optimal" means "on the + // e2e-latency Pareto frontier" (not a per-axis Pareto on the + // current x metric). Explain that so users don't wonder why + // a point sitting above the line is still considered + // dominated. + ...(selectedSequence === Sequence.AgenticTraces && selectedXAxisMode !== 'e2e' + ? 
{ + infoTooltip: + "On agentic, optimal = on the end-to-end latency Pareto frontier, so a config can't win this axis by tanking e2e. Off-frontier points may appear above the line.", + } + : {}), }, { id: 'scatter-hide-point-labels', diff --git a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx index f9b1b3c8..73018483 100644 --- a/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/UnofficialChartDisplay.tsx @@ -194,9 +194,7 @@ export function UnofficialChartDisplay() { `${selectedYAxisMetric}_title` as keyof typeof graph.chartDefinition ] }{' '} - {graph.chartDefinition[ - `${selectedYAxisMetric}_heading` as keyof typeof graph.chartDefinition - ] || graph.chartDefinition.heading} + {graph.chartDefinition.heading}

{graph.model} • {selectedPrecisions.join(', ')} • {graph.sequence} diff --git a/packages/app/src/components/inference/utils.test.ts b/packages/app/src/components/inference/utils.test.ts index 8f8705e1..589ba580 100644 --- a/packages/app/src/components/inference/utils.test.ts +++ b/packages/app/src/components/inference/utils.test.ts @@ -157,12 +157,12 @@ describe('processOverlayChartData', () => { }); it('remaps x to config override for input metrics on interactivity chart', () => { - // inputTputPerGpu has x override to p99_ttft on interactivity chart + // inputTputPerGpu has x override to p90_ttft on interactivity chart const data = [ pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - p99_ttft: 0.25, + p90_ttft: 0.25, median_intvty: 50, } as any), ]; @@ -176,16 +176,11 @@ describe('processOverlayChartData', () => { pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - median_ttft: 0.1, + p90_ttft: 0.1, median_intvty: 50, } as any), ]; - const result = processOverlayChartData( - data, - 'interactivity', - 'y_inputTputPerGpu', - 'median_ttft', - ); + const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft'); expect(result).toHaveLength(1); expect(result[0].x).toBe(0.1); }); @@ -195,76 +190,62 @@ describe('processOverlayChartData', () => { pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - p99_ttft: 0.25, + p90_ttft: 0.25, median_e2el: 2.5, } as any), ]; const result = processOverlayChartData(data, 'e2e', 'y_inputTputPerGpu', null); expect(result).toHaveLength(1); - // e2e uses median_e2el as x (from chart config default), not p99_ttft + // e2e uses median_e2el as x (from chart config default), not p90_ttft expect(result[0].x).toBe(2.5); }); - it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p99_ttft', () => { - const data = [ - pt({ - x: 100, - tpPerGpu: { y: 42, roof: false }, - p99_ttft: 0.35, - median_e2el: 2.5, - } as any), - ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 
'p99_ttft'); - expect(result).toHaveLength(1); - expect(result[0].x).toBe(0.35); - }); - - it('remaps x to TTFT for e2e chart when selectedXAxisMetric is median_ttft', () => { + it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p90_ttft', () => { const data = [ pt({ x: 100, tpPerGpu: { y: 42, roof: false }, - median_ttft: 0.12, + p90_ttft: 0.12, median_e2el: 2.5, } as any), ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'median_ttft'); + const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft'); expect(result).toHaveLength(1); expect(result[0].x).toBe(0.12); }); it('filters e2e TTFT outliers exceeding y_latency_limit', () => { const data = [ - pt({ tpPerGpu: { y: 10, roof: false }, p99_ttft: 0.5, median_e2el: 1 } as any), - pt({ tpPerGpu: { y: 5, roof: false }, p99_ttft: 999, median_e2el: 2 } as any), + pt({ tpPerGpu: { y: 10, roof: false }, p90_ttft: 0.5, median_e2el: 1 } as any), + pt({ tpPerGpu: { y: 5, roof: false }, p90_ttft: 999, median_e2el: 2 } as any), ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft'); + const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft'); // y_latency_limit is 60 in the e2e chart config — the 999 outlier should be filtered expect(result).toHaveLength(1); expect(result[0].x).toBe(0.5); }); it('does not filter interactivity points by latency limit when x-axis is default', () => { - // Regression: selectedXAxisMetric defaults to 'p99_ttft' but the interactivity + // Regression: selectedXAxisMetric defaults to 'p90_ttft' but the interactivity // chart's x-axis stays median_intvty for non-input metrics. The latency limit // (60) must NOT apply to median_intvty values. 
const data = [ pt({ tpPerGpu: { y: 42, roof: false }, median_intvty: 200 } as any), pt({ tpPerGpu: { y: 10, roof: false }, median_intvty: 30 } as any), ]; - const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p99_ttft'); + const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p90_ttft'); expect(result).toHaveLength(2); }); it('applies latency limit on interactivity only when x-axis is actually overridden', () => { - // When an input metric IS selected and x-axis overrides to p99_ttft, + // When an input metric IS selected and x-axis overrides to p90_ttft, // the latency limit should apply. const data = [ - pt({ inputTputPerGpu: { y: 5, roof: false }, p99_ttft: 0.5, median_intvty: 10 } as any), - pt({ inputTputPerGpu: { y: 3, roof: false }, p99_ttft: 999, median_intvty: 20 } as any), + pt({ inputTputPerGpu: { y: 5, roof: false }, p90_ttft: 0.5, median_intvty: 10 } as any), + pt({ inputTputPerGpu: { y: 3, roof: false }, p90_ttft: 999, median_intvty: 20 } as any), ]; - const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p99_ttft'); - // x-axis is overridden to p99_ttft for input metric — latency limit SHOULD filter 999 + const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft'); + // x-axis is overridden to p90_ttft for input metric — latency limit SHOULD filter 999 expect(result).toHaveLength(1); expect(result[0].x).toBe(0.5); }); diff --git a/packages/app/src/components/inference/utils.ts b/packages/app/src/components/inference/utils.ts index 4b5335b6..4876c614 100644 --- a/packages/app/src/components/inference/utils.ts +++ b/packages/app/src/components/inference/utils.ts @@ -75,11 +75,13 @@ export function processOverlayChartData( chartType: 'e2e' | 'interactivity', selectedYAxisMetric: string, selectedXAxisMetric: string | null, + options?: { isAgentic?: boolean }, ): InferenceData[] { const chartDef = (chartDefinitions as 
ChartDefinition[]).find((d) => d.chartType === chartType); if (!chartDef) return []; const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey; + const isAgentic = options?.isAgentic === true; // Resolve x-axis field (must match useChartData logic) const metricTitle = @@ -87,9 +89,11 @@ export function processOverlayChartData( const isInputMetric = metricTitle.toLowerCase().includes('input'); let xAxisField: string = chartDef.x; // selectedXAxisMetric is already the effective metric for this chart type - // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric) + // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric). + // Match any *_ttft metric — the x-axis-mode picker can now select any + // percentile (median/p75/p90/p99) depending on sequence kind. const isTtftOverride = - selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft'; + typeof selectedXAxisMetric === 'string' && selectedXAxisMetric.endsWith('_ttft'); if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) { xAxisField = selectedXAxisMetric; @@ -109,7 +113,12 @@ export function processOverlayChartData( }) .filter( (d) => - xAxisField === chartDef.x || !chartDef.y_latency_limit || d.x <= chartDef.y_latency_limit, + // Skip the latency limit for the natural x-axis or for agentic + // (long TTFTs are normal there, not overload outliers). 
+ xAxisField === chartDef.x || + isAgentic || + !chartDef.y_latency_limit || + d.x <= chartDef.y_latency_limit, ); return filterDataByCostLimit(processedData, chartDef, selectedYAxisMetric); diff --git a/packages/app/src/components/inference/utils/paretoLabels.ts b/packages/app/src/components/inference/utils/paretoLabels.ts index 6e57d499..8f4f9702 100644 --- a/packages/app/src/components/inference/utils/paretoLabels.ts +++ b/packages/app/src/components/inference/utils/paretoLabels.ts @@ -46,7 +46,7 @@ export const parseLabelComponents = (label: string): string[] => { const parts = label.split('+'); return parts.map((p) => { // Strip the leading "NxNNN" multiplier (e.g., "1x" or "3x") - const match = p.match(/^\d+x(.+)$/u); + const match = p.match(/^\d+x(?

${label}: ${value}
`; -const shortenSha = (image: string) => image.replaceAll(/(sha256:[a-f0-9]{7})[a-f0-9]+/giu, '$1…'); +const formatPct = (v: number | undefined): string | null => + v === undefined || v === null || Number.isNaN(v) ? null : `${(v * 100).toFixed(1)}%`; + +/** Tooltip numeric values are capped at 3 decimal places (trailing zeros stripped). */ +const fmt = (v: number): string => { + if (!Number.isFinite(v)) return String(v); + const rounded = parseFloat(v.toFixed(3)); + if (Math.abs(rounded) >= 10000) return new Intl.NumberFormat('en-US').format(rounded); + return String(rounded); +}; + +/** + * Agentic-only tooltip rows: offload mode, KV cache hit rates, request + * success, token totals. Returns an empty string for non-agentic rows. + */ +const generateAgenticHTML = (d: InferenceData): string => { + if (d.benchmark_type !== 'agentic_traces') return ''; + + const parts: string[] = []; + if (d.offload_mode) { + parts.push(tooltipLine('Offload Mode', d.offload_mode.toUpperCase())); + } + + const gpuHit = formatPct(d.server_gpu_cache_hit_rate); + const cpuHit = formatPct(d.server_cpu_cache_hit_rate); + const theoHit = formatPct(d.theoretical_cache_hit_rate); + if (gpuHit) parts.push(tooltipLine('GPU Cache Hit Rate', gpuHit)); + if (cpuHit) parts.push(tooltipLine('CPU Cache Hit Rate', cpuHit)); + if (theoHit) parts.push(tooltipLine('Theoretical Cache Hit Rate', theoHit)); + + if (d.num_requests_total !== undefined && d.num_requests_successful !== undefined) { + const successPct = + d.num_requests_total > 0 + ? 
` (${((d.num_requests_successful / d.num_requests_total) * 100).toFixed(0)}%)` + : ''; + parts.push( + tooltipLine( + 'Requests', + `${d.num_requests_successful} / ${d.num_requests_total}${successPct}`, + ), + ); + } + + if (d.total_prompt_tokens !== undefined) { + parts.push(tooltipLine('Prompt Tokens', formatNumber(d.total_prompt_tokens))); + } + if (d.total_generation_tokens !== undefined) { + parts.push(tooltipLine('Generated Tokens', formatNumber(d.total_generation_tokens))); + } + + // Histograms + time-series live on the dedicated detail page now; the + // "View charts" button (rendered by the wrapper when pinned + has trace + // data) takes the user there. + + return parts.join(''); +}; + +/** "View charts" button — only visible when the tooltip is pinned and the + * point has stored trace data. Wired up by the ScatterGraph click handler. */ +const viewChartsButtonHTML = (isPinned: boolean, hasTraceData: boolean): string => { + if (!isPinned || !hasTraceData) return ''; + return ``; +}; + +const shortenSha = (image: string) => + image.replaceAll(/(?sha256:[a-f0-9]{7})[a-f0-9]+/giu, '$1…'); const imageTooltipLine = (image: string) => `
@@ -138,7 +215,16 @@ const generateParallelismHTML = (d: InferenceData): string => { * @returns HTML string for the tooltip content */ export const generateTooltipContent = (config: TooltipConfig): string => { - const { data: d, isPinned, xLabel, yLabel, selectedYAxisMetric, hardwareConfig, runUrl } = config; + const { + data: d, + isPinned, + xLabel, + yLabel, + selectedYAxisMetric, + hardwareConfig, + runUrl, + hasTrace, + } = config; return `
@@ -156,16 +242,16 @@ export const generateTooltipContent = (config: TooltipConfig): string => { : '' }
- ${xLabel}: ${formatNumber(d.x)} + ${xLabel}: ${fmt(d.x)}
- ${yLabel}: ${formatNumber(d.y)} + ${yLabel}: ${fmt(d.y)}
${ selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu'] ? `
- Input Token Throughput per GPU: ${formatNumber(d['inputTputPerGpu'].y)} + Input Token Throughput per GPU: ${fmt(d['inputTputPerGpu'].y)}
` : '' } @@ -173,7 +259,7 @@ export const generateTooltipContent = (config: TooltipConfig): string => { selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu'] ? `
- Output Token Throughput per GPU: ${formatNumber(d['outputTputPerGpu'].y)} + Output Token Throughput per GPU: ${fmt(d['outputTputPerGpu'].y)}
` : '' } @@ -182,10 +268,12 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)} ${runLinkHTML(runUrl)} + ${viewChartsButtonHTML(isPinned, Boolean(hasTrace))} ${ isPinned ? `
- ${yLabel}: ${formatNumber(d.y)} + ${yLabel}: ${fmt(d.y)}
${tooltipLine('Total GPUs', d.tp)} ${generateParallelismHTML(d)}
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)}
`; }; @@ -271,16 +360,16 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string => : '' }
- ${xLabel}: ${formatNumber(d.x)} + ${xLabel}: ${fmt(d.x)}
- ${yLabel}: ${formatNumber(d.y)} + ${yLabel}: ${fmt(d.y)}
${ selectedYAxisMetric === 'y_tpPerGpu' && d['inputTputPerGpu'] ? `
- Input Token Throughput per GPU: ${formatNumber(d['inputTputPerGpu'].y)} + Input Token Throughput per GPU: ${fmt(d['inputTputPerGpu'].y)}
` : '' } @@ -288,7 +377,7 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string => selectedYAxisMetric === 'y_tpPerGpu' && d['outputTputPerGpu'] ? `
- Output Token Throughput per GPU: ${formatNumber(d['outputTputPerGpu'].y)} + Output Token Throughput per GPU: ${fmt(d['outputTputPerGpu'].y)}
` : '' } @@ -297,9 +386,10 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)} ${runLinkHTML(runUrl)}
`; diff --git a/packages/app/src/components/json-ld.test.ts b/packages/app/src/components/json-ld.test.ts index 34feaa4b..ee085700 100644 --- a/packages/app/src/components/json-ld.test.ts +++ b/packages/app/src/components/json-ld.test.ts @@ -9,7 +9,7 @@ function render(data: object): string { } function scriptBody(html: string): string { - const match = html.match(/]*>([\s\S]*?)<\/script[^>]*>/iu); + const match = html.match(/]*>(?[\s\S]*?)<\/script[^>]*>/iu); if (!match) throw new Error(`no