Skip to content
Merged
8 changes: 4 additions & 4 deletions generated/benchmarks/INCREMENTAL-BENCHMARKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Import resolution: native batch vs JS fallback throughput.
|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|
| 3.10.0 | native | 745 | 2.0s ↓34% | 15ms ↑50% | 66ms ↑22% | 5ms ↓18% | 7ms ↓38% |
| 3.10.0 | wasm | 745 | 8.4s ↓40% | 13ms ↓90% | 51ms ↓18% | 5ms ↓18% | 7ms ↓38% |
| 3.9.6 | native | 744 | 3.0s | 10ms | 54ms | 7ms ~ | 11ms ↑7% |
| 3.9.6 | wasm | 744 | 14.0s | 131ms | 62ms | 7ms ~ | 11ms ↑7% |
| 3.9.6 | native | 744 | 3.0s ↑39% | 10ms ↑11% | 54ms ↓87% | 7ms ~ | 11ms ↑7% |
| 3.9.6 | wasm | 744 | 14.0s ↑86% | 131ms ↑589% | 62ms ~ | 7ms ~ | 11ms ↑7% |
| 3.9.4 | native | 668 | 2.1s ↓4% | 9ms ~ | 406ms ↑7% | 6ms ↑7% | 9ms ↓15% |
| 3.9.4 | wasm | 668 | 7.6s ~ | 19ms ↑6% | 61ms ↓90% | 6ms ↑7% | 9ms ↓15% |
| 3.9.3 | native | 667 | 2.2s ↓76% | 9ms ↑13% | 380ms ↓32% | 6ms ↓5% | 11ms ↑6% |
Expand All @@ -22,10 +22,10 @@ Import resolution: native batch vs JS fallback throughput.
| 3.9.0 | wasm | 567 | 6.8s ↓3% | 12ms ↓20% | 541ms ↓10% | 5ms ↓18% | 11ms ~ |
| 3.8.1 | native | 565 | 6.6s ↑468% | 8ms ↑14% | 41ms ↑24% | 6ms ↑51% | 11ms ↓14% |
| 3.8.1 | wasm | 565 | 7.0s ↑493% | 15ms ↑88% | 603ms ↑1727% | 6ms ↑51% | 11ms ↓14% |
| 3.8.0 | native | 564 | 1.2s | 7ms | 33ms | 4ms ↑2% | 12ms ↓19% |
| 3.8.0 | native | 564 | 1.2s ↓50% | 7ms ↓46% | 33ms ↓90% | 4ms ↑2% | 12ms ↓19% |
| 3.8.0 | wasm | 564 | 1.2s ↓82% | 8ms ↓58% | 33ms ↓94% | 4ms ↑2% | 12ms ↓19% |
| 3.7.0 | wasm | 532 | 6.4s ↑5% | 19ms ↑46% | 558ms ↑2% | 4ms ↑3% | 15ms ↑31% |
| 3.6.0 | wasm | 514 | 6.1s | 13ms | 545ms | 4ms ↓3% | 12ms ↑9% |
| 3.6.0 | wasm | 514 | 6.1s ↑20% | 13ms ~ | 545ms ↑7% | 4ms ↓3% | 12ms ↑9% |
| 3.4.1 | native | 473 | 2.4s ↑3% | 13ms ↑8% | 331ms ↓26% | 4ms ~ | 13ms ↑8% |
| 3.4.1 | wasm | 473 | 5.1s ~ | 13ms ↑8% | 511ms ↓17% | 4ms ~ | 13ms ↑8% |
| 3.4.0 | native | 473 | 2.3s ↓4% | 12ms ↑9% | 448ms ↑29% | 4ms ↓58% | 12ms ↓54% |
Expand Down
101 changes: 64 additions & 37 deletions scripts/update-incremental-report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,24 @@ if (!isDev) {
}
history.unshift(entry);

function findPrevRelease(hist, fromIdx) {
// Walk back through release history to find the most recent non-null value
// for a specific metric. Lets trend annotations skip past releases whose
// workers crashed and stored null, instead of hiding regressions behind an
// empty cell.
function findPrevMetric(hist, fromIdx, getter) {
for (let i = fromIdx + 1; i < hist.length; i++) {
if (hist[i].version !== 'dev') return hist[i];
if (hist[i].version === 'dev') continue;
const v = getter(hist[i]);
if (v != null) return v;
}
return null;
}

// ── Helpers ──────────────────────────────────────────────────────────────
function trend(current, previous, lowerIsBetter = true) {
if (current == null || previous == null) return '';
// Guard against null/undefined and a zero baseline — dividing by zero would
// emit `↑Infinity%`. Matches the `previous === 0` guard in checkRegression.
if (current == null || previous == null || previous === 0) return '';
const pct = ((current - previous) / previous) * 100;
if (Math.abs(pct) < 2) return ' ~';
if (lowerIsBetter) {
Expand All @@ -86,19 +94,21 @@ function formatMs(ms) {
return `${Math.round(ms)}ms`;
}

function engineRow(h, prev, engineKey) {
function engineRow(hist, i, engineKey, prevResolveNative, prevResolveJs) {
const h = hist[i];
const e = h[engineKey];
const p = prev?.[engineKey] || null;
if (!e) return null;

const fullT = trend(e.fullBuildMs, p?.fullBuildMs);
const noopT = trend(e.noopRebuildMs, p?.noopRebuildMs);
const oneT = trend(e.oneFileRebuildMs, p?.oneFileRebuildMs);
const fullT = trend(e.fullBuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.fullBuildMs));
const noopT = trend(e.noopRebuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.noopRebuildMs));
const oneT = trend(
e.oneFileRebuildMs,
findPrevMetric(hist, i, (r) => r[engineKey]?.oneFileRebuildMs),
);

const r = h.resolve;
const pr = prev?.resolve || null;
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, pr?.nativeBatchMs) : '';
const jsT = trend(r.jsFallbackMs, pr?.jsFallbackMs);
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, prevResolveNative) : '';
const jsT = trend(r.jsFallbackMs, prevResolveJs);

const noopCell = e.noopRebuildMs != null ? `${formatMs(e.noopRebuildMs)}${noopT}` : 'n/a';
const oneFileCell = e.oneFileRebuildMs != null ? `${formatMs(e.oneFileRebuildMs)}${oneT}` : 'n/a';
Expand All @@ -125,11 +135,13 @@ md +=
'|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|\n';

for (let i = 0; i < history.length; i++) {
const h = history[i];
const prev = findPrevRelease(history, i);

const nativeRow = engineRow(h, prev, 'native');
const wasmRow = engineRow(h, prev, 'wasm');
// Resolve metrics are release-level (not engine-specific), so pre-compute
// their fallback baselines once per release instead of having engineRow walk
// history twice (once per engine type).
const prevResolveNative = findPrevMetric(history, i, (x) => x.resolve?.nativeBatchMs);
const prevResolveJs = findPrevMetric(history, i, (x) => x.resolve?.jsFallbackMs);
const nativeRow = engineRow(history, i, 'native', prevResolveNative, prevResolveJs);
const wasmRow = engineRow(history, i, 'wasm', prevResolveNative, prevResolveJs);
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}
Comment on lines 137 to 147
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The natT/jsT resolve trends are computed inside engineRow, which is called once for each engine type per release. Since h.resolve is release-level (not engine-specific), findPrevMetric for nativeBatchMs and jsFallbackMs is invoked twice per release — returning identical results both times. This is harmless but wasteful for long history arrays. Consider hoisting the resolve trend calls to the call site, alongside the nativeRow/wasmRow pair, so they are computed once and passed in.

Suggested change
for (let i = 0; i < history.length; i++) {
const h = history[i];
const prev = findPrevRelease(history, i);
const nativeRow = engineRow(h, prev, 'native');
const wasmRow = engineRow(h, prev, 'wasm');
const nativeRow = engineRow(history, i, 'native');
const wasmRow = engineRow(history, i, 'wasm');
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}
for (let i = 0; i < history.length; i++) {
// Resolve metrics are release-level; pre-compute their trends once per
// release so engineRow doesn't redundantly walk history twice.
const prevNative = findPrevMetric(history, i, (x) => x.resolve?.nativeBatchMs);
const prevJs = findPrevMetric(history, i, (x) => x.resolve?.jsFallbackMs);
const nativeRow = engineRow(history, i, 'native', prevNative, prevJs);
const wasmRow = engineRow(history, i, 'wasm', prevNative, prevJs);
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 628d37d — hoisted findPrevMetric for the release-level resolve metrics (nativeBatchMs, jsFallbackMs) to the call site and pass them into engineRow, so they are computed once per release instead of twice.

Expand Down Expand Up @@ -175,7 +187,6 @@ console.error(`Updated ${path.relative(root, reportPath)}`);

// ── Regression detection ─────────────────────────────────────────────────
const REGRESSION_THRESHOLD = 0.15; // 15% regression triggers a warning
const prev = findPrevRelease(history, 0);

function checkRegression(label, current, previous) {
if (previous == null || previous === 0) return;
Expand All @@ -190,26 +201,42 @@ function checkRegression(label, current, previous) {
}
}

if (prev) {
for (const engineKey of ['native', 'wasm']) {
const e = latest[engineKey];
const p = prev[engineKey];
if (!e || !p) continue;
const tag = `[${engineKey}]`;
checkRegression(`${tag} Full build`, e.fullBuildMs, p.fullBuildMs);
if (e.noopRebuildMs != null && p.noopRebuildMs != null) {
checkRegression(`${tag} No-op rebuild`, e.noopRebuildMs, p.noopRebuildMs);
}
if (e.oneFileRebuildMs != null && p.oneFileRebuildMs != null) {
checkRegression(`${tag} 1-file rebuild`, e.oneFileRebuildMs, p.oneFileRebuildMs);
}
for (const engineKey of ['native', 'wasm']) {
const e = latest[engineKey];
if (!e) continue;
const tag = `[${engineKey}]`;
checkRegression(
`${tag} Full build`,
e.fullBuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.fullBuildMs),
);
if (e.noopRebuildMs != null) {
checkRegression(
`${tag} No-op rebuild`,
e.noopRebuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.noopRebuildMs),
);
}
const re = latest.resolve;
const rp = prev.resolve;
if (re && rp) {
checkRegression(`[resolve] JS fallback`, re.jsFallbackMs, rp.jsFallbackMs);
if (re.nativeBatchMs != null && rp.nativeBatchMs != null) {
checkRegression(`[resolve] Native batch`, re.nativeBatchMs, rp.nativeBatchMs);
}
if (e.oneFileRebuildMs != null) {
checkRegression(
`${tag} 1-file rebuild`,
e.oneFileRebuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.oneFileRebuildMs),
);
}
}
const re = latest.resolve;
if (re) {
checkRegression(
`[resolve] JS fallback`,
re.jsFallbackMs,
findPrevMetric(history, 0, (r) => r.resolve?.jsFallbackMs),
);
if (re.nativeBatchMs != null) {
checkRegression(
`[resolve] Native batch`,
re.nativeBatchMs,
findPrevMetric(history, 0, (r) => r.resolve?.nativeBatchMs),
);
}
}
143 changes: 143 additions & 0 deletions tests/unit/incremental-report-trend-fallback.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { execFileSync } from 'node:child_process';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(__dirname, '..', '..');
const scriptPath = path.join(repoRoot, 'scripts', 'update-incremental-report.ts');

// --experimental-strip-types works in Node 22.6+ through current 24.x; the
// renamed --strip-types was added then removed again across 24.x minor
// versions, so prefer the experimental name for compatibility.
const stripFlag = '--experimental-strip-types';

let tmpDir: string;
let reportPath: string;
let entryPath: string;

beforeEach(() => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-trend-fallback-'));
reportPath = path.join(tmpDir, 'INCREMENTAL-BENCHMARKS.md');
entryPath = path.join(tmpDir, 'entry.json');
});

afterEach(() => {
if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
});

function runScript() {
execFileSync('node', [stripFlag, scriptPath, entryPath], {
env: { ...process.env, CODEGRAPH_INCREMENTAL_REPORT_PATH: reportPath },
stdio: 'pipe',
});
}

function row(content: string, version: string, engine: string): string {
const re = new RegExp(`^\\| ${version.replace(/\./g, '\\.')} \\| ${engine} \\|.*$`, 'm');
const m = content.match(re);
if (!m) throw new Error(`row not found for ${version}/${engine}`);
return m[0];
}

describe('update-incremental-report trend fallback', () => {
it('falls back to nearest non-null prior release when previous metrics are null', () => {
// Seed: 9.9.4 has full data. 9.9.5 is the null release (workers crashed).
const initial = `# Codegraph Incremental Build Benchmarks

<!-- INCREMENTAL_BENCHMARK_DATA
[
{
"version": "9.9.5",
"date": "2026-05-01",
"files": 100,
"wasm": null,
"native": null,
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
},
{
"version": "9.9.4",
"date": "2026-04-18",
"files": 100,
"wasm": { "fullBuildMs": 1000, "noopRebuildMs": 10, "oneFileRebuildMs": 50 },
"native": { "fullBuildMs": 500, "noopRebuildMs": 5, "oneFileRebuildMs": 25 },
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
}
]
-->
`;
fs.writeFileSync(reportPath, initial);
// New release 9.9.6 doubles wasm full-build and 13x's no-op rebuild — should
// compare against 9.9.4 (skipping null 9.9.5) and show large regressions.
fs.writeFileSync(
entryPath,
JSON.stringify({
version: '9.9.6',
date: '2026-04-30',
files: 100,
wasm: { fullBuildMs: 2000, noopRebuildMs: 130, oneFileRebuildMs: 60 },
native: { fullBuildMs: 600, noopRebuildMs: 8, oneFileRebuildMs: 30 },
resolve: {
imports: 100,
nativeBatchMs: 5,
jsFallbackMs: 10,
perImportNativeMs: 0,
perImportJsMs: 0,
},
}),
);

runScript();
const out = fs.readFileSync(reportPath, 'utf8');

const wasmRow = row(out, '9.9.6', 'wasm');
// Full build: 1000 → 2000 = +100%
expect(wasmRow).toContain('↑100%');
// No-op: 10 → 130 = +1200%
expect(wasmRow).toContain('↑1200%');
// 1-file: 50 → 60 = +20%
expect(wasmRow).toContain('↑20%');

// And the null 9.9.5 row still renders without a trend (no prior data
// for it to compare against — it itself has no metrics).
expect(out).not.toMatch(/^\| 9\.9\.5 \| wasm \|/m);
expect(out).not.toMatch(/^\| 9\.9\.5 \| native \|/m);
});

it('leaves trend empty when no prior release has the metric at all', () => {
const initial = `# Codegraph Incremental Build Benchmarks

<!-- INCREMENTAL_BENCHMARK_DATA
[]
-->
`;
fs.writeFileSync(reportPath, initial);
fs.writeFileSync(
entryPath,
JSON.stringify({
version: '1.0.0',
date: '2026-01-01',
files: 50,
wasm: { fullBuildMs: 1000, noopRebuildMs: 10, oneFileRebuildMs: 50 },
native: { fullBuildMs: 500, noopRebuildMs: 5, oneFileRebuildMs: 25 },
resolve: {
imports: 50,
nativeBatchMs: 1,
jsFallbackMs: 2,
perImportNativeMs: 0,
perImportJsMs: 0,
},
}),
);

runScript();
const out = fs.readFileSync(reportPath, 'utf8');
const wasmRow = row(out, '1.0.0', 'wasm');
// No prior history => no arrow annotations on any cell
expect(wasmRow).not.toContain('↑');
expect(wasmRow).not.toContain('↓');
expect(wasmRow).not.toContain('~');
});
});
Loading