From 5f96cac7104ae811b8bc43a9e083d84e62e02b72 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 15 May 2026 08:42:13 -0600 Subject: [PATCH 1/5] perf(bench): exclude resolution-benchmark fixtures from dogfooding sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an `exclude` field to BuildGraphOpts that augments `config.exclude` for a single build, then wires the three dogfooding benchmark scripts (scripts/benchmark.ts, scripts/incremental-benchmark.ts, scripts/query-benchmark.ts) to pass `tests/benchmarks/resolution/fixtures/**`. Those fixtures are hand-annotated test scaffolding for the static-resolution suite, not representative source code — each new heavyweight grammar (e.g. tree-sitter-verilog in #1107) inflates `fullBuildMs` by hundreds of ms purely from parsing fixture files that have no business in dogfooding timing measurements. The 3.10.0:Full build KNOWN_REGRESSIONS exemption that papered over the Verilog cost is removed now that the root cause is addressed at the build invocation. Closes #1112 --- scripts/benchmark.ts | 10 +++--- scripts/incremental-benchmark.ts | 14 ++++---- scripts/lib/bench-config.ts | 15 +++++++++ scripts/query-benchmark.ts | 4 +-- src/domain/graph/builder/pipeline.ts | 7 ++++ src/types.ts | 7 ++++ tests/benchmarks/regression-guard.test.ts | 13 -------- .../config-include-exclude.test.ts | 33 +++++++++++++++++++ 8 files changed, 76 insertions(+), 27 deletions(-) diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index aaacb65d..0ed92c3f 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -15,7 +15,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; -import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -130,7 +130,7 @@ console.log = (...args) => console.error(...args); if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); const buildStart = performance.now(); -const buildResult = await buildGraph(root, { engine, incremental: false }); +const buildResult = await buildGraph(root, { engine, incremental: false, exclude: [...BENCHMARK_EXCLUDES] }); const buildTimeMs = performance.now() - buildStart; const queryStart = performance.now(); @@ -150,7 +150,7 @@ try { const noopTimings = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { const start = performance.now(); - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); noopTimings.push(performance.now() - start); } noopRebuildMs = Math.round(median(noopTimings)); @@ -167,7 +167,7 @@ try { for (let i = 0; i < INCREMENTAL_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); const start = performance.now(); - const res = await buildGraph(root, { engine, incremental: true }); + const res = await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); oneFileRuns.push({ ms: performance.now() - start, phases: res?.phases || null }); } oneFileRuns.sort((a, b) => a.ms - b.ms); @@ -179,7 +179,7 @@ try { } finally { fs.writeFileSync(PROBE_FILE, original); try { - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); } catch { // Cleanup rebuild failed — probe file is already restored, move on } diff --git a/scripts/incremental-benchmark.ts b/scripts/incremental-benchmark.ts index a37f132c..31badb7b 100644 --- a/scripts/incremental-benchmark.ts +++ b/scripts/incremental-benchmark.ts @@ -14,7 +14,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; -import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -194,7 +194,7 @@ const fullTimings = []; for (let i = 0; i < RUNS; i++) { if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); const start = performance.now(); - await buildGraph(root, { engine, incremental: false }); + await buildGraph(root, { engine, incremental: false, exclude: [...BENCHMARK_EXCLUDES] }); fullTimings.push(performance.now() - start); } const fullBuildMs = Math.round(median(fullTimings)); @@ -203,12 +203,12 @@ const fullBuildMs = Math.round(median(fullTimings)); let noopRebuildMs = null; try { for (let i = 0; i < WARMUP_RUNS; i++) { - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); } const noopTimings = []; for (let i = 0; i < RUNS; i++) { const start = performance.now(); - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); noopTimings.push(performance.now() - start); } noopRebuildMs = Math.round(median(noopTimings)); @@ -223,13 +223,13 @@ let oneFilePhases = null; try { for (let i = 0; i < WARMUP_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// warmup-${i}\n`); - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); } const oneFileRuns = []; for (let i = 0; i < RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); const start = performance.now(); - const res = await buildGraph(root, { engine, incremental: true }); + const res = await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); oneFileRuns.push({ ms: performance.now() - start, phases: res?.phases || null }); } oneFileRuns.sort((a, b) => a.ms - b.ms); @@ -241,7 +241,7 @@ try { } finally { fs.writeFileSync(PROBE_FILE, original); try { - await buildGraph(root, { engine, incremental: true }); + await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); } catch { // Cleanup rebuild failed — probe file is already restored, move on } diff --git a/scripts/lib/bench-config.ts b/scripts/lib/bench-config.ts index 0ed6f784..7a3a71e8 100644 --- a/scripts/lib/bench-config.ts +++ b/scripts/lib/bench-config.ts @@ -15,6 +15,21 @@ import { pathToFileURL } from 'node:url'; import { getBenchmarkVersion } from '../bench-version.js'; +/** + * Globs excluded from every benchmark's `buildGraph(root, ...)` invocation. + * + * Resolution-benchmark fixtures (`tests/benchmarks/resolution/fixtures/**`) + * are hand-annotated scaffolding for the static-resolution test suite, not + * representative source code. They inflate dogfooding timing measurements + * disproportionately whenever a new-language PR lands a heavyweight grammar + * (e.g. tree-sitter-verilog added ~850ms to native fullBuildMs in #1107). + * Excluding them here keeps build/query/incremental benchmarks measuring + * codegraph's own source rather than its test fixtures. + */ +export const BENCHMARK_EXCLUDES: readonly string[] = [ + 'tests/benchmarks/resolution/fixtures/**', +]; + // On Windows, `npm` is `npm.cmd` and Node refuses to spawn `.cmd`/`.bat` // without `shell: true` (since Node 18.20 / 20.15). When `shell: true`, the // Windows `cmd.exe` shell resolves bare `npm` to `npm.cmd` automatically, so diff --git a/scripts/query-benchmark.ts b/scripts/query-benchmark.ts index 11c328c6..3a13d0a8 100644 --- a/scripts/query-benchmark.ts +++ b/scripts/query-benchmark.ts @@ -16,7 +16,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; -import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -254,7 +254,7 @@ function benchDiffImpact(hubName) { // Build graph for this engine if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); -await buildGraph(root, { engine, incremental: false }); +await buildGraph(root, { engine, incremental: false, exclude: [...BENCHMARK_EXCLUDES] }); const targets = workerTargets() || selectTargets(); console.error(`Targets: hub=${targets.hub}, mid=${targets.mid}, leaf=${targets.leaf}`); diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 1416631a..85a6bcd3 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -183,6 +183,13 @@ function setupPipeline(ctx: PipelineContext): void { initSchema(ctx.db); ctx.config = loadConfig(ctx.rootDir); + // Merge caller-supplied excludes on top of the file-config excludes so + // programmatic callers (e.g. benchmark scripts) can extend exclusion + // without mutating .codegraphrc.json. Native orchestrator picks this up + // automatically — it reads exclude off the serialized ctx.config below. + if (ctx.opts.exclude?.length) { + ctx.config.exclude = [...(ctx.config.exclude ?? []), ...ctx.opts.exclude]; + } ctx.incremental = ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; diff --git a/src/types.ts b/src/types.ts index 08eb9431..8e4bf13c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1063,6 +1063,13 @@ export interface BuildGraphOpts { complexity?: boolean; cfg?: boolean; scope?: string[]; + /** + * Glob patterns merged on top of `config.exclude` for this build only. + * Lets callers extend exclusion programmatically without writing a config + * file — used by the benchmark scripts to skip resolution-benchmark + * fixtures that aren't representative of real code. + */ + exclude?: string[]; skipRegistry?: boolean; } diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 4125325e..1694c0d5 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -176,18 +176,6 @@ const SKIP_VERSIONS = new Set(['3.8.0']); * absolute delta 10.4ms exactly at the MIN_ABSOLUTE_DELTA floor. Exempt * this release; remove once 3.11.0+ data confirms stabilization. * - * - 3.10.0:Full build — adding native Verilog support (#1107) pulled the - * 4 `.v` resolution-benchmark fixtures into the corpus the incremental - * benchmark sweeps (it runs against the repo root). tree-sitter-verilog - * is a large grammar (SystemVerilog is one of the heaviest in the - * tree-sitter ecosystem) so each file costs noticeably more than the - * other fixture languages. Local measurement: 1959 → 2809 (+43%, run - * 25716010487). The cost is real and structural — not a regression in - * shared code paths. Resolution: either exclude `tests/benchmarks/ - * resolution/fixtures/verilog/**` from the benchmark sweep or accept the - * one-time bump as the cost of supporting Verilog. Tracked separately; - * exempt this release. - * * - 3.10.0:Query time — cumulative effect of adding two native extractors * (Solidity #1100 + R #1102) in quick succession. Neither tripped the * threshold individually (Solidity PR's Query time stayed at 49ms, R PR @@ -230,7 +218,6 @@ const KNOWN_REGRESSIONS = new Set([ '3.10.0:fnDeps depth 1', '3.10.0:fnDeps depth 3', '3.10.0:fnDeps depth 5', - '3.10.0:Full build', '3.10.0:Query time', ]); diff --git a/tests/integration/config-include-exclude.test.ts b/tests/integration/config-include-exclude.test.ts index dd3655ee..6209555c 100644 --- a/tests/integration/config-include-exclude.test.ts +++ b/tests/integration/config-include-exclude.test.ts @@ -178,4 +178,37 @@ describe('config.include / config.exclude (issue #981)', () => { // Paths are already relative to each run's own tmpDir so they compare directly. expect(nativeFiles).toEqual(wasmFiles); }); + + // ── opts.exclude (programmatic, no on-disk config) ─────────────── + + async function buildWithOptsExclude( + root: string, + engine: EngineName, + optsExclude: string[], + ): Promise { + clearConfigCache(); + const dbDir = path.join(root, '.codegraph'); + if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true }); + await buildGraph(root, { engine, exclude: optsExclude, skipRegistry: true }); + const files = readFileRows(path.join(dbDir, 'graph.db')); + return files.map((f) => f.replace(/\\/g, '/')).sort(); + } + + it('wasm: opts.exclude rejects matching files without writing config', async () => { + const root = fs.mkdtempSync(path.join(tmpDir, 'opts-wasm-')); + writeFixture(root); + const files = await buildWithOptsExclude(root, 'wasm', ['**/*.test.js', '**/*.spec.js']); + expect(files).toContain('src/math.js'); + expect(files).not.toContain('src/math.test.js'); + expect(files).not.toContain('src/util.spec.js'); + }); + + itNative('native: opts.exclude rejects matching files without writing config', async () => { + const root = fs.mkdtempSync(path.join(tmpDir, 'opts-native-')); + writeFixture(root); + const files = await buildWithOptsExclude(root, 'native', ['**/*.test.js', '**/*.spec.js']); + expect(files).toContain('src/math.js'); + expect(files).not.toContain('src/math.test.js'); + expect(files).not.toContain('src/util.spec.js'); + }); }); From b008ffb74d9c5f7b6d8339ca678cf4795266653e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 15 May 2026 10:09:41 -0600 Subject: [PATCH 2/5] test(exclude): add incremental round-trip tests for opts.exclude (#1134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile flagged that the existing opts.exclude tests always wipe the DB before building, so they only exercise the fresh-build path. The scenario where opts.exclude changes between builds against the same DB (files previously indexed must be detected as removals on the second pass) was untested. Add wasm + native parity tests that build the fixture twice against the same DB — first with no exclude, then with exclude — and assert the second build drops the previously-indexed test/spec files from file_hashes. Locks in the collect → detect-removal contract that the benchmark scripts depend on once their exclude list changes mid-life. --- .../config-include-exclude.test.ts | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/integration/config-include-exclude.test.ts b/tests/integration/config-include-exclude.test.ts index 6209555c..83839459 100644 --- a/tests/integration/config-include-exclude.test.ts +++ b/tests/integration/config-include-exclude.test.ts @@ -211,4 +211,70 @@ describe('config.include / config.exclude (issue #981)', () => { expect(files).not.toContain('src/math.test.js'); expect(files).not.toContain('src/util.spec.js'); }); + + // ── opts.exclude incremental round trip ────────────────────────── + // + // Greptile feedback on PR #1134: the opts.exclude tests above always wipe + // the DB before building, so they only exercise the fresh-build path. The + // scenario where files that were previously indexed become excluded on a + // subsequent incremental run (i.e. opts.exclude changes between builds + // against the same DB) was untested. This round trip locks in the + // collect → detect behaviour: the second build must observe the newly + // excluded files as removals and drop them from file_hashes. + + async function buildSameDb( + root: string, + engine: EngineName, + optsExclude: string[] | undefined, + ): Promise { + clearConfigCache(); + await buildGraph(root, { + engine, + ...(optsExclude !== undefined ? { exclude: optsExclude } : {}), + skipRegistry: true, + }); + const files = readFileRows(path.join(root, '.codegraph', 'graph.db')); + return files.map((f) => f.replace(/\\/g, '/')).sort(); + } + + it('wasm: opts.exclude introduced on second incremental build drops previously-indexed files', async () => { + const root = fs.mkdtempSync(path.join(tmpDir, 'opts-inc-wasm-')); + writeFixture(root); + // Wipe DB so the first build is a clean baseline that indexes everything. + const dbDir = path.join(root, '.codegraph'); + if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true }); + + // First build: no exclude — every supported file is indexed. + const firstFiles = await buildSameDb(root, 'wasm', undefined); + expect(firstFiles).toContain('src/math.test.js'); + expect(firstFiles).toContain('src/util.spec.js'); + + // Second build against the same DB with exclude — previously-indexed + // test files must be detected as removals and disappear from file_hashes. + const secondFiles = await buildSameDb(root, 'wasm', ['**/*.test.js', '**/*.spec.js']); + expect(secondFiles).toContain('src/math.js'); + expect(secondFiles).toContain('src/util.js'); + expect(secondFiles).not.toContain('src/math.test.js'); + expect(secondFiles).not.toContain('src/util.spec.js'); + }); + + itNative( + 'native: opts.exclude introduced on second incremental build drops previously-indexed files', + async () => { + const root = fs.mkdtempSync(path.join(tmpDir, 'opts-inc-native-')); + writeFixture(root); + const dbDir = path.join(root, '.codegraph'); + if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true }); + + const firstFiles = await buildSameDb(root, 'native', undefined); + expect(firstFiles).toContain('src/math.test.js'); + expect(firstFiles).toContain('src/util.spec.js'); + + const secondFiles = await buildSameDb(root, 'native', ['**/*.test.js', '**/*.spec.js']); + expect(secondFiles).toContain('src/math.js'); + expect(secondFiles).toContain('src/util.js'); + expect(secondFiles).not.toContain('src/math.test.js'); + expect(secondFiles).not.toContain('src/util.spec.js'); + }, + ); }); From e2346e0c4d6c1e9a4b86ac5b15bbed7ec9583d94 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 15 May 2026 10:09:50 -0600 Subject: [PATCH 3/5] fix(bench): exempt 3.10.0:DB bytes/file from regression guard (#1134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fixture-exclude in this PR shifts the denominator of every per-file build metric: the 3.10.0 baseline was measured over ~745 files (codegraph source + resolution-benchmark fixtures), while dev now measures the ~607 source files alone. DB content is dominated by src/, so absolute bytes stay roughly constant while the file count drops — inflating dbSizeBytes/file from 41614 to ~52211 (+25%, exactly at the threshold). This is a one-time methodology shift, not a real regression in the schema or extraction layer (which is why the old 3.10.0:Full build exemption could be dropped — Full build absolute timing actually improved with the exclude). Add a one-release exemption with a detailed comment so the distinction is visible to future maintainers; remove once 3.11.0+ data is captured under the post-#1134 methodology. --- tests/benchmarks/regression-guard.test.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 1694c0d5..7ae5ca23 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -206,6 +206,20 @@ const SKIP_VERSIONS = new Set(['3.8.0']); * (24.3 → 45.6ms) is consistent with the other depths. Tracked in #1113 * alongside depth 1, depth 5, and Query time; remove all four once * 3.11.0+ data confirms the new steady-state. + * + * - 3.10.0:DB bytes/file — one-time per-file methodology shift introduced + * by #1134, which excludes `tests/benchmarks/resolution/fixtures/**` + * from the dogfooding `buildGraph` sweep so heavyweight new grammars + * (e.g. Verilog #1107) no longer inflate timing. The 3.10.0 baseline + * was measured with fixtures in the corpus (~745 files); dev now + * measures the codegraph source alone (~607 files). DB content is + * dominated by `src/`, so total bytes stay roughly constant while the + * denominator drops, inflating `dbSizeBytes/file` from 41614 → ~52211 + * (+25%, exactly at the threshold). This is the same shape as the old + * `3.10.0:Full build` exemption (now removed because Full build absolute + * timing actually improved) — a measurement-scope artifact, not a real + * regression in the schema or extraction layer. Exempt this release; + * remove once 3.11.0+ data is captured under the post-#1134 methodology. */ const KNOWN_REGRESSIONS = new Set([ '3.9.6:Build ms/file', @@ -219,6 +233,7 @@ const KNOWN_REGRESSIONS = new Set([ '3.10.0:fnDeps depth 3', '3.10.0:fnDeps depth 5', '3.10.0:Query time', + '3.10.0:DB bytes/file', ]); /** From d5b354ab0e1119c02659b672501012edf0baed8f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 16 May 2026 22:28:19 -0600 Subject: [PATCH 4/5] fix: remove duplicate exclude field from BuildGraphOpts (#1134) --- src/types.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/types.ts b/src/types.ts index 5c845e86..8e4bf13c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1071,12 +1071,6 @@ export interface BuildGraphOpts { */ exclude?: string[]; skipRegistry?: boolean; - /** - * Extra exclude globs appended to `config.exclude`. Lets benchmark scripts - * skip fixture directories that bloat self-build timings without permanently - * affecting `.codegraphrc.json` for normal users. - */ - exclude?: string[]; } /** Build timing result from buildGraph. */ From e6bf5a16fc2159196ebf0cf63d39056478d0258c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 16 May 2026 22:28:24 -0600 Subject: [PATCH 5/5] fix: skip BENCHMARK_EXCLUDES in --npm mode for corpus parity (#1134) --- scripts/benchmark.ts | 11 +++++----- scripts/incremental-benchmark.ts | 7 +++++-- scripts/lib/bench-config.ts | 36 ++++++++++++++++++++++++++++++++ scripts/query-benchmark.ts | 4 ++-- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index 0ed92c3f..0607a8ac 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -15,7 +15,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; -import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -93,6 +93,7 @@ try { const INCREMENTAL_RUNS = 3; const QUERY_RUNS = 5; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); +const BENCH_EXCLUDE = [...resolveBenchmarkExcludes()]; function median(arr) { const sorted = [...arr].sort((a, b) => a - b); @@ -130,7 +131,7 @@ console.log = (...args) => console.error(...args); if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); const buildStart = performance.now(); -const buildResult = await buildGraph(root, { engine, incremental: false, exclude: [...BENCHMARK_EXCLUDES] }); +const buildResult = await buildGraph(root, { engine, incremental: false, exclude: BENCH_EXCLUDE }); const buildTimeMs = performance.now() - buildStart; const queryStart = performance.now(); @@ -150,7 +151,7 @@ try { const noopTimings = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { const start = performance.now(); - await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); noopTimings.push(performance.now() - start); } noopRebuildMs = Math.round(median(noopTimings)); @@ -167,7 +168,7 @@ try { for (let i = 0; i < INCREMENTAL_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); const start = performance.now(); - const res = await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); + const res = await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); oneFileRuns.push({ ms: performance.now() - start, phases: res?.phases || null }); } oneFileRuns.sort((a, b) => a.ms - b.ms); @@ -179,7 +180,7 @@ try { } finally { fs.writeFileSync(PROBE_FILE, original); try { - await buildGraph(root, { engine, incremental: true, exclude: [...BENCHMARK_EXCLUDES] }); + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); } catch { // Cleanup rebuild failed — probe file is already restored, move on } diff --git a/scripts/incremental-benchmark.ts b/scripts/incremental-benchmark.ts index 1f3f717f..853d595d 100644 --- a/scripts/incremental-benchmark.ts +++ b/scripts/incremental-benchmark.ts @@ -14,7 +14,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; -import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -185,7 +185,10 @@ const WARMUP_RUNS = 2; // are excluded from every benchmark `buildGraph` call. See that constant for the // full rationale — short version: hand-annotated fixtures aren't representative // of real source, and heavyweight grammars (#1107) silently inflate timings. -const BUILD_OPTS = { engine, exclude: [...BENCHMARK_EXCLUDES] }; +// `resolveBenchmarkExcludes` returns `[]` in `--npm` mode so the baseline (an +// older published version that ignores `opts.exclude`) and the dev run sweep +// the same corpus. +const BUILD_OPTS = { engine, exclude: [...resolveBenchmarkExcludes()] }; function median(arr) { const sorted = [...arr].sort((a, b) => a - b); diff --git a/scripts/lib/bench-config.ts b/scripts/lib/bench-config.ts index 7a3a71e8..44000d0a 100644 --- a/scripts/lib/bench-config.ts +++ b/scripts/lib/bench-config.ts @@ -25,11 +25,47 @@ import { getBenchmarkVersion } from '../bench-version.js'; * (e.g. tree-sitter-verilog added ~850ms to native fullBuildMs in #1107). * Excluding them here keeps build/query/incremental benchmarks measuring * codegraph's own source rather than its test fixtures. + * + * NOTE: callers should generally prefer `resolveBenchmarkExcludes()` instead + * of this constant. The helper returns `[]` in `--npm` mode so the dev-vs- + * baseline corpus stays consistent — published versions before this PR + * silently dropped `opts.exclude`, which would otherwise leave the baseline + * sweeping fixtures while the dev run skipped them. */ export const BENCHMARK_EXCLUDES: readonly string[] = [ 'tests/benchmarks/resolution/fixtures/**', ]; +/** + * `BENCHMARK_EXCLUDES` in local mode; `[]` in `--npm` mode. + * + * `--npm` benchmarks load `buildGraph` from a previously-published version + * via `srcImport(srcDir, ...)`. Releases before `BuildGraphOpts.exclude` + * landed don't recognise the option and silently drop it, so passing the + * excludes to a stale baseline would make it sweep ~745 files while the dev + * run sweeps ~607 — a corpus-mismatch that disguises measurement-shift as a + * perf delta. Emitting `[]` in `--npm` mode keeps the comparison apples-to- + * apples; the warning makes the methodology shift explicit. + * + * Idempotent across calls (the warning is printed on the first invocation + * only — `process.stderr.write` is a no-op but the helper is conceptually + * "compute once, return constant"); intentionally synchronous because + * `parseArgs` is. + */ +let warnedAboutNpmExcludeSkip = false; +export function resolveBenchmarkExcludes(): readonly string[] { + const { npm } = parseArgs(); + if (!npm) return BENCHMARK_EXCLUDES; + if (!warnedAboutNpmExcludeSkip) { + console.error( + 'Note: --npm mode skips BENCHMARK_EXCLUDES so the baseline and dev runs sweep the same corpus. ' + + 'Published versions before #1134 ignore opts.exclude silently; passing it would skew dev timings down by ~138 fewer files.', + ); + warnedAboutNpmExcludeSkip = true; + } + return []; +} + // On Windows, `npm` is `npm.cmd` and Node refuses to spawn `.cmd`/`.bat` // without `shell: true` (since Node 18.20 / 20.15). When `shell: true`, the // Windows `cmd.exe` shell resolves bare `npm` to `npm.cmd` automatically, so diff --git a/scripts/query-benchmark.ts b/scripts/query-benchmark.ts index 3a13d0a8..0d53d75e 100644 --- a/scripts/query-benchmark.ts +++ b/scripts/query-benchmark.ts @@ -16,7 +16,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; -import { BENCHMARK_EXCLUDES, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; +import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; // ── Parent process: fork one child per engine, assemble final output ───── @@ -254,7 +254,7 @@ function benchDiffImpact(hubName) { // Build graph for this engine if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); -await buildGraph(root, { engine, incremental: false, exclude: [...BENCHMARK_EXCLUDES] }); +await buildGraph(root, { engine, incremental: false, exclude: [...resolveBenchmarkExcludes()] }); const targets = workerTargets() || selectTargets(); console.error(`Targets: hub=${targets.hub}, mid=${targets.mid}, leaf=${targets.leaf}`);