--wip-- [skip ci]

GuillaumeLagrange · GuillaumeLagrange · commit f511deefc7d7 · 2026-07-01T13:03:47.000+02:00
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -94,4 +94,25 @@ Based on the codebase analysis, to add stats access features:
   - Build: `pnpm turbo run build --filter=<package-name>`
   - Typecheck: `pnpm turbo run typecheck --filter=<package-name>`
   - Lint: `pnpm turbo run lint --filter=<package-name>`
-  - Run a task across all packages by omitting `--filter` (e.g. `pnpm turbo run build`).
+  - Run a task across all packages by omitting `--filter` (e.g. `pnpm turbo run build`).
+
+## Testing a plugin during development
+
+A plugin behaves differently depending on whether CodSpeed is driving the run,
+so exercise all three of the modes listed below when developing or reviewing a
+plugin change (build the plugin first — the benches import from `dist`):
+
+1. **Fallback (not under CodSpeed).** No env vars. The plugin must stay out of
+   the way and let the framework run its benchmarks normally (no instrumentation,
+   no hijacked output). e.g. `pnpm turbo run bench --filter=<package-name>`.
+2. **Instrumentation / simulation.** `CODSPEED_ENV=true CODSPEED_RUNNER_MODE=simulation`
+   (or `instrumentation`). The plugin hijacks the run to do a single instrumented
+   pass per benchmark and prints `Measured/Checked <uri>` instead of the normal
+   harness output.
+3. **Walltime.** `CODSPEED_ENV=true CODSPEED_RUNNER_MODE=walltime`. The plugin
+   instruments the framework's real benchmark loop and collects walltime results.
+
+Running these locally outside the CodSpeed runner is expected to log
+`instrument-hooks: failed to write environment.json` and skip actual measurement
+writes — the point is to verify the plugin's control flow and output per mode,
+not to produce real measurements.
diff --git a/packages/vitest-plugin/src/instrument.ts b/packages/vitest-plugin/src/instrument.ts
@@ -5,6 +5,7 @@ import {
   MARKER_TYPE_BENCHMARK_START,
   msToNs,
   msToS,
+  optimizeFunction,
   wrapWithRootFrame,
   writeWalltimeResults,
   type Benchmark,
@@ -14,6 +15,21 @@ import type * as tinybench from "tinybench";
 
 export type Tinybench = typeof tinybench;
 
+/**
+ * tinybench's per-task lifecycle hooks (a subset of `FnOptions`).
+ *
+ * Every hook is optional. Each one may be handed the phase it is running in
+ * (`"run"` or `"warmup"`) and its return value is typed `unknown`, so a caller
+ * that needs to wait for an asynchronous hook has to `await` the result itself.
+ */
+export interface TinybenchFnOptions {
+  beforeAll?: (mode?: "run" | "warmup") => unknown;
+  beforeEach?: (mode?: "run" | "warmup") => unknown;
+  afterEach?: (mode?: "run" | "warmup") => unknown;
+  afterAll?: (mode?: "run" | "warmup") => unknown;
+}
+
+/**
+ * The captured registration for a task: its fn and options.
+ *
+ * `fn` is deliberately loose (`any` arguments and return) because it stands in
+ * for whatever benchmark function the user registered; `fnOpts` is absent when
+ * the task was registered without lifecycle hooks.
+ */
+export interface CapturedTask {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  fn: (...args: any[]) => any;
+  fnOpts?: TinybenchFnOptions;
+}
+
 /** A tinybench task, exposing the `fn` the runner wraps with the root frame. */
 export interface TinybenchTask {
   name: string;
@@ -34,17 +50,6 @@ export interface TinybenchBench {
   teardown: TinybenchHook;
 }
 
-/** The minimal task shape `patchTaskRunWithRootFrame` mutates. */
-interface RunnableTask {
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  fn: (...args: any[]) => any;
-}
-
-/** The tinybench Task prototype whose `run` we wrap. */
-interface TinybenchTaskClass {
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  prototype: { run: (this: any) => Promise<unknown> };
-}
 
 /**
  * The tinybench statistics shape (latency/throughput) shared across the v2 and
@@ -77,7 +82,7 @@ interface InstrumentWindow {
   runStart: bigint | null;
 }
 
-let isTaskPatched = false;
+// Module-wide guard for `captureBenchAddOnce`: once set, later calls return
+// without patching again, whichever class they are given.
+let isBenchAddPatched = false;
 
 /**
  * The window bracketing the currently running task's measured loop, driven by
@@ -86,14 +91,85 @@ let isTaskPatched = false;
  */
 const instrumentWindow: InstrumentWindow = { runStart: null };
 
+// tinybench keeps a task's fn and options as `#private` fields (v6+), so we
+// capture them ourselves when `Bench.add` runs, keyed by bench then task name.
+// The analysis seam needs the raw fn to run it under its own tight window
+// instead of tinybench's timing loop.
+// The outer map is a WeakMap keyed by the bench instance, so holding a capture
+// does not by itself keep a bench alive.
+const capturedTasks = new WeakMap<object, Map<string, CapturedTask>>();
+
+/**
+ * The minimal tinybench Bench prototype we patch to capture registrations.
+ *
+ * Only `prototype.add` is described, and with a fully loose signature, so any
+ * class exposing an `add` method on its prototype satisfies this shape.
+ */
+interface TinybenchBenchClass {
+  prototype: {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    add: (...args: any[]) => unknown;
+  };
+}
+
+/**
+ * Patch `Bench.prototype.add` to record each task's fn and options, keyed by
+ * bench then task name. Idempotent, and applied to the prototype so it captures
+ * registrations on every Bench the host constructs.
+ *
+ * `BenchClass` must be the exact class the host instantiates. In tinybench v6 a
+ * task's fn is a true `#private` field — it cannot be read or replaced on the
+ * task afterwards — so capturing (and, for walltime, root-frame-wrapping) has to
+ * happen here, as the fn is registered.
+ *
+ * `registerFn` transforms the fn actually handed to tinybench: identity for
+ * analysis (which runs the captured fn itself), or a root-frame wrap for
+ * walltime (where tinybench drives the fn and the frame must already be baked
+ * in).
+ *
+ * A capture only survives if the registration does: when `registerFn` or the
+ * original `add` throws (for example because the bench rejects the task name),
+ * the previous capture for that name is restored before the error is rethrown.
+ *
+ * @param BenchClass - The class whose `prototype.add` is replaced.
+ * @param registerFn - Maps the user's fn to the fn that is passed on to the
+ *   original `add`. The unmapped fn is what gets captured.
+ */
+export function captureBenchAddOnce(
+  BenchClass: TinybenchBenchClass,
+  registerFn: (fn: CapturedTask["fn"]) => CapturedTask["fn"],
+): void {
+  if (isBenchAddPatched) {
+    return;
+  }
+
+  const originalAdd = BenchClass.prototype.add;
+  BenchClass.prototype.add = function (
+    this: object,
+    name: string,
+    fn: CapturedTask["fn"],
+    fnOpts?: TinybenchFnOptions,
+  ) {
+    let byName = capturedTasks.get(this);
+    if (!byName) {
+      byName = new Map<string, CapturedTask>();
+      capturedTasks.set(this, byName);
+    }
+
+    // Record before delegating so the capture is already visible to anything
+    // that runs while the original `add` executes, but remember what it
+    // replaces so a rejected registration cannot clobber a valid capture.
+    const previous = byName.get(name);
+    byName.set(name, { fn, fnOpts });
+    try {
+      return originalAdd.call(this, name, registerFn(fn), fnOpts);
+    } catch (error) {
+      if (previous) {
+        byName.set(name, previous);
+      } else {
+        byName.delete(name);
+      }
+      throw error;
+    }
+  };
+
+  // Flip the guard only once the prototype really is patched, so a failure
+  // above does not permanently disable later attempts.
+  isBenchAddPatched = true;
+}
+
+/**
+ * Look up the registration captured for `taskName` on `bench`.
+ *
+ * @param bench - The bench instance the task was added to.
+ * @param taskName - The name the task was registered under.
+ * @returns The captured fn/options, or `undefined` when nothing was captured
+ *   for that bench or for that name.
+ */
+export function getCapturedTask(
+  bench: object,
+  taskName: string,
+): CapturedTask | undefined {
+  const tasksForBench = capturedTasks.get(bench);
+  if (tasksForBench === undefined) {
+    return undefined;
+  }
+  return tasksForBench.get(taskName);
+}
+
+/**
+ * The tinybench Task prototype whose `run` the legacy seam wraps.
+ *
+ * `run` is typed with `this: any` because the replacement installed by
+ * `patchTaskRunOnce` is invoked with the task instance as its receiver.
+ */
+interface TinybenchTaskClass {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  prototype: { run: (this: any) => Promise<unknown> };
+}
+
+// Module-wide guard for `patchTaskRunOnce`: once set, later calls return
+// without patching `Task.prototype.run` again.
+let isTaskPatched = false;
+
 /**
- * Wrap every task's fn with the root frame so collected stacks are attributed to
- * a benchmark. Idempotent: patching the shared `Task.prototype.run` in place hits
- * every Bench instance, so repeat calls are no-ops.
+ * Wrap every task's fn with the root frame by patching `Task.prototype.run` in
+ * place. Used only by the legacy (Vitest 3/4) walltime seam, which runs on
+ * tinybench v2 where a task's `fn` is a plain, reassignable property.
  *
- * `TaskClass` must be the exact prototype the host constructed its tasks against
- * (taken from a live task, not imported) so the patch applies even when multiple
- * copies of tinybench are installed.
+ * The Vitest 5 seam cannot use this: tinybench v6 made `fn` a true `#private`
+ * field, so reassigning `task.fn` is a silent no-op there — the frame must be
+ * baked in at registration time instead (see rootFrameRegisterFn).
  */
 export function patchTaskRunOnce(TaskClass: TinybenchTaskClass): void {
   if (isTaskPatched) {
@@ -102,7 +178,7 @@ export function patchTaskRunOnce(TaskClass: TinybenchTaskClass): void {
   isTaskPatched = true;
 
   const originalRun = TaskClass.prototype.run;
-  TaskClass.prototype.run = async function (this: RunnableTask) {
+  TaskClass.prototype.run = async function (this: CapturedTask) {
     const originalFn = this.fn;
     this.fn = wrapWithRootFrame(() => originalFn.call(this));
 
@@ -114,6 +190,53 @@ export function patchTaskRunOnce(TaskClass: TinybenchTaskClass): void {
   };
 }
 
+/**
+ * The root-frame wrap to hand tinybench at registration time (walltime, v5).
+ * Post-hoc assignment to a task's `fn` is a no-op on tinybench v6 (private
+ * field), so the frame must be baked into the registered fn instead.
+ *
+ * The inner arrow calls `fn` with no arguments and without a receiver, so
+ * whatever `this`/arguments the harness supplies are not forwarded to the
+ * user's fn.
+ * NOTE(review): the legacy `patchTaskRunOnce` wrap uses `originalFn.call(this)`
+ * instead — confirm that dropping the receiver here is intentional.
+ *
+ * @param fn - The benchmark function as registered by the user.
+ * @returns The result of `wrapWithRootFrame` applied to a zero-argument call
+ *   of `fn`.
+ */
+export function rootFrameRegisterFn(
+  fn: CapturedTask["fn"],
+): CapturedTask["fn"] {
+  return wrapWithRootFrame(() => fn());
+}
+
+/**
+ * Identity registration: analysis runs the captured fn itself, unwrapped.
+ *
+ * @param fn - The benchmark function as registered by the user.
+ * @returns The very same function, untouched.
+ */
+export function identityRegisterFn(fn: CapturedTask["fn"]): CapturedTask["fn"] {
+  return fn;
+}
+
+/**
+ * Run one benchmark under instrumentation, matching the analysis window the
+ * Vitest 3/4 runner uses exactly: warm the JIT with `optimizeFunction` outside
+ * the window, run the user hooks around a single measured `fn()`, and bracket
+ * only that call with `startBenchmark`/`stopBenchmark` under the root frame. The
+ * measurement comes from the instrument, so no wall-clock markers are emitted
+ * and tinybench's timing loop is not involved.
+ *
+ * Sequence: `beforeAll`; a warm-up pass (`beforeEach`, `fn`, `afterEach`) handed
+ * to `optimizeFunction`; then `beforeEach`, an optional GC, the measured `fn`,
+ * `afterEach` and `afterAll`. Every hook is called with mode `"run"` and
+ * awaited.
+ *
+ * NOTE(review): there is no `try`/`finally`, so if `fn` or a hook rejects, the
+ * remaining steps (including `stopBenchmark` and the after-hooks) are skipped
+ * and the rejection propagates to the caller — confirm that is intended.
+ *
+ * @param task - The captured registration: the raw `fn` plus its optional
+ *   lifecycle hooks.
+ * @param uri - Benchmark identifier passed to `setExecutedBenchmark`.
+ * @returns A promise that resolves once `afterAll` has been awaited.
+ */
+export async function runAnalysisTask(
+  { fn, fnOpts }: CapturedTask,
+  uri: string,
+): Promise<void> {
+  await fnOpts?.beforeAll?.("run");
+  // Warm-up pass: runs before startBenchmark(), so it is not measured.
+  await optimizeFunction(async () => {
+    await fnOpts?.beforeEach?.("run");
+    await fn();
+    await fnOpts?.afterEach?.("run");
+  });
+
+  await fnOpts?.beforeEach?.("run");
+  // Optional call: does nothing when `global.gc` is not defined.
+  global.gc?.();
+  // Only the single `fn()` below sits between startBenchmark/stopBenchmark.
+  await wrapWithRootFrame(async () => {
+    InstrumentHooks.startBenchmark();
+    await fn();
+    InstrumentHooks.stopBenchmark();
+    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+  })();
+  await fnOpts?.afterEach?.("run");
+  await fnOpts?.afterAll?.("run");
+}
+
 /**
  * Drive the instrumentation window from each bench's run-mode setup/teardown
  * hooks so it brackets only tinybench's measured loop, excluding the warmup
@@ -148,24 +271,29 @@ export function installInstrumentHooks(
 }
 
+/**
+ * Close the window tracked in `instrumentWindow`: hand its recorded start to
+ * `emitBenchmarkWindow` together with `uri`, then clear `runStart`.
+ *
+ * NOTE(review): `runStart` is non-null-asserted rather than checked, so this
+ * relies on a window having been opened beforehand — confirm the callers
+ * guarantee that.
+ */
 function closeInstrumentWindow(uri: string): void {
-  const runEnd = InstrumentHooks.currentTimestamp();
+  emitBenchmarkWindow(uri, instrumentWindow.runStart!);
+  instrumentWindow.runStart = null;
 }
+
+/**
+ * Close the currently open instrumentation window: emit the benchmark markers
+ * bracketing [start, now], stop the benchmark, and attribute the sample to `uri`.
+ *
+ * Benchmark markers must land inside the sample window opened by
+ * startBenchmark(), so they are emitted before stopBenchmark() closes it. The
+ * runner consumes the FIFO stream in order, so a marker sent after stopBenchmark
+ * would fall outside the sample and break the expected
+ * SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
+ *
+ * The end timestamp is read first, before any marker is written; both markers
+ * and the executed-benchmark record are tagged with the current process id.
+ *
+ * @param uri - Benchmark identifier passed to `setExecutedBenchmark`.
+ * @param start - Timestamp used for the BENCHMARK_START marker.
+ */
+function emitBenchmarkWindow(uri: string, start: bigint): void {
+  const end = InstrumentHooks.currentTimestamp();
   const pid = process.pid;
 
-  // Benchmark markers must land inside the sample window opened by
-  // startBenchmark(), so they have to be emitted before stopBenchmark()
-  // closes it. The runner consumes the FIFO stream in order, so a marker
-  // sent after StopBenchmark falls outside the sample and breaks the
-  // expected SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
-  InstrumentHooks.addMarker(
-    pid,
-    MARKER_TYPE_BENCHMARK_START,
-    instrumentWindow.runStart!,
-  );
-  InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_END, runEnd);
+  InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_START, start);
+  InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_END, end);
 
   InstrumentHooks.stopBenchmark();
   InstrumentHooks.setExecutedBenchmark(pid, uri);
-  instrumentWindow.runStart = null;
 }
 
 /**
diff --git a/packages/vitest-plugin/src/v5/setup.ts b/packages/vitest-plugin/src/v5/setup.ts