Skip to content

Commit 6755581

Browse files
committed
perf: Add IQR outlier removal and median to benchmark statistics
Benchmark results can be noisy due to GC timing and V8 JIT compilation. IQR filtering discards values that fall outside the range [Q1 - 1.5*IQR, Q3 + 1.5*IQR] before statistics are computed. The Samples column shows the retained count (e.g. '4 (-1)' means 4 samples kept, 1 discarded). Filtering is skipped and the full dataset is used when there are fewer than 4 samples. This applies to all benchmarks, not just identity mode.
1 parent 19c3f8e commit 6755581

1 file changed

Lines changed: 57 additions & 25 deletions

File tree

Benchmarks/run.js

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { instantiate } from "./.build/plugins/PackageToJS/outputs/Package/instan
22
import { defaultNodeSetup } from "./.build/plugins/PackageToJS/outputs/Package/platforms/node.js"
33
import fs from 'fs';
44
import path from 'path';
5-
import { parseArgs } from "util"
5+
import { parseArgs } from "util";
66
import { parseIdentityModes, parseIdentityReusePools, runIdentityModeBenchmarks, summarizeIdentityMemory } from "./lib/identity-benchmarks.js"
77
import { APIResultValues as APIResult, ComplexResultValues as ComplexResult } from "./.build/plugins/PackageToJS/outputs/Package/bridge-js.js";
88

@@ -63,17 +63,44 @@ function createNameFilter(arg) {
6363
* @returns {number} Coefficient of variation as a percentage
6464
*/
6565
function calculateCV(values) {
    // Fewer than two samples cannot show any spread.
    if (values.length < 2) {
        return 0;
    }

    // Work on the outlier-filtered samples so one noisy run does not inflate the CV.
    const samples = removeOutliers(values);
    const count = samples.length;

    let total = 0;
    for (const sample of samples) {
        total += sample;
    }

    const mean = total / count;
    // Guard the division below: a zero mean has no meaningful relative spread.
    if (mean === 0) {
        return 0;
    }

    let squaredDeviations = 0;
    for (const sample of samples) {
        squaredDeviations += Math.pow(sample - mean, 2);
    }

    // Population standard deviation (divide by N, not N - 1).
    const stdDev = Math.sqrt(squaredDeviations / count);
    return (stdDev / mean) * 100;
}
7275

73-
const variance = values.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / values.length;
74-
const stdDev = Math.sqrt(variance);
76+
/**
 * Remove outliers using the IQR (interquartile range) method.
 *
 * Values below Q1 - 1.5*IQR or above Q3 + 1.5*IQR are discarded. Quartiles are
 * linearly interpolated between the two nearest order statistics. Picking
 * sorted[floor(n * 0.75)] instead would make Q3 the maximum when n is 4, which
 * places the upper fence at or above every sample so that a single slow run
 * could never be discarded.
 *
 * The input array is not modified, and the kept values stay in their original
 * order.
 *
 * @param {Array<number>} values - Array of measurement values
 * @returns {Array<number>} Values with outliers removed; the original array
 *   itself when it has fewer than 4 samples or when no value passes the filter
 */
function removeOutliers(values) {
    if (values.length < 4) return values;

    const sorted = [...values].sort((a, b) => a - b);

    // Quantile by linear interpolation at fractional rank (n - 1) * p.
    const quantile = (p) => {
        const position = (sorted.length - 1) * p;
        const below = Math.floor(position);
        const above = Math.ceil(position);
        return sorted[below] + (sorted[above] - sorted[below]) * (position - below);
    };

    const q1 = quantile(0.25);
    const q3 = quantile(0.75);
    const iqr = q3 - q1;
    const lower = q1 - 1.5 * iqr;
    const upper = q3 + 1.5 * iqr;

    // Filter the caller's array (not the sorted copy) to preserve sample order.
    const filtered = values.filter((v) => v >= lower && v <= upper);
    return filtered.length > 0 ? filtered : values;
}
7594

76-
return (stdDev / mean) * 100; // Return as percentage
95+
/**
 * Compute the median of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * For an even number of values the result is the mean of the two middle values.
 *
 * @param {Array<number>} values - Array of measurement values
 * @returns {number} Median value (NaN for an empty array)
 */
function median(values) {
    const ordered = Array.from(values);
    ordered.sort((x, y) => x - y);

    const count = ordered.length;
    const upperMiddle = Math.floor(count / 2);

    if (count % 2 !== 0) {
        return ordered[upperMiddle];
    }
    return (ordered[upperMiddle - 1] + ordered[upperMiddle]) / 2;
}
78105

79106
/**
@@ -86,33 +113,38 @@ function calculateStatistics(results) {
86113
const consoleTable = [];
87114

88115
for (const [name, times] of Object.entries(results)) {
89-
const sum = times.reduce((a, b) => a + b, 0);
90-
const avg = sum / times.length;
91-
const min = Math.min(...times);
92-
const max = Math.max(...times);
93-
const variance = times.reduce((a, b) => a + Math.pow(b - avg, 2), 0) / times.length;
94-
const stdDev = Math.sqrt(variance);
95-
const cv = (stdDev / avg) * 100; // Coefficient of variation as percentage
116+
const filtered = removeOutliers(times)
117+
const sum = filtered.reduce((a, b) => a + b, 0)
118+
const avg = sum / filtered.length
119+
const med = median(filtered)
120+
const min = Math.min(...filtered)
121+
const max = Math.max(...filtered)
122+
const variance = filtered.reduce((a, b) => a + Math.pow(b - avg, 2), 0) / filtered.length
123+
const stdDev = Math.sqrt(variance)
124+
const cv = (stdDev / avg) * 100
125+
const outliers = times.length - filtered.length
96126

97127
formattedResults[name] = {
98128
"avg_ms": parseFloat(avg.toFixed(2)),
129+
"median_ms": parseFloat(med.toFixed(2)),
99130
"min_ms": parseFloat(min.toFixed(2)),
100131
"max_ms": parseFloat(max.toFixed(2)),
101132
"stdDev_ms": parseFloat(stdDev.toFixed(2)),
102133
"cv_percent": parseFloat(cv.toFixed(2)),
103-
"samples": times.length,
134+
"samples": filtered.length,
135+
"outliers_removed": outliers,
104136
"rawTimes_ms": times.map(t => parseFloat(t.toFixed(2)))
105-
};
137+
}
106138

107139
consoleTable.push({
108140
Test: name,
141+
'Median (ms)': med.toFixed(2),
109142
'Avg (ms)': avg.toFixed(2),
110143
'Min (ms)': min.toFixed(2),
111144
'Max (ms)': max.toFixed(2),
112-
'StdDev (ms)': stdDev.toFixed(2),
113145
'CV (%)': cv.toFixed(2),
114-
'Samples': times.length
115-
});
146+
'Samples': filtered.length + (outliers > 0 ? ` (-${outliers})` : '')
147+
})
116148
}
117149

118150
return { formattedResults, consoleTable };
@@ -283,7 +315,7 @@ async function singleRun(results, nameFilter, iterations, identityConfig) {
283315
return;
284316
}
285317
// Warmup to reduce JIT/IC noise.
286-
body();
318+
body()
287319
if (typeof globalThis.gc === "function") {
288320
globalThis.gc();
289321
}
@@ -900,7 +932,7 @@ async function runUntilStable(results, options, width, nameFilter, filterArg, it
900932
// Update progress with estimated completion
901933
updateProgress(runs, maxRuns, "Benchmark Progress:", width);
902934

903-
await singleRun(results, nameFilter, iterations, identityConfig)
935+
await singleRun(results, nameFilter, iterations, identityConfig);
904936
runs++;
905937

906938
if (runs === 1 && Object.keys(results).length === 0) {
@@ -973,7 +1005,7 @@ Options:
9731005
--filter=PATTERN Filter benchmarks by name (substring or /regex/flags)
9741006
--identity-mode=MODE Identity benchmarks: off, none, pointer, both (default: off)
9751007
--identity-iterations=N Iterations for identity benchmarks (default: 1000000)
976-
--identity-reuse-pools=N,N Pool sizes for reuse scenarios (default: 1)
1008+
--identity-reuse-pools=N,N Pool sizes for reuse scenarios (default: 1,8,64)
9771009
--identity-memory Enable memory profiling for identity benchmarks
9781010
--help Show this help message
9791011
`);

0 commit comments

Comments
 (0)