githubnext · github-actions · May 18, 2026 · May 18, 2026 · May 26, 2026
diff --git a/benchmarks/pandas/bench_math_ops.py b/benchmarks/pandas/bench_math_ops.py
@@ -0,0 +1,47 @@
+"""Benchmark: math_ops — abs / round on Series and DataFrame of 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# Series values alternate in sign: negative at even positions, positive at odd ones.
+positions = np.arange(SIZE)
+magnitudes = positions + 0.567
+s = pd.Series(np.where(positions % 2 == 0, -magnitudes, magnitudes))
+# Column "a" is entirely negative, column "b" entirely positive.
+df = pd.DataFrame({"a": -(positions + 0.123), "b": positions + 0.456})
+
+
+def run_once():
+    """Run each benchmarked operation one time, discarding the results."""
+    s.abs()
+    df.abs()
+    s.round(1)
+    df.round(1)
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "math_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_na_ops.py b/benchmarks/pandas/bench_na_ops.py
@@ -0,0 +1,54 @@
+"""Benchmark: na_ops — isna / notna / ffill / bfill on 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# float64 values 0..SIZE-1 with NaN at every 5th position (0, 5, 10, ...).
+# Built directly in NumPy: the data under test is plain float64/NaN, so no
+# Python-level list or nullable Int64 intermediate is needed to produce it.
+a_values = np.arange(SIZE, dtype="float64")
+a_values[::5] = np.nan
+s = pd.Series(a_values)
+
+# Second column: 2*i as float64 with NaN at every 7th position (0, 7, 14, ...).
+b_values = np.arange(SIZE, dtype="float64") * 2
+b_values[::7] = np.nan
+
+df = pd.DataFrame({
+    "a": s,
+    "b": pd.Series(b_values),
+})
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    pd.isna(s)
+    pd.notna(s)
+    s.ffill()
+    s.bfill()
+    df.ffill()
+    df.bfill()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.isna(s)
+    pd.notna(s)
+    s.ffill()
+    s.bfill()
+    df.ffill()
+    df.bfill()
+total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
+
+print(json.dumps({
+    "function": "na_ops",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_notna_boolean.py b/benchmarks/pandas/bench_notna_boolean.py
@@ -0,0 +1,47 @@
+"""Benchmark: notna_boolean — boolean-mask indexing on 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE))
+# Boolean Series mask: True at even positions.
+mask = pd.Series(np.arange(SIZE) % 2 == 0)
+# Plain NumPy boolean mask: False at multiples of 3.
+bool_arr = np.arange(SIZE) % 3 != 0
+
+df = pd.DataFrame({"a": np.arange(SIZE), "b": np.arange(SIZE) * 2})
+
+
+def run_once():
+    """Apply each mask one time, discarding the selected rows."""
+    s[mask]
+    s[~mask]  # the inversion is recomputed on every pass
+    df[bool_arr]
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "notna_boolean",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_read_html.py b/benchmarks/pandas/bench_read_html.py
@@ -0,0 +1,64 @@
+"""
+Benchmark: pd.read_html — parse HTML tables into DataFrames.
+Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import math
+from io import StringIO
+
+# Each dependency is pip-installed into the running interpreter when its
+# import fails.
+try:
+    import pandas as pd
+except ImportError:
+    import subprocess, sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "--quiet"])
+    import pandas as pd
+
+# lxml is imported only to make sure it is installed; it is never referenced
+# directly below.
+try:
+    import lxml  # noqa: F401
+except ImportError:
+    import subprocess, sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "lxml", "--quiet"])
+
+ROWS = 1_000
+WARMUP = 3
+ITERATIONS = 20
+
+
+def build_html(rows: int) -> str:
+    """Return a single ``<table>`` document with ``rows`` body rows.
+
+    The header names four columns (id, name, value, score) and every cell is
+    derived from the row number alone, so repeated calls give identical text.
+    """
+    header = "<tr><th>id</th><th>name</th><th>value</th><th>score</th></tr>"
+    body_rows = [
+        f"<tr><td>{i}</td><td>item_{i % 100}</td><td>{i * 1.5:.2f}</td><td>{math.sin(i * 0.01):.6f}</td></tr>"
+        for i in range(rows)
+    ]
+    return f"<table><thead>{header}</thead><tbody>{''.join(body_rows)}</tbody></table>"
+
+
+html = build_html(ROWS)
+
+# Warm-up. The markup is handed to read_html through a fresh StringIO on every
+# call: passing the HTML text itself is deprecated since pandas 2.1 and emits
+# a FutureWarning per call, which would otherwise also fire in the timed loop.
+for _ in range(WARMUP):
+    pd.read_html(StringIO(html))
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.read_html(StringIO(html))
+total_ms = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
+
+print(json.dumps({
+    "function": "read_html",
+    "mean_ms": total_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_reduce_ops.py b/benchmarks/pandas/bench_reduce_ops.py
@@ -0,0 +1,48 @@
+"""Benchmark: reduce_ops — nunique / any / all on Series and DataFrame of 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+positions = np.arange(SIZE)
+# Integer Series cycling through 0..999.
+s = pd.Series(positions % 1000)
+# Boolean Series that is False only at position 0.
+bool_s = pd.Series(positions > 0)
+# Columns cycle through 500, 200 and 100 distinct values respectively.
+df = pd.DataFrame({"a": positions % 500, "b": positions % 200, "c": positions % 100})
+
+
+def run_once():
+    """Run each benchmarked reduction one time, discarding the results."""
+    s.nunique()
+    bool_s.any()
+    bool_s.all()
+    df.nunique()
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "reduce_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_rename_ops.py b/benchmarks/pandas/bench_rename_ops.py
@@ -0,0 +1,54 @@
+"""Benchmark: rename_ops — rename / add_prefix / add_suffix on Series/DataFrame of 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# The Series carries string labels ("row_0", "row_1", ...) for rename() to map over.
+row_labels = [f"row_{i}" for i in range(SIZE)]
+s = pd.Series(np.arange(SIZE), index=row_labels)
+df = pd.DataFrame({
+    "col_a": np.arange(SIZE),
+    "col_b": np.arange(SIZE) * 2,
+    "col_c": np.arange(SIZE) * 3,
+})
+
+
+def relabel(lbl):
+    """Return the label with "new_" put in front of it."""
+    return f"new_{lbl}"
+
+
+def run_once():
+    """Run each benchmarked relabelling one time, discarding the results."""
+    s.rename(relabel)
+    df.rename(columns={"col_a": "a", "col_b": "b"})
+    df.add_prefix("pre_")
+    df.add_suffix("_suf")
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "rename_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_value_counts_full.py b/benchmarks/pandas/bench_value_counts_full.py
@@ -0,0 +1,42 @@
+"""Benchmark: value_counts_full — value_counts(bins=N) on Series of 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# Seeded generator, so every run bins the same floats drawn from [0, 100).
+rng = np.random.default_rng(42)
+s = pd.Series(rng.random(SIZE) * 100)
+
+
+def run_once():
+    """Bin the Series into 10 and then 20 intervals, discarding the counts."""
+    s.value_counts(bins=10)
+    s.value_counts(bins=20)
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "value_counts_full",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_window_extended.py b/benchmarks/pandas/bench_window_extended.py
@@ -0,0 +1,50 @@
+"""Benchmark: window_extended — rolling sem/skew/kurt/quantile on 100k rows.
+
+Prints one JSON object with the keys "function", "mean_ms", "iterations"
+and "total_ms".
+"""
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 3
+ITERATIONS = 20
+WINDOW = 10
+
+# Sine wave (amplitude 100) with a small linear drift added on top.
+positions = np.arange(SIZE)
+wave = np.sin(positions / 100) * 100
+drift = positions * 0.001
+s = pd.Series(wave + drift)
+
+
+def run_once():
+    """Compute each rolling statistic one time, discarding the results.
+
+    A new rolling object is created for every statistic.
+    """
+    s.rolling(WINDOW).sem()
+    s.rolling(WINDOW).skew()
+    s.rolling(WINDOW).kurt()
+    s.rolling(WINDOW).quantile(0.5)
+
+
+# Warm-up passes run before the clock starts and are not reported.
+for _ in range(WARMUP):
+    run_once()
+
+began = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = (time.perf_counter() - began) * 1000  # seconds -> milliseconds
+
+report = {
+    "function": "window_extended",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/tsb/bench_math_ops.ts b/benchmarks/tsb/bench_math_ops.ts
@@ -0,0 +1,45 @@
+/**
+ * Benchmark: math_ops — absSeries / absDataFrame / roundSeries / roundDataFrame on 100k rows.
+ * Outputs JSON: {"function": "math_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+// Series values alternate in sign: negative at even positions, positive at odd ones.
+const seriesValues = Array.from({ length: SIZE }, (_, i) => (i % 2 === 0 ? -(i + 0.567) : i + 0.567));
+const s = new Series({ data: seriesValues });
+// Column "a" is entirely negative, column "b" entirely positive.
+const df = DataFrame.fromColumns({
+  a: Array.from({ length: SIZE }, (_, i) => -(i + 0.123)),
+  b: Array.from({ length: SIZE }, (_, i) => i + 0.456),
+});
+
+/** Runs each benchmarked operation one time, discarding the results. */
+function runOnce(): void {
+  absSeries(s);
+  absDataFrame(df);
+  roundSeries(s, 1);
+  roundDataFrame(df, 1);
+}
+
+// Warm-up passes run before the clock starts and are not reported.
+for (let pass = 0; pass < WARMUP; pass++) {
+  runOnce();
+}
+
+const began = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass++) {
+  runOnce();
+}
+const elapsedMs = performance.now() - began; // performance.now() is already in ms
+
+const report = {
+  function: "math_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_na_ops.ts b/benchmarks/tsb/bench_na_ops.ts
@@ -0,0 +1,47 @@
+/**
+ * Benchmark: na_ops — isna / notna / ffillSeries / bfillSeries / dataFrameFfill / dataFrameBfill on 100k rows.
+ * Outputs JSON: {"function": "na_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, isna, notna, ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+// Values 0..SIZE-1 with null at every 5th position (0, 5, 10, ...).
+const data: (number | null)[] = Array.from({ length: SIZE }, (_, i) => (i % 5 === 0 ? null : i));
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+  a: data,
+  // 2*i with null at every 7th position (0, 7, 14, ...).
+  b: Array.from({ length: SIZE }, (_, i) => (i % 7 === 0 ? null : i * 2)),
+});
+
+/** Runs each benchmarked missing-value operation one time, discarding the results. */
+function runOnce(): void {
+  isna(s);
+  notna(s);
+  ffillSeries(s);
+  bfillSeries(s);
+  dataFrameFfill(df);
+  dataFrameBfill(df);
+}
+
+// Warm-up passes run before the clock starts and are not reported.
+for (let pass = 0; pass < WARMUP; pass++) {
+  runOnce();
+}
+
+const began = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass++) {
+  runOnce();
+}
+const elapsedMs = performance.now() - began; // performance.now() is already in ms
+
+const report = {
+  function: "na_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));