Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions benchmarks/pandas/bench_math_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Benchmark: math_ops — abs / round on Series and DataFrame of 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # rows in the Series and in each DataFrame column
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixtures: the Series is negative at even positions and positive at odd
# positions; DataFrame column "a" is entirely negative, "b" entirely positive.
positions = np.arange(SIZE)
s = pd.Series(
    np.where(positions % 2 == 0, -(positions + 0.567), positions + 0.567)
)
df = pd.DataFrame({"a": -(positions + 0.123), "b": positions + 0.456})


def _run_pass():
    """Run the four measured operations once and discard their results."""
    s.abs()
    df.abs()
    s.round(1)
    df.round(1)


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "math_ops",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
42 changes: 42 additions & 0 deletions benchmarks/pandas/bench_na_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Benchmark: na_ops — isna / notna / ffill / bfill on 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # rows in the Series and in each DataFrame column
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixtures are built with vectorised NumPy expressions instead of
# element-by-element Python loops. The resulting values and dtype are the
# same as before: a float64 Series holding its own position, with NaN at
# every multiple of 5, and a second column holding twice the position with
# NaN at every multiple of 7.
idx = np.arange(SIZE)
s = pd.Series(np.where(idx % 5 == 0, np.nan, idx.astype("float64")))

df = pd.DataFrame({
    "a": s,
    "b": pd.Series(np.where(idx % 7 == 0, np.nan, idx * 2.0)),
})

# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    pd.isna(s)
    pd.notna(s)
    s.ffill()
    s.bfill()
    df.ffill()
    df.bfill()

start = time.perf_counter()
for _ in range(ITERATIONS):
    pd.isna(s)
    pd.notna(s)
    s.ffill()
    s.bfill()
    df.ffill()
    df.bfill()
total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

# Single-line JSON report on stdout.
print(json.dumps({
    "function": "na_ops",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
36 changes: 36 additions & 0 deletions benchmarks/pandas/bench_notna_boolean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Benchmark: notna_boolean — boolean-mask indexing on 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # rows in the Series and in each DataFrame column
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixtures: an integer Series with a boolean Series mask (True at even
# positions), and a two-column DataFrame with a plain NumPy boolean mask
# (False at every multiple of 3).
positions = np.arange(SIZE)
s = pd.Series(np.arange(SIZE))
mask = pd.Series(positions % 2 == 0)
bool_arr = positions % 3 != 0

df = pd.DataFrame({"a": np.arange(SIZE), "b": np.arange(SIZE) * 2})


def _run_pass():
    """Run the three measured selections once and discard their results."""
    s[mask]
    s[~mask]  # the mask inversion is part of the measured work
    df[bool_arr]


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "notna_boolean",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
52 changes: 52 additions & 0 deletions benchmarks/pandas/bench_read_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Benchmark: pd.read_html — parse HTML tables into DataFrames.
Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import math

try:
import pandas as pd
except ImportError:
import subprocess, sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "--quiet"])
import pandas as pd

try:
import lxml # noqa: F401
except ImportError:
import subprocess, sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "lxml", "--quiet"])

# Number of table rows in the generated HTML document.
ROWS = 1_000
# Untimed parse passes executed before the clock starts.
WARMUP = 3
# Timed parse passes; the reported mean divides the total time by this count.
ITERATIONS = 20


def build_html(rows: int) -> str:
    """Return an HTML document containing one four-column table.

    The header row names the columns ``id``, ``name``, ``value`` and
    ``score``. Body row ``i`` holds ``i``, ``item_<i % 100>``, ``i * 1.5``
    with two decimals, and ``sin(i * 0.01)`` with six decimals.

    Args:
        rows: Number of body rows to generate.

    Returns:
        The complete ``<table>`` markup as a single string.
    """
    header = "<tr><th>id</th><th>name</th><th>value</th><th>score</th></tr>"
    rendered = []
    for i in range(rows):
        rendered.append(
            f"<tr><td>{i}</td><td>item_{i % 100}</td><td>{i * 1.5:.2f}</td><td>{math.sin(i * 0.01):.6f}</td></tr>"
        )
    body = "".join(rendered)
    return "<table><thead>" + header + "</thead><tbody>" + body + "</tbody></table>"


# Imported here because this section is the only user. pandas deprecated
# passing literal HTML text to read_html (2.1+); the documented replacement
# is to wrap the text in a StringIO object.
from io import StringIO

# Build the document once so only parsing is measured.
html = build_html(ROWS)

# Warm-up: untimed passes so one-off start-up costs stay out of the timing.
for _ in range(WARMUP):
    # A fresh buffer per call: a file-like object is consumed by reading.
    pd.read_html(StringIO(html))

start = time.perf_counter()
for _ in range(ITERATIONS):
    pd.read_html(StringIO(html))
total_ms = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

# Single-line JSON report on stdout.
print(json.dumps({
    "function": "read_html",
    "mean_ms": total_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total_ms,
}))
37 changes: 37 additions & 0 deletions benchmarks/pandas/bench_reduce_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Benchmark: reduce_ops — nunique / any / all on Series and DataFrame of 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # rows in each Series and in each DataFrame column
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixtures: an integer Series cycling through 1000 distinct values, a
# boolean Series that is False only at position 0, and a DataFrame whose
# columns cycle through 500, 200 and 100 distinct values.
positions = np.arange(SIZE)
s = pd.Series(positions % 1000)
bool_s = pd.Series(positions > 0)
df = pd.DataFrame({
    name: positions % modulus
    for name, modulus in (("a", 500), ("b", 200), ("c", 100))
})


def _run_pass():
    """Run the four measured reductions once and discard their results."""
    s.nunique()
    bool_s.any()
    bool_s.all()
    df.nunique()


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "reduce_ops",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
36 changes: 36 additions & 0 deletions benchmarks/pandas/bench_rename_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Benchmark: rename_ops — rename / add_prefix / add_suffix on Series/DataFrame of 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # rows in the Series and in each DataFrame column
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixtures: a Series indexed by string labels "row_0" .. "row_<SIZE-1>" and
# a three-column integer DataFrame with the default index.
row_labels = [f"row_{i}" for i in range(SIZE)]
s = pd.Series(np.arange(SIZE), index=row_labels)

base = np.arange(SIZE)
df = pd.DataFrame({"col_a": base, "col_b": base * 2, "col_c": base * 3})


def _run_pass():
    """Run the four measured relabelling calls once and discard the results."""
    s.rename(lambda lbl: f"new_{lbl}")  # callable applied to the index labels
    df.rename(columns={"col_a": "a", "col_b": "b"})
    df.add_prefix("pre_")
    df.add_suffix("_suf")


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "rename_ops",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
28 changes: 28 additions & 0 deletions benchmarks/pandas/bench_value_counts_full.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Benchmark: value_counts_full — value_counts(bins=N) on Series of 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # number of values in the Series
WARMUP = 5          # untimed passes executed before the clock starts
ITERATIONS = 50     # timed passes averaged into mean_ms

# Fixture: random floats scaled by 100, drawn from a generator seeded with
# 42 so every run sees the same data.
rng = np.random.default_rng(42)
samples = rng.random(SIZE) * 100
s = pd.Series(samples)


def _run_pass():
    """Run both binned value_counts calls once and discard the results."""
    s.value_counts(bins=10)
    s.value_counts(bins=20)


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "value_counts_full",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
32 changes: 32 additions & 0 deletions benchmarks/pandas/bench_window_extended.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Benchmark: window_extended — rolling sem/skew/kurt/quantile on 100k rows."""
import json, time
import numpy as np
import pandas as pd

# Benchmark configuration.
SIZE = 100_000      # number of values in the Series
WARMUP = 3          # untimed passes executed before the clock starts
ITERATIONS = 20     # timed passes averaged into mean_ms
WINDOW = 10         # rolling window length passed to Series.rolling

# Fixture: a sine wave of amplitude 100 plus a small linear term.
positions = np.arange(SIZE)
s = pd.Series(np.sin(positions / 100) * 100 + positions * 0.001)


def _run_pass():
    """Run the four measured rolling statistics once, discarding results.

    A new rolling object is created for each statistic, so its construction
    is part of the measured work.
    """
    s.rolling(WINDOW).sem()
    s.rolling(WINDOW).skew()
    s.rolling(WINDOW).kurt()
    s.rolling(WINDOW).quantile(0.5)


# Untimed passes first, so one-off start-up costs stay out of the measurement.
for _ in range(WARMUP):
    _run_pass()

started = time.perf_counter()
for _ in range(ITERATIONS):
    _run_pass()
elapsed_ms = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

report = {
    "function": "window_extended",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
40 changes: 40 additions & 0 deletions benchmarks/tsb/bench_math_ops.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Benchmark: math_ops — absSeries / absDataFrame / roundSeries / roundDataFrame on 100k rows.
* Outputs JSON: {"function": "math_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
*/
import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "../../src/index.ts";

// Benchmark configuration.
const SIZE = 100_000; // rows in the Series and in each DataFrame column
const WARMUP = 5; // untimed passes executed before the clock starts
const ITERATIONS = 50; // timed passes averaged into mean_ms

/** Build a SIZE-long array whose element at index i is `fn(i)`. */
const column = <T>(fn: (i: number) => T): T[] =>
  Array.from({ length: SIZE }, (_, i) => fn(i));

// Fixtures: the Series is negative at even positions and positive at odd
// positions; DataFrame column "a" is entirely negative, "b" entirely positive.
const s = new Series({
  data: column((i) => (i % 2 === 0 ? -(i + 0.567) : i + 0.567)),
});
const df = DataFrame.fromColumns({
  a: column((i) => -(i + 0.123)),
  b: column((i) => i + 0.456),
});

/** Run the four measured operations once and discard their results. */
const runPass = (): void => {
  absSeries(s);
  absDataFrame(df);
  roundSeries(s, 1);
  roundDataFrame(df, 1);
};

// Untimed passes first, so one-off start-up costs stay out of the measurement.
for (let pass = 0; pass < WARMUP; pass++) {
  runPass();
}

const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass++) {
  runPass();
}
const elapsedMs = performance.now() - startedAt;

const report = {
  function: "math_ops",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
47 changes: 47 additions & 0 deletions benchmarks/tsb/bench_na_ops.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Benchmark: na_ops — isna / notna / ffillSeries / bfillSeries / dataFrameFfill / dataFrameBfill on 100k rows.
* Outputs JSON: {"function": "na_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
*/
import { Series, DataFrame, isna, notna, ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "../../src/index.ts";

// Benchmark configuration.
const SIZE = 100_000; // rows in the Series and in each DataFrame column
const WARMUP = 5; // untimed passes executed before the clock starts
const ITERATIONS = 50; // timed passes averaged into mean_ms

/** Build a SIZE-long array whose element at index i is `fn(i)`. */
const column = <T>(fn: (i: number) => T): T[] =>
  Array.from({ length: SIZE }, (_, i) => fn(i));

// Fixtures: `data` holds its own index with null at every multiple of 5;
// column "b" holds twice the index with null at every multiple of 7.
const data: (number | null)[] = column((i) => (i % 5 === 0 ? null : i));
const s = new Series({ data });
const df = DataFrame.fromColumns({
  a: data,
  b: column((i) => (i % 7 === 0 ? null : i * 2)),
});

/** Run the six measured missing-value operations once, discarding results. */
const runPass = (): void => {
  isna(s);
  notna(s);
  ffillSeries(s);
  bfillSeries(s);
  dataFrameFfill(df);
  dataFrameBfill(df);
};

// Untimed passes first, so one-off start-up costs stay out of the measurement.
for (let pass = 0; pass < WARMUP; pass++) {
  runPass();
}

const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass++) {
  runPass();
}
const elapsedMs = performance.now() - startedAt;

const report = {
  function: "na_ops",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
Loading