Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/scripts/run-sql-bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ if [[ -n "$df_formats" ]]; then
# shellcheck disable=SC2086
target/release_debug/datafusion-bench "$subcommand" \
-d gh-json \
--show-session-metrics \
--formats "$df_formats" \
$opts \
-o df-results.json
Expand All @@ -118,6 +119,7 @@ if [[ -n "$ddb_formats" ]]; then
# shellcheck disable=SC2086
target/release_debug/duckdb-bench "$subcommand" \
-d gh-json \
--show-session-metrics \
--formats "$ddb_formats" \
$opts \
--delete-duckdb-database \
Expand All @@ -131,6 +133,7 @@ if ! $is_remote && [[ "$has_lance" == "true" ]] && [[ -f "target/release_debug/l
# shellcheck disable=SC2086
target/release_debug/lance-bench "$subcommand" \
-d gh-json \
--show-session-metrics \
$opts \
-o lance-results.json

Expand Down
5 changes: 4 additions & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
think you've finished work.
* run `cargo xtask public-api` to re-generate the public API lock files. Please do this every time you reach a stopping
point or think you've finished work.
* exception: for trivial, isolated changes that cannot affect formatting or linting (for example constant-only edits
with no import/signature/control-flow changes), skip full-workspace `fmt`/`clippy`/`public-api` sweeps unless
explicitly requested; prefer targeted validation (for example crate-level `cargo check`/tests) only.
* you can try running
`cargo fix --lib --allow-dirty --allow-staged && cargo clippy --fix --lib --allow-dirty --allow-staged` to
automatically fix many minor errors.
Expand Down Expand Up @@ -64,4 +67,4 @@

## Commits

* All commits must be signed off by the committers in the form `Signed-off-by: "COMMITTER" <COMMITTER_EMAIL>`.
* All commits must be signed off by the committers in the form `Signed-off-by: "COMMITTER" <COMMITTER_EMAIL>`.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions bench-orchestrator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ vx-bench run <benchmark> [options]
- `--iterations, -i`: Iterations per query (default: 5)
- `--label, -l`: Label for this run (useful for later reference)
- `--track-memory`: Enable memory usage tracking
- `--allocator`: Host allocator mode: `pooled`, `default`, or `ab` (run both in one run)
- `--build/--no-build`: Build binaries before running (default: build)

### `compare` - Compare Results
Expand All @@ -71,9 +72,9 @@ vx-bench compare [options]
- `--format`: Filter results to a specific format
- `--threshold`: Significance threshold (default: 0.10 = 10%)

**Within-run comparison** (`--run`): Compares different engine:format combinations within a single run. Output shows one row per query, with columns for each engine:format combo.
**Within-run comparison** (`--run`): Compares different engine:format (or engine:format:allocator) combinations within a single run. Output shows one row per query, with columns for each combo.

**Multi-run comparison** (`--runs`): Compares the same benchmarks across multiple runs. Output shows one row per (query, engine, format) combination, with columns for each run.
**Multi-run comparison** (`--runs`): Compares the same benchmarks across multiple runs. Output shows one row per (query, engine, format[, allocator]) combination, with columns for each run.

### `list` - List Benchmark Runs

Expand Down
73 changes: 57 additions & 16 deletions bench-orchestrator/bench_orchestrator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""CLI for benchmark orchestration."""

from datetime import datetime, timedelta
from typing import Annotated
from typing import Annotated, Literal

import pandas as pd
import typer
Expand Down Expand Up @@ -76,19 +76,48 @@ def run(
iterations: Annotated[int, typer.Option("--iterations", "-i", help="Iterations per query")] = 5,
label: Annotated[str | None, typer.Option("--label", "-l", help="Label for this run")] = None,
track_memory: Annotated[bool, typer.Option("--track-memory", help="Track memory usage")] = False,
show_session_metrics: Annotated[
bool, typer.Option("--show-session-metrics", help="Print session metrics after each engine run")
] = False,
samply: Annotated[bool, typer.Option("--samply", help="Record a profile using samply")] = False,
sample_rate: Annotated[int, typer.Option("--sample-rate", help="Sample rate to run samply with")] = None,
tracing: Annotated[bool, typer.Option("--tracing", help="Record a trace for use with perfetto")] = False,
build: Annotated[bool, typer.Option("--build/--no-build", help="Build binaries before running")] = True,
verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Log underlying commands")] = False,
options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
allocator: Annotated[
str,
typer.Option(
"--allocator",
help="Host allocator mode: pooled, default, or ab (run both and compare)",
),
] = "pooled",
) -> None:
"""Run benchmarks with specified configuration."""
engines = parse_engines(engine)
formats = parse_formats(format)
query_list = parse_queries(queries)
exclude_list = parse_queries(exclude_queries)
bench_opts = {}
allocator_mode_value = allocator.strip().lower()
if allocator_mode_value not in {"pooled", "default", "ab"}:
console.print("[red]--allocator must be one of: pooled, default, ab[/red]")
raise typer.Exit(1)

allocator_mode: Literal["pooled", "default", "ab"]
if allocator_mode_value == "default":
allocator_mode = "default"
elif allocator_mode_value == "pooled":
allocator_mode = "pooled"
else:
allocator_mode = "ab"

allocator_variants: list[Literal["pooled", "default"]]
if allocator_mode == "ab":
allocator_variants = ["default", "pooled"]
else:
allocator_variants = [allocator_mode]

# Build options dict
if options:
for opt in options:
Expand All @@ -108,6 +137,7 @@ def run(
label=label,
options=bench_opts,
track_memory=track_memory,
allocator_mode=allocator_mode,
)

# Validate configuration
Expand Down Expand Up @@ -135,6 +165,7 @@ def run(
console.print(f" Engines: {', '.join(e.value for e in engines)}")
console.print(f" Formats: {', '.join(f.value for f in formats)}")
console.print(f" Iterations: {iterations}")
console.print(f" Allocator: {allocator_mode}")
if label:
console.print(f" Label: {label}")
console.print()
Expand All @@ -153,20 +184,27 @@ def run(
executor = BenchmarkExecutor(binary_paths[eng], eng, verbose=verbose)

try:
results = executor.run(
benchmark=benchmark,
formats=engine_formats,
queries=query_list,
exclude_queries=exclude_list,
iterations=iterations,
options=bench_opts,
track_memory=track_memory,
samply=samply,
sample_rate=sample_rate,
tracing=tracing,
on_result=ctx.write_raw_json,
)
console.print(f"[green]{eng.value}: {len(results)} results[/green]")
total_results = 0
for allocator_variant in allocator_variants:
results = executor.run(
benchmark=benchmark,
formats=engine_formats,
queries=query_list,
exclude_queries=exclude_list,
iterations=iterations,
options=bench_opts,
track_memory=track_memory,
show_session_metrics=show_session_metrics,
samply=samply,
sample_rate=sample_rate,
tracing=tracing,
allocator_mode=allocator_variant,
on_result=ctx.write_raw_json,
)
total_results += len(results)
console.print(f"[green]{eng.value} ({allocator_variant}): {len(results)} results[/green]")
if allocator_mode == "ab":
console.print(f"[green]{eng.value}: {total_results} total results[/green]")
except RuntimeError as e:
console.print(f"[red]{eng.value} failed: {e}[/red]")

Expand Down Expand Up @@ -321,7 +359,10 @@ def compare(
console.print(f"[red]{e}[/red]")
raise typer.Exit(1)

table = pivot_comparison_table(pivot, threshold, row_keys=["query", "engine", "format"])
row_keys = ["query", "engine", "format"]
if "allocator" in pivot.df.columns:
row_keys.append("allocator")
table = pivot_comparison_table(pivot, threshold, row_keys=row_keys)
console.print(table)
return

Expand Down
33 changes: 25 additions & 8 deletions bench-orchestrator/bench_orchestrator/comparison/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def extract_target_fields(df: pd.DataFrame) -> pd.DataFrame:
if isinstance(first_target, dict):
df["engine"] = df["target"].apply(lambda t: t.get("engine", "") if isinstance(t, dict) else "")
df["format"] = df["target"].apply(lambda t: t.get("format", "") if isinstance(t, dict) else "")
df["allocator"] = df["target"].apply(lambda t: t.get("allocator", "") if isinstance(t, dict) else "")

# Extract query number from name if present
if "name" in df.columns:
Expand Down Expand Up @@ -196,9 +197,15 @@ def compare_within_run(
if filter_format is not None and "format" in df.columns:
df = df[df["format"] == filter_format]

# Find unique engine:format combinations
combos_df = df.groupby(["engine", "format"]).size().reset_index()[["engine", "format"]]
columns = [f"{row['engine']}:{row['format']}" for _, row in combos_df.iterrows()]
has_allocator = "allocator" in df.columns and df["allocator"].nunique(dropna=True) > 1

# Find unique engine:format[:allocator] combinations.
combo_keys = ["engine", "format"] + (["allocator"] if has_allocator else [])
combos_df = df.groupby(combo_keys).size().reset_index()[combo_keys]
if has_allocator:
columns = [f"{row['engine']}:{row['format']}:{row['allocator']}" for _, row in combos_df.iterrows()]
else:
columns = [f"{row['engine']}:{row['format']}" for _, row in combos_df.iterrows()]

if len(columns) < 2:
raise ValueError("Need at least 2 engine:format combinations to compare")
Expand All @@ -208,10 +215,17 @@ def compare_within_run(
baseline_engine = combos_df.iloc[0]["engine"]
baseline_format = combos_df.iloc[0]["format"]

baseline_key = f"{baseline_engine}:{baseline_format}"
if has_allocator:
baseline_allocator = combos_df.iloc[0]["allocator"]
baseline_key = f"{baseline_engine}:{baseline_format}:{baseline_allocator}"
else:
baseline_key = f"{baseline_engine}:{baseline_format}"

# Create engine:format column
df["combo"] = df["engine"] + ":" + df["format"]
# Create combo column.
if has_allocator:
df["combo"] = df["engine"] + ":" + df["format"] + ":" + df["allocator"]
else:
df["combo"] = df["engine"] + ":" + df["format"]

# Pivot to get queries as rows, combos as columns
pivot = df.pivot_table(index="query", columns="combo", values="value", aggfunc="mean")
Expand Down Expand Up @@ -268,8 +282,11 @@ def compare_runs(
# Combine all runs
combined: pd.DataFrame = pd.concat(processed, ignore_index=True)

# Pivot to get (query, engine, format) as rows, runs as columns
pivot = combined.pivot_table(index=["query", "engine", "format"], columns="run", values="value", aggfunc="mean")
# Pivot to get (query, engine, format[, allocator]) as rows, runs as columns.
index = ["query", "engine", "format"]
if "allocator" in combined.columns and combined["allocator"].nunique(dropna=True) > 1:
index.append("allocator")
pivot = combined.pivot_table(index=index, columns="run", values="value", aggfunc="mean")

# Deduplicate labels while preserving order (two runs can share a label).
unique_labels = list(dict.fromkeys(labels))
Expand Down
2 changes: 2 additions & 0 deletions bench-orchestrator/bench_orchestrator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Literal


class Engine(Enum):
Expand Down Expand Up @@ -78,6 +79,7 @@ class RunConfig:
label: str | None = None
options: dict[str, str] = field(default_factory=dict)
track_memory: bool = False
allocator_mode: Literal["pooled", "default", "ab"] = "pooled"

def validate(self) -> list[str]:
"""Validate the configuration and return any warnings."""
Expand Down
24 changes: 22 additions & 2 deletions bench-orchestrator/bench_orchestrator/runner/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

"""Benchmark binary execution."""

import json
import os
import subprocess
from collections.abc import Callable
from pathlib import Path
from typing import final
from typing import Literal, final

from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
Expand Down Expand Up @@ -34,9 +36,11 @@ def run(
iterations: int = 5,
options: dict[str, str] | None = None,
track_memory: bool = False,
show_session_metrics: bool = False,
samply: bool = False,
sample_rate: int | None = None,
tracing: bool = False,
allocator_mode: Literal["pooled", "default"] = "pooled",
on_result: Callable[[str], None] | None = None,
) -> list[str]:
"""
Expand All @@ -50,6 +54,8 @@ def run(
iterations: Number of runs per query
options: Additional options (e.g., scale_factor)
track_memory: Enable memory tracking
show_session_metrics: Print session metrics at the end of the benchmark process
allocator_mode: Host allocator mode ("pooled" or "default")
on_result: Callback for each result line (for streaming)

Returns:
Expand All @@ -73,6 +79,8 @@ def run(
cmd.extend(["--exclude-queries", ",".join(map(str, exclude_queries))])
if track_memory:
cmd.append("--track-memory")
if show_session_metrics:
cmd.append("--show-session-metrics")
if tracing:
cmd.append("--tracing")
if options:
Expand All @@ -83,7 +91,7 @@ def run(
cmd = ["--"] + cmd
cmd_prefix = ["samply", "record"]
if sample_rate:
cmd = cmd_prefix + ["--rate", sample_rate] + cmd
cmd = cmd_prefix + ["--rate", str(sample_rate)] + cmd
else:
cmd = cmd_prefix + cmd

Expand All @@ -94,6 +102,9 @@ def run(
if self.verbose:
console.print(f"[dim]$ {' '.join(cmd)}[/dim]")

env = os.environ.copy()
env["VORTEX_HOST_ALLOCATOR"] = allocator_mode

results = []

with Progress(
Expand All @@ -108,11 +119,20 @@ def run(
cmd,
stdout=subprocess.PIPE,
text=True,
env=env,
)

for line in iter(process.stdout.readline, ""):
line = line.strip()
if line:
if line.startswith("{"):
try:
payload = json.loads(line)
if isinstance(payload.get("target"), dict):
payload["target"]["allocator"] = allocator_mode
line = json.dumps(payload)
except json.JSONDecodeError:
pass
results.append(line)
if on_result:
on_result(line)
Expand Down
5 changes: 4 additions & 1 deletion bench-orchestrator/bench_orchestrator/storage/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,10 @@ def create_run(self, config: RunConfig, build_config: BuildConfig) -> Iterator[R
timestamp=datetime.now(),
label=config.label,
benchmark=config.benchmark.value,
dataset_config=config.options,
dataset_config={
**config.options,
"allocator_mode": config.allocator_mode,
},
engines=[e.value for e in config.engines],
formats=[f.value for f in config.formats],
queries=config.queries or [],
Expand Down
Loading
Loading