From 10939a6117f1eaef46d7b88513dafcc0b0ac236a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 3 Jun 2026 16:46:17 +0000
Subject: [PATCH] bench: sweep bit-packed compare across all int types and bit
 widths

Add `bitpack_compare_sweep`, which exercises the public `array.binary(rhs,
op)` compare-against-constant path over all eight integer types and every
valid bit width (64Ki in-range elements per case, no patches). It isolates
the `<BitPacked as CompareKernel>` unpack + per-element compare kernel so a
kernel change shows up as a CodSpeed diff.

Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
---
 encodings/fastlanes/Cargo.toml                |   4 +
 .../benches/bitpack_compare_sweep.rs          | 111 ++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 encodings/fastlanes/benches/bitpack_compare_sweep.rs
diff --git a/encodings/fastlanes/Cargo.toml b/encodings/fastlanes/Cargo.toml
index 08c96c481d7..e0aa5cbb724 100644
--- a/encodings/fastlanes/Cargo.toml
+++ b/encodings/fastlanes/Cargo.toml
@@ -64,6 +64,10 @@ required-features = ["_test-harness"]
 name = "bitpack_compare"
 harness = false
 
+[[bench]]
+name = "bitpack_compare_sweep"
+harness = false
+
 [[bench]]
 name = "cast_bitpacked"
 harness = false
diff --git a/encodings/fastlanes/benches/bitpack_compare_sweep.rs b/encodings/fastlanes/benches/bitpack_compare_sweep.rs
new file mode 100644
index 00000000000..3eb0ba3b9a2
--- /dev/null
+++ b/encodings/fastlanes/benches/bitpack_compare_sweep.rs
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Sweeps the public `BitPackedArray` compare-against-constant path (`array.binary(rhs, op)`) over
+//! every integer type and every valid bit width, so a kernel change shows up as a CodSpeed diff.
+//!
+//! The array holds in-range values (no patches, no out-of-range fast path), so each iteration runs
+//! the full unpack + per-element compare kernel that backs `<BitPacked as CompareKernel>`.
+//!
+//! Run with `cargo bench -p vortex-fastlanes --bench bitpack_compare_sweep`.
+
+#![expect(clippy::unwrap_used)]
+#![expect(clippy::cast_possible_truncation)]
+
+use divan::Bencher;
+use divan::counter::ItemsCount;
+use vortex_array::ArrayRef;
+use vortex_array::ExecutionCtx;
+use vortex_array::IntoArray;
+use vortex_array::LEGACY_SESSION;
+use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::ConstantArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::builtins::ArrayBuiltins;
+use vortex_array::dtype::NativePType;
+use vortex_array::scalar::Scalar;
+use vortex_array::scalar_fn::fns::operators::Operator;
+use vortex_array::validity::Validity;
+use vortex_buffer::BufferMut;
+use vortex_fastlanes::BitPackedData;
+
+fn main() {
+    divan::main(); // Hands off to divan, which runs the `#[divan::bench]` functions below.
+}
+
+/// Number of elements per benchmarked array: 64 full FastLanes blocks of 1024 values each.
+const LEN: usize = 64 * 1024;
+
+/// Operator under test. `Lt` exercises the full unpack + per-element comparison path.
+const OP: Operator = Operator::Lt;
+
+/// Integer element types swept by this benchmark: `u8` through `u64` and `i8` through `i64`.
+trait BenchInt: NativePType + Copy + Into<Scalar> {
+    /// Cast `v` to `Self` with `as`; callers are expected to pass a value already in range.
+    fn from_counter(v: u64) -> Self;
+}
+
+/// Implement `BenchInt` for each listed primitive by casting the `u64` counter with `as`.
+macro_rules! impl_bench_int {
+    ($($Int:ty),+) => {
+        $(impl BenchInt for $Int {
+            #[inline]
+            fn from_counter(counter: u64) -> Self {
+                counter as Self
+            }
+        })+
+    };
+}
+impl_bench_int!(u8, u16, u32, u64, i8, i16, i32, i64);
+
+/// Build the inputs for one case: `LEN` values of `T` bit-packed at `width` (all below
+/// `2^width`, so the array needs no patches), a constant array holding `2^width / 2` as the
+/// right-hand side, and the execution context (also used for the encoding step).
+fn setup<T: BenchInt>(width: usize) -> (ArrayRef, ArrayRef, ExecutionCtx) {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let cap = 1u64 << width;
+    // An odd multiplier permutes residues mod `cap`, spreading the values over `0..cap`.
+    // A plain counter stays below `LEN`, so for `cap > LEN` every value would be `< cap / 2`.
+    let buf: BufferMut<T> = (0..LEN)
+        .map(|i| T::from_counter((i as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) % cap))
+        .collect();
+    let plain = PrimitiveArray::new(buf.freeze(), Validity::NonNullable).into_array();
+    let array = BitPackedData::encode(&plain, width as u8, &mut ctx)
+        .unwrap()
+        .into_array();
+    let rhs = ConstantArray::new(T::from_counter(cap / 2), LEN).into_array();
+    (array, rhs, ctx)
+}
+
+/// Generate module `$module` with a `compare` benchmark for `$Int`, run once per bit width in
+/// `1..$bits`. The native width itself is excluded because bit-packing requires the target width
+/// to be strictly narrower than the type.
+macro_rules! bench_type {
+    ($module:ident, $Int:ty, $bits:expr) => {
+        mod $module {
+            use super::*;
+
+            #[divan::bench(args = 1..$bits)]
+            fn compare(bencher: Bencher, width: usize) {
+                // Setup (data generation + encoding) runs once per width, outside the timed
+                // closure; each timed iteration clones both arrays and executes the compare.
+                let (packed, constant, mut ctx) = setup::<$Int>(width);
+                let items = ItemsCount::new(LEN);
+                bencher.counter(items).bench_local(|| {
+                    let compared = packed.clone().binary(constant.clone(), OP).unwrap();
+                    compared.execute::<BoolArray>(&mut ctx).unwrap()
+                });
+            }
+        }
+    };
+}
+
+bench_type!(u8, u8, 8usize);
+bench_type!(u16, u16, 16usize);
+bench_type!(u32, u32, 32usize);
+bench_type!(u64, u64, 64usize);
+bench_type!(i8, i8, 8usize);
+bench_type!(i16, i16, 16usize);
+bench_type!(i32, i32, 32usize);
+bench_type!(i64, i64, 64usize);