From 7a14a7a1d166094a7ce9b0591680137809882460 Mon Sep 17 00:00:00 2001
From: Robert Kruszewski
Date: Mon, 13 Apr 2026 23:08:04 +0100
Subject: [PATCH 1/3] Add benchmarks for take on a FilterArray

Signed-off-by: Robert Kruszewski
---
Reviewer note: registers a `take_filter` bench target (`harness = false`) and
adds four divan benchmarks covering {slice, random} filter masks crossed with
{sequential, random} take indices. Each benchmark is instantiated for every
`FILTERED_LENS` const and run for every `NUM_INDICES` arg. Mask and index
generation use fixed seeds (42 / 43), so inputs are reproducible across runs.

 vortex-array/Cargo.toml             |   4 +
 vortex-array/benches/take_filter.rs | 147 ++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 vortex-array/benches/take_filter.rs

diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml
index 74fa772820c..31cb0762714 100644
--- a/vortex-array/Cargo.toml
+++ b/vortex-array/Cargo.toml
@@ -177,6 +177,10 @@ harness = false
 name = "take_fsl"
 harness = false
 
+[[bench]]
+name = "take_filter"
+harness = false
+
 [[bench]]
 name = "filter_bool"
 harness = false
diff --git a/vortex-array/benches/take_filter.rs b/vortex-array/benches/take_filter.rs
new file mode 100644
index 00000000000..d5f99457e84
--- /dev/null
+++ b/vortex-array/benches/take_filter.rs
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Benchmarks for taking from a lazy [`FilterArray`].
+//!
+//! Parameterized over:
+//! - Number of indices to take
+//! - Number of rows retained by the filter
+//! - Filter mask layout (single contiguous slice vs random positions)
+//! - Take index layout (sequential vs random ranks)
+
+#![expect(clippy::unwrap_used)]
+#![expect(clippy::cast_possible_truncation)]
+
+use divan::Bencher;
+use rand::RngExt;
+use rand::SeedableRng;
+use rand::rngs::StdRng;
+use rand::seq::SliceRandom;
+use vortex_array::ArrayRef;
+use vortex_array::IntoArray;
+use vortex_array::LEGACY_SESSION;
+use vortex_array::RecursiveCanonical;
+use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::FilterArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_buffer::Buffer;
+use vortex_mask::Mask;
+
+fn main() {
+    divan::main();
+}
+
+const ARRAY_LEN: usize = 100_000;
+const FILTERED_LENS: &[usize] = &[10_000, 50_000, 90_000];
+const NUM_INDICES: &[usize] = &[1_000, 10_000];
+const MASK_SEED: u64 = 42;
+const INDEX_SEED: u64 = 43;
+
+fn primitive_array() -> ArrayRef {
+    PrimitiveArray::from_iter(0..ARRAY_LEN as u32).into_array()
+}
+
+fn filtered_array(mask: Mask) -> ArrayRef {
+    FilterArray::try_new(primitive_array(), mask)
+        .unwrap()
+        .into_array()
+}
+
+fn slice_mask(filtered_len: usize) -> Mask {
+    let start = (ARRAY_LEN - filtered_len) / 2;
+    Mask::from_slices(ARRAY_LEN, vec![(start, start + filtered_len)])
+}
+
+fn random_mask(filtered_len: usize) -> Mask {
+    let mut indices: Vec<usize> = (0..ARRAY_LEN).collect();
+    indices.shuffle(&mut StdRng::seed_from_u64(MASK_SEED));
+    indices.truncate(filtered_len);
+    indices.sort_unstable();
+    Mask::from_indices(ARRAY_LEN, indices)
+}
+
+fn sequential_indices(num_indices: usize) -> ArrayRef {
+    Buffer::from_iter(0..num_indices as u64).into_array()
+}
+
+fn random_indices(num_indices: usize, filtered_len: usize) -> ArrayRef {
+    let mut rng = StdRng::seed_from_u64(INDEX_SEED);
+    Buffer::from_iter((0..num_indices).map(|_| rng.random_range(0..filtered_len as u64)))
+        .into_array()
+}
+
+#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
+fn take_filter_slice_mask_sequential_indices<const FILTERED_LEN: usize>(
+    bencher: Bencher,
+    num_indices: usize,
+) {
+    let array = filtered_array(slice_mask(FILTERED_LEN));
+    let indices = sequential_indices(num_indices);
+
+    bencher
+        .with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
+        .bench_refs(|(array, indices, ctx)| {
+            array
+                .take(indices.clone())
+                .unwrap()
+                .execute::<RecursiveCanonical>(ctx)
+                .unwrap()
+        });
+}
+
+#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
+fn take_filter_slice_mask_random_indices<const FILTERED_LEN: usize>(
+    bencher: Bencher,
+    num_indices: usize,
+) {
+    let array = filtered_array(slice_mask(FILTERED_LEN));
+    let indices = random_indices(num_indices, FILTERED_LEN);
+
+    bencher
+        .with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
+        .bench_refs(|(array, indices, ctx)| {
+            array
+                .take(indices.clone())
+                .unwrap()
+                .execute::<RecursiveCanonical>(ctx)
+                .unwrap()
+        });
+}
+
+#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
+fn take_filter_random_mask_sequential_indices<const FILTERED_LEN: usize>(
+    bencher: Bencher,
+    num_indices: usize,
+) {
+    let array = filtered_array(random_mask(FILTERED_LEN));
+    let indices = sequential_indices(num_indices);
+
+    bencher
+        .with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
+        .bench_refs(|(array, indices, ctx)| {
+            array
+                .take(indices.clone())
+                .unwrap()
+                .execute::<RecursiveCanonical>(ctx)
+                .unwrap()
+        });
+}
+
+#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
+fn take_filter_random_mask_random_indices<const FILTERED_LEN: usize>(
+    bencher: Bencher,
+    num_indices: usize,
+) {
+    let array = filtered_array(random_mask(FILTERED_LEN));
+    let indices = random_indices(num_indices, FILTERED_LEN);
+
+    bencher
+        .with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
+        .bench_refs(|(array, indices, ctx)| {
+            array
+                .take(indices.clone())
+                .unwrap()
+                .execute::<RecursiveCanonical>(ctx)
+                .unwrap()
+        });
+}

From 283f17c2b3ea92a434d0682b01078a456086b961 Mon Sep 17 00:00:00 2001
From: Robert Kruszewski
Date: Mon, 13 Apr 2026 23:13:02 +0100
Subject: [PATCH 2/3] inline

Signed-off-by: Robert Kruszewski
---
Reviewer note: removes the `filtered_array` helper; each benchmark now builds
its input with `primitive_array().filter(mask).unwrap()` instead of calling
`FilterArray::try_new` directly.
NOTE(review): the module docs still say the benchmarks take from a lazy
`FilterArray`; this presumes `.filter(..)` returns one without executing the
filter -- confirm, otherwise the benchmark measures something different.

 vortex-array/benches/take_filter.rs | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/vortex-array/benches/take_filter.rs b/vortex-array/benches/take_filter.rs
index d5f99457e84..39c24e6a346 100644
--- a/vortex-array/benches/take_filter.rs
+++ b/vortex-array/benches/take_filter.rs
@@ -41,12 +41,6 @@ fn primitive_array() -> ArrayRef {
     PrimitiveArray::from_iter(0..ARRAY_LEN as u32).into_array()
 }
 
-fn filtered_array(mask: Mask) -> ArrayRef {
-    FilterArray::try_new(primitive_array(), mask)
-        .unwrap()
-        .into_array()
-}
-
 fn slice_mask(filtered_len: usize) -> Mask {
     let start = (ARRAY_LEN - filtered_len) / 2;
     Mask::from_slices(ARRAY_LEN, vec![(start, start + filtered_len)])
@@ -75,7 +69,7 @@ fn take_filter_slice_mask_sequential_indices<const FILTERED_LEN: usize>(
     bencher: Bencher,
     num_indices: usize,
 ) {
-    let array = filtered_array(slice_mask(FILTERED_LEN));
+    let array = primitive_array().filter(slice_mask(FILTERED_LEN)).unwrap();
     let indices = sequential_indices(num_indices);
 
     bencher
@@ -94,7 +88,7 @@ fn take_filter_slice_mask_random_indices<const FILTERED_LEN: usize>(
     bencher: Bencher,
     num_indices: usize,
 ) {
-    let array = filtered_array(slice_mask(FILTERED_LEN));
+    let array = primitive_array().filter(slice_mask(FILTERED_LEN)).unwrap();
     let indices = random_indices(num_indices, FILTERED_LEN);
 
     bencher
@@ -113,7 +107,7 @@ fn take_filter_random_mask_sequential_indices<const FILTERED_LEN: usize>(
     bencher: Bencher,
     num_indices: usize,
 ) {
-    let array = filtered_array(random_mask(FILTERED_LEN));
+    let array = primitive_array().filter(random_mask(FILTERED_LEN)).unwrap();
     let indices = sequential_indices(num_indices);
 
     bencher
@@ -132,7 +126,7 @@ fn take_filter_random_mask_random_indices<const FILTERED_LEN: usize>(
     bencher: Bencher,
     num_indices: usize,
 ) {
-    let array = filtered_array(random_mask(FILTERED_LEN));
+    let array = primitive_array().filter(random_mask(FILTERED_LEN)).unwrap();
     let indices = random_indices(num_indices, FILTERED_LEN);
 
     bencher

From 7c15de6aab8aee6a83563f7b9c5f9996f584e1a0 Mon Sep 17 00:00:00 2001
From: Robert Kruszewski
Date: Mon, 13 Apr 2026 23:19:09 +0100
Subject: [PATCH 3/3] imports

Signed-off-by: Robert Kruszewski
---
Reviewer note: drops the `FilterArray` import, which has no remaining code use
after patch 2 removed `filtered_array`.
NOTE(review): the `//!` module docs still contain the intra-doc link
[`FilterArray`]; with the import gone the name is no longer in scope for that
link -- confirm rustdoc does not warn, or spell out the full path in the docs.

 vortex-array/benches/take_filter.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vortex-array/benches/take_filter.rs b/vortex-array/benches/take_filter.rs
index 39c24e6a346..cfe837d37bf 100644
--- a/vortex-array/benches/take_filter.rs
+++ b/vortex-array/benches/take_filter.rs
@@ -22,7 +22,6 @@ use vortex_array::IntoArray;
 use vortex_array::LEGACY_SESSION;
 use vortex_array::RecursiveCanonical;
 use vortex_array::VortexSessionExecute;
-use vortex_array::arrays::FilterArray;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_buffer::Buffer;
 use vortex_mask::Mask;