Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Extension

pub fn vortex_array::arrays::Extension::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Extension>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>

impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Filter

pub fn vortex_array::arrays::Filter::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Filter>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>

impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::FixedSizeList

pub fn vortex_array::arrays::FixedSizeList::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::FixedSizeList>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>
Expand Down Expand Up @@ -2470,6 +2474,10 @@ impl vortex_array::ValidityVTable<vortex_array::arrays::Filter> for vortex_array

pub fn vortex_array::arrays::Filter::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::Filter>) -> vortex_error::VortexResult<vortex_array::validity::Validity>

impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Filter

pub fn vortex_array::arrays::Filter::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Filter>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>

pub struct vortex_array::arrays::filter::FilterData

impl vortex_array::arrays::filter::FilterData
Expand Down Expand Up @@ -5706,6 +5714,10 @@ impl vortex_array::ValidityVTable<vortex_array::arrays::Filter> for vortex_array

pub fn vortex_array::arrays::Filter::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::Filter>) -> vortex_error::VortexResult<vortex_array::validity::Validity>

impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Filter

pub fn vortex_array::arrays::Filter::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Filter>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>

pub struct vortex_array::arrays::FixedSizeList

impl core::clone::Clone for vortex_array::arrays::FixedSizeList
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/arrays/filter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub use kernel::FilterReduce;
pub use kernel::FilterReduceAdaptor;

mod rules;
mod take;

mod vtable;
pub use vtable::Filter;
131 changes: 131 additions & 0 deletions vortex-array/src/arrays/filter/take.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use num_traits::ToPrimitive;
use vortex_buffer::BufferMut;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use super::Filter;
use crate::ArrayRef;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::arrays::PrimitiveArray;
use crate::arrays::dict::TakeExecute;
use crate::arrays::dict::TakeExecuteAdaptor;
use crate::arrays::filter::FilterArrayExt;
use crate::builtins::ArrayBuiltins;
use crate::dtype::DType;
use crate::executor::ExecutionCtx;
use crate::kernel::ParentKernelSet;
use crate::match_each_integer_ptype;
use crate::validity::Validity;

/// Parent kernels registered for [`Filter`].
///
/// Contains a single entry: [`TakeExecuteAdaptor`] wrapping [`Filter`], lifted into the set.
/// NOTE(review): presumably this is what lets a take-style parent be executed through the
/// [`TakeExecute`] implementation in this module — confirm against `TakeExecuteAdaptor`.
pub(super) const PARENT_KERNELS: ParentKernelSet<Filter> =
ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(Filter))]);

/// Takes `indices` from a filtered array by translating them into indices on the filter's
/// child and delegating to the child's `take`.
///
/// Every valid index is bounds-checked against `array.len()` and then passed, as a rank, to
/// the filter mask's `rank_batch`; the returned positions become the indices used on the
/// child. Null indices keep a placeholder value of `0` and remain null because the original
/// index validity is attached to the translated indices.
///
/// # Errors
///
/// Returns an out-of-bounds error when a valid index cannot be represented as `usize` or is
/// not smaller than `array.len()`. Failures from reading the index validity, from converting
/// a position to `u64`, or from the child's `take` are propagated.
fn take_impl(array: ArrayView<'_, Filter>, indices: &PrimitiveArray) -> VortexResult<ArrayRef> {
    let indices_validity = indices.validity_mask()?;
    let num_indices = indices.len();
    let filtered_len = array.len();

    // Slots belonging to null indices are never overwritten below; they keep this `0`.
    let mut translated = BufferMut::<u64>::with_capacity(num_indices);
    translated.push_n(0u64, num_indices);

    match_each_integer_ptype!(indices.ptype(), |P| {
        // Collect valid (output_idx, rank) pairs — validates bounds up front.
        // There is at most one pair per index, so reserve once instead of growing repeatedly.
        let mut valid_indices = Vec::with_capacity(num_indices);
        let mut valid_ranks = Vec::with_capacity(num_indices);
        for (idx, rank) in indices.as_slice::<P>().iter().enumerate() {
            if !indices_validity.value(idx) {
                continue;
            }
            let Some(rank) = rank.to_usize() else {
                vortex_bail!(OutOfBounds: 0, 0, filtered_len);
            };
            if rank >= filtered_len {
                vortex_bail!(OutOfBounds: rank, 0, filtered_len);
            }
            valid_indices.push(idx);
            valid_ranks.push(rank);
        }

        // Batch rank: single-pass over the bitmap instead of per-element scan.
        let positions = array.filter_mask().rank_batch(&valid_ranks);
        for (&idx, pos) in valid_indices.iter().zip(positions.iter()) {
            translated[idx] = u64::try_from(*pos)?;
        }

        Ok::<(), vortex_error::VortexError>(())
    })?;

    let translated_indices = PrimitiveArray::new(
        translated.freeze(),
        Validity::from_mask(indices_validity, indices.dtype().nullability()),
    )
    .into_array();

    array.child().take(translated_indices)
}

impl TakeExecute for Filter {
    /// Executes a take over a filter array.
    ///
    /// The indices must have a primitive dtype. Indices that are not already an unsigned
    /// integer type are first cast to the unsigned type of the same width (keeping their
    /// nullability), then materialized as a [`PrimitiveArray`] and handed to `take_impl`.
    /// The result is always wrapped in `Some`.
    ///
    /// # Errors
    ///
    /// Fails when the indices dtype is not primitive, or when the cast, the execution of the
    /// indices, or `take_impl` fails.
    fn take(
        array: ArrayView<'_, Filter>,
        indices: &ArrayRef,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        let (ptype, nullability) = match indices.dtype() {
            DType::Primitive(ptype, nullability) => (ptype, nullability),
            _ => {
                vortex_bail!("Invalid indices dtype: {}", indices.dtype())
            }
        };

        // Normalize to an unsigned integer array before materializing it.
        let normalized = if ptype.is_unsigned_int() {
            indices.clone()
        } else {
            let unsigned_dtype = DType::Primitive(ptype.to_unsigned(), *nullability);
            indices.clone().cast(unsigned_dtype)?
        };
        let unsigned_indices = normalized.execute::<PrimitiveArray>(ctx)?;

        let taken = take_impl(array, &unsigned_indices)?;
        Ok(Some(taken))
    }
}

#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;
    use vortex_mask::Mask;
    use vortex_session::VortexSession;

    use crate::IntoArray;
    use crate::arrays::Dict;
    use crate::arrays::DictArray;
    use crate::arrays::FilterArray;
    use crate::arrays::PrimitiveArray;
    use crate::assert_arrays_eq;
    use crate::executor::ExecutionCtx;

    /// A dict parent whose second child is a filter array is executed by the filter itself,
    /// and the nullable codes end up resolved against the filtered values.
    #[test]
    fn test_take_execute_kernel_maps_indices_through_filter() -> VortexResult<()> {
        // The mask keeps positions 0, 2 and 3, i.e. the filtered values are [10, 30, 40].
        let values = buffer![10i32, 20, 30, 40, 50].into_array();
        let keep = Mask::from_iter([true, false, true, true, false]);
        let filtered = FilterArray::new(values, keep).into_array();

        let codes = PrimitiveArray::from_option_iter([Some(2u64), None, Some(0)]).into_array();
        let dict = DictArray::try_new(codes, filtered.clone())?.into_array();

        let mut ctx = ExecutionCtx::new(VortexSession::empty());

        // Child index 1 is the slot the filter was passed in as when building the dict.
        let executed = filtered.execute_parent(&dict, 1, &mut ctx)?;
        let result = executed.expect("filter child should execute its take parent");

        assert!(result.as_opt::<Dict>().is_some());

        let expected =
            PrimitiveArray::from_option_iter([Some(40i32), None, Some(10)]).into_array();
        assert_arrays_eq!(result.to_canonical()?.into_array(), expected);
        Ok(())
    }
}
10 changes: 10 additions & 0 deletions vortex-array/src/arrays/filter/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use crate::arrays::filter::execute::execute_filter;
use crate::arrays::filter::execute::execute_filter_fast_paths;
use crate::arrays::filter::rules::PARENT_RULES;
use crate::arrays::filter::rules::RULES;
use crate::arrays::filter::take::PARENT_KERNELS;
use crate::buffer::BufferHandle;
use crate::dtype::DType;
use crate::executor::ExecutionCtx;
Expand Down Expand Up @@ -170,6 +171,15 @@ impl VTable for Filter {
PARENT_RULES.evaluate(array, parent, child_idx)
}

/// Attempts to execute `parent` from the perspective of this filter child.
///
/// Delegates directly to the filter's `PARENT_KERNELS`, forwarding `array`, `parent`,
/// `child_idx` and `ctx` unchanged, and returns whatever the kernel set returns.
/// NOTE(review): `None` presumably means no registered kernel handled the parent — confirm
/// against `ParentKernelSet::execute`.
fn execute_parent(
array: ArrayView<'_, Self>,
parent: &ArrayRef,
child_idx: usize,
ctx: &mut ExecutionCtx,
) -> VortexResult<Option<ArrayRef>> {
PARENT_KERNELS.execute(array, parent, child_idx, ctx)
}

/// Attempts to reduce this filter array by evaluating the filter's `RULES` against it.
///
/// The result of `RULES.evaluate` is returned as-is.
/// NOTE(review): `None` presumably means no rule applied — confirm against the rule set.
fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
RULES.evaluate(array)
}
Expand Down
22 changes: 22 additions & 0 deletions vortex-buffer/benches/vortex_bitbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,25 @@ fn set_indices_arrow_buffer(bencher: Bencher, length: usize) {
}
});
}

// ── select benchmarks ───────────────────────────────────────────────────

/// Benchmarks a single `select` call for the middle rank (half of the set-bit count).
#[divan::bench(args = INPUT_SIZE)]
fn select_mid_vortex_buffer(bencher: Bencher, length: usize) {
    let bits = BitBuffer::from_iter((0..length).map(true_count_pattern));
    let middle_rank = bits.true_count() / 2;
    bencher
        .with_inputs(|| (&bits, middle_rank))
        .bench_refs(|(bits, rank)| bits.select(*rank));
}

/// Benchmarks calling `select` once for every rank from `0` up to the set-bit count.
#[divan::bench(args = INPUT_SIZE)]
fn select_all_vortex_buffer(bencher: Bencher, length: usize) {
    let bits = BitBuffer::from_iter((0..length).map(true_count_pattern));
    let set_bits = bits.true_count();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        (0..set_bits).for_each(|nth| {
            // Keep each result observable so the call is not optimized away.
            divan::black_box(bits.select(nth));
        });
    });
}
4 changes: 4 additions & 0 deletions vortex-buffer/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ pub fn vortex_buffer::BitBuffer::new_with_offset(buffer: vortex_buffer::ByteBuff

pub fn vortex_buffer::BitBuffer::offset(&self) -> usize

pub fn vortex_buffer::BitBuffer::select(&self, nth: usize) -> usize

pub fn vortex_buffer::BitBuffer::select_sorted_batch(&self, sorted_ranks: &[usize]) -> alloc::vec::Vec<usize>

pub fn vortex_buffer::BitBuffer::set_indices(&self) -> arrow_buffer::util::bit_iterator::BitIndexIterator<'_>

pub fn vortex_buffer::BitBuffer::set_slices(&self) -> arrow_buffer::util::bit_iterator::BitSliceIterator<'_>
Expand Down
33 changes: 33 additions & 0 deletions vortex-buffer/src/bit/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ use crate::bit::count_ones::count_ones;
use crate::bit::get_bit_unchecked;
use crate::bit::ops::bitwise_binary_op;
use crate::bit::ops::bitwise_unary_op;
use crate::bit::select::bit_select;
use crate::bit::select::bit_select_sorted_batch;
use crate::buffer;

/// An immutable bitset stored as a packed byte buffer.
Expand Down Expand Up @@ -319,6 +321,37 @@ impl BitBuffer {
count_ones(self.buffer.as_slice(), self.offset, self.len)
}

/// Returns the position of the `nth` set bit (0-indexed).
///
/// This is the "select" operation on a bitmap: given a rank `nth`, find
/// which logical bit position holds that rank. With 0-indexed ranks,
/// `select(0)` asks for the first set bit.
///
/// # Panics
///
/// Panics (debug) or produces undefined results (release) if `nth` is
/// greater than or equal to [`true_count`](Self::true_count).
pub fn select(&self, nth: usize) -> usize {
// Delegates to `bit_select`, passing the backing bytes along with this buffer's
// bit offset and length.
bit_select(self.buffer.as_slice(), self.offset, self.len, nth)
}

/// Select positions for multiple ranks in a single pass over the bitmap.
///
/// `sorted_ranks` must be sorted in non-decreasing order, with each value
/// less than [`true_count`](Self::true_count). This is O(L/64 + N) where
/// L = bitmap length and N = number of ranks, compared to O(N × L/64) for
/// individual [`select`](Self::select) calls.
///
/// The returned vector has one entry per element of `sorted_ranks`; an empty
/// slice yields an empty vector without touching the bitmap.
///
/// # Panics
///
/// Panics in debug builds if `sorted_ranks` is not sorted in non-decreasing
/// order.
pub fn select_sorted_batch(&self, sorted_ranks: &[usize]) -> Vec<usize> {
    // Enforce the documented ordering precondition where it is cheap to do so.
    debug_assert!(
        sorted_ranks.windows(2).all(|pair| pair[0] <= pair[1]),
        "sorted_ranks must be sorted in non-decreasing order"
    );

    // Nothing to select: skip the allocation and the bitmap scan entirely.
    if sorted_ranks.is_empty() {
        return Vec::new();
    }

    let mut out = vec![0; sorted_ranks.len()];
    bit_select_sorted_batch(
        self.buffer.as_slice(),
        self.offset,
        self.len,
        sorted_ranks,
        &mut out,
    );
    out
}

/// Get the number of unset bits in the buffer.
pub fn false_count(&self) -> usize {
self.len - self.true_count()
Expand Down
6 changes: 5 additions & 1 deletion vortex-buffer/src/bit/count_ones.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ pub fn count_ones(bytes: &[u8], offset: usize, len: usize) -> usize {
}

#[inline]
fn align_offset_len(bytes: &[u8], offset: usize, len: usize) -> (Option<u8>, &[u8], Option<u8>) {
pub(super) fn align_offset_len(
bytes: &[u8],
offset: usize,
len: usize,
) -> (Option<u8>, &[u8], Option<u8>) {
let start_byte = offset / 8;
let start_bit = offset % 8;
let end_bit = offset + len;
Expand Down
1 change: 1 addition & 0 deletions vortex-buffer/src/bit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod buf_mut;
mod count_ones;
mod macros;
mod ops;
mod select;

pub use arrow_buffer::bit_chunk_iterator::BitChunkIterator;
pub use arrow_buffer::bit_chunk_iterator::BitChunks;
Expand Down
Loading
Loading