diff --git a/vortex-array/src/arrays/filter/execute/bitbuffer.rs b/vortex-array/src/arrays/filter/execute/bitbuffer.rs index e1c61b2ebc1..5c20f195c0c 100644 --- a/vortex-array/src/arrays/filter/execute/bitbuffer.rs +++ b/vortex-array/src/arrays/filter/execute/bitbuffer.rs @@ -21,30 +21,50 @@ pub(super) fn filter_bit_buffer(bb: &BitBuffer, mask: &MaskValues) -> BitBuffer } fn filter_bitbuffer_by_indices(bb: &BitBuffer, indices: &[usize]) -> BitBuffer { + if indices.is_empty() { + return BitBuffer::empty(); + } + + let mut out = BitBufferMut::with_capacity(indices.len()); let bools = bb.inner().as_ref(); let bit_offset = bb.offset(); - // FIXME(ngates): this is slower than it could be! - BitBufferMut::collect_bool(indices.len(), |idx| { - let idx = *unsafe { indices.get_unchecked(idx) }; - get_bit(bools, bit_offset + idx) // Panics if out of bounds. - }) - .freeze() + // Scan for contiguous runs in the indices and copy them in bulk. + let mut i = 0; + while i < indices.len() { + let run_start = indices[i]; + let mut run_end = run_start + 1; + let mut j = i + 1; + while j < indices.len() && indices[j] == run_end { + run_end += 1; + j += 1; + } + + let run_len = j - i; + if run_len >= 64 { + // Bulk copy for long contiguous runs. + out.append_buffer(&bb.slice(run_start..run_end)); + } else { + // Gather individual bits for short/scattered indices. + for k in i..j { + let idx = unsafe { *indices.get_unchecked(k) }; + out.append(get_bit(bools, bit_offset + idx)); + } + } + + i = j; + } + + out.freeze() } #[allow(unused)] fn filter_bitbuffer_by_slices(bb: &BitBuffer, slices: &[(usize, usize)]) -> BitBuffer { - let bools = bb.inner().as_ref(); - let bit_offset = bb.offset(); let output_len: usize = slices.iter().map(|(start, end)| end - start).sum(); - let mut out = BitBufferMut::with_capacity(output_len); - // FIXME(ngates): this is slower than it could be! for &(start, end) in slices { - for idx in start..end { - out.append(get_bit(bools, bit_offset + idx)); // Panics if out of bounds. - } + out.append_buffer(&bb.slice(start..end)); } out.freeze() diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 61cc5e30044..0ef41777289 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -24,7 +24,9 @@ use crate::bit::UnalignedBitChunk; use crate::bit::count_ones::count_ones; use crate::bit::get_bit_unchecked; use crate::bit::ops::bitwise_binary_op; +use crate::bit::ops::bitwise_binary_op_lhs_owned; use crate::bit::ops::bitwise_unary_op; +use crate::bit::ops::bitwise_unary_op_copy; use crate::buffer; /// An immutable bitset stored as a packed byte buffer. @@ -57,6 +59,29 @@ impl PartialEq for BitBuffer { return false; } + if self.len == 0 { + return true; + } + + // Fast path: both byte-aligned and same length — direct byte comparison. + if self.offset == 0 && other.offset == 0 { + let full_bytes = self.len / 8; + let self_bytes = &self.buffer.as_slice()[..full_bytes]; + let other_bytes = &other.buffer.as_slice()[..full_bytes]; + if self_bytes != other_bytes { + return false; + } + // Compare remaining bits in the last partial byte. + let rem = self.len % 8; + if rem != 0 { + let mask = (1u8 << rem) - 1; + let a = self.buffer.as_slice()[full_bytes] & mask; + let b = other.buffer.as_slice()[full_bytes] & mask; + return a == b; + } + return true; + } + self.chunks() .iter_padded() .zip(other.chunks().iter_padded()) @@ -315,11 +340,13 @@ impl BitBuffer { } /// Get the number of set bits in the buffer. + #[inline] pub fn true_count(&self) -> usize { count_ones(self.buffer.as_slice(), self.offset, self.len) } /// Get the number of unset bits in the buffer. + #[inline] pub fn false_count(&self) -> usize { self.len - self.true_count() } @@ -343,12 +370,14 @@ impl BitBuffer { pub fn sliced(&self) -> Self { if self.offset.is_multiple_of(8) { return Self::new( - self.buffer.slice(self.offset / 8..self.len.div_ceil(8)), + self.buffer + .slice(self.offset / 8..(self.offset + self.len).div_ceil(8)), self.len, ); } - bitwise_unary_op(self.clone(), |a| a) + // Allocate directly rather than clone + identity op which would fail try_into_mut. + bitwise_unary_op_copy(self, |a| a) } } @@ -392,7 +421,7 @@ impl BitOr for BitBuffer { #[inline] fn bitor(self, rhs: Self) -> Self::Output { - BitOr::bitor(&self, &rhs) + bitwise_binary_op_lhs_owned(self, &rhs, |a, b| a | b) } } @@ -410,7 +439,7 @@ impl BitOr<&BitBuffer> for BitBuffer { #[inline] fn bitor(self, rhs: &BitBuffer) -> Self::Output { - (&self).bitor(rhs) + bitwise_binary_op_lhs_owned(self, rhs, |a, b| a | b) } } @@ -437,7 +466,7 @@ impl BitAnd<&BitBuffer> for BitBuffer { #[inline] fn bitand(self, rhs: &BitBuffer) -> Self::Output { - (&self).bitand(rhs) + bitwise_binary_op_lhs_owned(self, rhs, |a, b| a & b) } } @@ -446,7 +475,7 @@ impl BitAnd for BitBuffer { #[inline] fn bitand(self, rhs: BitBuffer) -> Self::Output { - (&self).bitand(&rhs) + bitwise_binary_op_lhs_owned(self, &rhs, |a, b| a & b) } } @@ -455,7 +484,9 @@ impl Not for &BitBuffer { #[inline] fn not(self) -> Self::Output { - !self.clone() + // Allocate directly rather than clone+try_into_mut, which always fails + // since the clone shares the Arc with the original reference. + bitwise_unary_op_copy(self, |a| !a) } } @@ -482,7 +513,7 @@ impl BitXor<&BitBuffer> for BitBuffer { #[inline] fn bitxor(self, rhs: &BitBuffer) -> Self::Output { - (&self).bitxor(rhs) + bitwise_binary_op_lhs_owned(self, rhs, |a, b| a ^ b) } } @@ -495,6 +526,11 @@ impl BitBuffer { bitwise_binary_op(self, rhs, |a, b| a & !b) } + /// Owned variant of [`bitand_not`](Self::bitand_not) that can mutate in-place when possible. + pub fn into_bitand_not(self, rhs: &BitBuffer) -> BitBuffer { + bitwise_binary_op_lhs_owned(self, rhs, |a, b| a & !b) + } + /// Iterate through bits in a buffer. /// /// # Arguments @@ -514,44 +550,23 @@ impl BitBuffer { return; } - let is_bit_set = |byte: u8, bit_idx: usize| (byte & (1 << bit_idx)) != 0; - let bit_offset = self.offset % 8; - let mut buffer_ptr = unsafe { self.buffer.as_ptr().add(self.offset / 8) }; - let mut callback_idx = 0; - - // Handle incomplete first byte. - if bit_offset > 0 { - let bits_in_first_byte = (8 - bit_offset).min(total_bits); - let byte = unsafe { *buffer_ptr }; - - for bit_idx in 0..bits_in_first_byte { - f(callback_idx, is_bit_set(byte, bit_offset + bit_idx)); - callback_idx += 1; - } - - buffer_ptr = unsafe { buffer_ptr.add(1) }; - } - - // Process complete bytes. - let complete_bytes = (total_bits - callback_idx) / 8; - for _ in 0..complete_bytes { - let byte = unsafe { *buffer_ptr }; + // Process in 64-bit chunks for better ILP and fewer loop iterations. + let chunks = self.chunks(); + let chunks_count = total_bits / 64; + let remainder = total_bits % 64; - for bit_idx in 0..8 { - f(callback_idx, is_bit_set(byte, bit_idx)); - callback_idx += 1; + for (chunk_idx, chunk) in chunks.iter().enumerate() { + let base = chunk_idx * 64; + for bit_idx in 0..64 { + f(base + bit_idx, (chunk >> bit_idx) & 1 == 1); } - buffer_ptr = unsafe { buffer_ptr.add(1) }; } - // Handle remaining bits at the end. - let remaining_bits = total_bits - callback_idx; - if remaining_bits > 0 { - let byte = unsafe { *buffer_ptr }; - - for bit_idx in 0..remaining_bits { - f(callback_idx, is_bit_set(byte, bit_idx)); - callback_idx += 1; + if remainder != 0 { + let rem_chunk = chunks.remainder_bits(); + let base = chunks_count * 64; + for bit_idx in 0..remainder { + f(base + bit_idx, (rem_chunk >> bit_idx) & 1 == 1); } } } diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index e0e2f85877d..c3871a48a52 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -181,9 +181,17 @@ impl BitBufferMut { /// Create a bit buffer of `len` with `indices` set as true. pub fn from_indices(len: usize, indices: &[usize]) -> BitBufferMut { + let num_bytes = len.div_ceil(8); let mut buf = BitBufferMut::new_unset(len); - // TODO(ngates): for dense indices, we can do better by collecting into u64s. - indices.iter().for_each(|&idx| buf.set(idx)); + let slice = &mut buf.buffer.as_mut_slice()[..num_bytes]; + + // Batch set bits by writing directly to the byte slice. + let ptr = slice.as_mut_ptr(); + for &idx in indices { + debug_assert!(idx < len); + // SAFETY: idx < len ensures idx/8 < num_bytes. + unsafe { set_bit_unchecked(ptr, idx) }; + } buf } @@ -303,6 +311,7 @@ impl BitBufferMut { /// Set the bit at `index` to the given boolean value. /// /// This operation is checked so if `index` exceeds the buffer length, this will panic. + #[inline] pub fn set_to(&mut self, index: usize, value: bool) { if value { self.set(index); @@ -316,6 +325,7 @@ impl BitBufferMut { /// # Safety /// /// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer. + #[inline] pub unsafe fn set_to_unchecked(&mut self, index: usize, value: bool) { if value { // SAFETY: checked by caller @@ -329,6 +339,7 @@ impl BitBufferMut { /// Set a position to `true`. /// /// This operation is checked so if `index` exceeds the buffer length, this will panic. + #[inline] pub fn set(&mut self, index: usize) { assert!(index < self.len, "index {index} exceeds len {}", self.len); @@ -354,6 +365,7 @@ impl BitBufferMut { /// # Safety /// /// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer. + #[inline] unsafe fn set_unchecked(&mut self, index: usize) { // SAFETY: checked by caller unsafe { set_bit_unchecked(self.buffer.as_mut_ptr(), self.offset + index) } @@ -406,6 +418,7 @@ impl BitBufferMut { } /// Append a new boolean into the bit buffer, incrementing the length. + #[inline] pub fn append(&mut self, value: bool) { if value { self.append_true() @@ -415,6 +428,7 @@ impl BitBufferMut { } /// Append a new true value to the buffer. + #[inline] pub fn append_true(&mut self) { let bit_pos = self.offset + self.len; let byte_pos = bit_pos / 8; @@ -431,21 +445,18 @@ impl BitBufferMut { } /// Append a new false value to the buffer. + #[inline] pub fn append_false(&mut self) { let bit_pos = self.offset + self.len; let byte_pos = bit_pos / 8; - let bit_in_byte = bit_pos % 8; - // Ensure buffer has enough bytes + // Ensure buffer has enough bytes (pushed as 0x00, so bit is already unset). if byte_pos >= self.buffer.len() { self.buffer.push(0u8); } - // Bit is already 0 if we just pushed a new byte, otherwise ensure it's unset - if bit_in_byte != 0 { - self.buffer.as_mut_slice()[byte_pos] &= !(1 << bit_in_byte); - } - + // The bit is guaranteed to be 0: new bytes are zero-initialized, and + // existing bytes have this bit unset (it's beyond the current length). self.len += 1; } @@ -514,24 +525,39 @@ impl BitBufferMut { let end_bit_pos = start_bit_pos + bit_len; let required_bytes = end_bit_pos.div_ceil(8); - // Ensure buffer has enough bytes + // Ensure buffer has enough bytes, zero-initialized for OR-based writes. if required_bytes > self.buffer.len() { self.buffer.push_n(0x00, required_bytes - self.buffer.len()); } - // Use bitvec for efficient bit copying - let self_slice = self - .buffer - .as_mut_slice() - .view_bits_mut::(); - let other_slice = buffer - .inner() - .as_slice() - .view_bits::(); - - // Copy from source buffer (accounting for its offset) to destination (accounting for our offset + len) - let source_range = buffer.offset()..buffer.offset() + bit_len; - self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]); + let dst_bit_offset = start_bit_pos % 8; + let src_bit_offset = buffer.offset(); + + if dst_bit_offset == 0 && src_bit_offset == 0 { + // Both byte-aligned: use memcpy for full bytes, then mask the tail. + let dst_byte = start_bit_pos / 8; + let src_bytes = buffer.inner().as_slice(); + let full_bytes = bit_len / 8; + self.buffer.as_mut_slice()[dst_byte..dst_byte + full_bytes] + .copy_from_slice(&src_bytes[..full_bytes]); + let rem = bit_len % 8; + if rem != 0 { + let mask = (1u8 << rem) - 1; + self.buffer.as_mut_slice()[dst_byte + full_bytes] |= src_bytes[full_bytes] & mask; + } + } else { + // Use bitvec for unaligned bit copying. + let self_slice = self + .buffer + .as_mut_slice() + .view_bits_mut::(); + let other_slice = buffer + .inner() + .as_slice() + .view_bits::(); + let source_range = src_bit_offset..src_bit_offset + bit_len; + self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]); + } self.len += bit_len; } @@ -627,11 +653,13 @@ impl BitBufferMut { } /// Get the number of set bits in the buffer. + #[inline] pub fn true_count(&self) -> usize { self.unaligned_chunks().count_ones() } /// Get the number of unset bits in the buffer. + #[inline] pub fn false_count(&self) -> usize { self.len - self.true_count() } @@ -697,9 +725,68 @@ impl FromIterator for BitBufferMut { } } - // Append the remaining items (as we do not know how many more there are). - for v in iter { - buf.append(v); + // Batch remaining items in 64-bit words instead of appending one bit at a time. + 'outer: loop { + let mut packed = 0u64; + for bit_idx in 0..64 { + let Some(v) = iter.next() else { + // Flush partial word. + if bit_idx > 0 { + let old_len = buf.len; + let new_len = old_len + bit_idx; + let required_bytes = (buf.offset + new_len).div_ceil(8); + if required_bytes > buf.buffer.len() { + buf.buffer.push_n(0x00, required_bytes - buf.buffer.len()); + } + // Write the packed bits into the buffer at the current bit position. + let byte_start = (buf.offset + old_len) / 8; + let bit_start = (buf.offset + old_len) % 8; + if bit_start == 0 { + let bytes = packed.to_le_bytes(); + let bytes_needed = bit_idx.div_ceil(8); + buf.buffer.as_mut_slice()[byte_start..byte_start + bytes_needed] + .copy_from_slice(&bytes[..bytes_needed]); + } else { + // Unaligned: set bits individually from packed word. + let ptr = buf.buffer.as_mut_ptr(); + for j in 0..bit_idx { + if (packed >> j) & 1 == 1 { + unsafe { + set_bit_unchecked(ptr, buf.offset + old_len + j); + } + } + } + } + buf.len = new_len; + } + break 'outer; + }; + packed |= (v as u64) << bit_idx; + } + + // Flush full 64-bit word. + let old_len = buf.len; + let new_len = old_len + 64; + let required_bytes = (buf.offset + new_len).div_ceil(8); + if required_bytes > buf.buffer.len() { + buf.buffer.push_n(0x00, required_bytes - buf.buffer.len()); + } + let byte_start = (buf.offset + old_len) / 8; + let bit_start = (buf.offset + old_len) % 8; + if bit_start == 0 { + buf.buffer.as_mut_slice()[byte_start..byte_start + 8] + .copy_from_slice(&packed.to_le_bytes()); + } else { + let ptr = buf.buffer.as_mut_ptr(); + for j in 0..64usize { + if (packed >> j) & 1 == 1 { + unsafe { + set_bit_unchecked(ptr, buf.offset + old_len + j); + } + } + } + } + buf.len = new_len; } buf diff --git a/vortex-buffer/src/bit/ops.rs b/vortex-buffer/src/bit/ops.rs index 9406c728f06..78820aa4f99 100644 --- a/vortex-buffer/src/bit/ops.rs +++ b/vortex-buffer/src/bit/ops.rs @@ -7,50 +7,61 @@ use crate::Buffer; use crate::ByteBufferMut; use crate::trusted_len::TrustedLenExt; +/// Apply a unary operation to a [`BitBuffer`], always allocating a new output buffer. #[inline] -pub(super) fn bitwise_unary_op u64>(buffer: BitBuffer, mut op: F) -> BitBuffer { - match buffer.try_into_mut() { - Ok(mut buf) => { - bitwise_unary_op_mut(&mut buf, op); - buf.freeze() - } - Err(buffer) => { - let len = buffer.len(); - let offset = buffer.offset(); - let src = buffer.inner().as_slice(); +pub(super) fn bitwise_unary_op_copy u64>( + buffer: &BitBuffer, + mut op: F, +) -> BitBuffer { + let len = buffer.len(); + let offset = buffer.offset(); + let src = buffer.inner().as_slice(); - let mut dst = ByteBufferMut::with_capacity(src.len()); - let u64_len = src.len() / 8; - let remainder = src.len() % 8; + let mut dst = ByteBufferMut::with_capacity(src.len()); + let u64_len = src.len() / 8; + let remainder = src.len() % 8; - let mut src_ptr = src.as_ptr() as *const u64; - let mut dst_ptr = dst.spare_capacity_mut().as_mut_ptr() as *mut u64; - for _ in 0..u64_len { - let value = unsafe { src_ptr.read_unaligned() }; - unsafe { dst_ptr.write_unaligned(op(value)) }; - src_ptr = unsafe { src_ptr.add(1) }; - dst_ptr = unsafe { dst_ptr.add(1) }; - } + let mut src_ptr = src.as_ptr() as *const u64; + let mut dst_ptr = dst.spare_capacity_mut().as_mut_ptr() as *mut u64; + for _ in 0..u64_len { + let value = unsafe { src_ptr.read_unaligned() }; + unsafe { dst_ptr.write_unaligned(op(value)) }; + src_ptr = unsafe { src_ptr.add(1) }; + dst_ptr = unsafe { dst_ptr.add(1) }; + } - if remainder > 0 { - let mut remainder_u64 = 0u64; - let src_bytes = src_ptr as *const u8; - let dst_bytes = dst_ptr as *mut u8; - for i in 0..remainder { - let byte = unsafe { src_bytes.add(i).read() }; - remainder_u64 |= (byte as u64) << (i * 8); - } - let remainder_u64 = op(remainder_u64); - for i in 0..remainder { - let byte = ((remainder_u64 >> (i * 8)) & 0xFF) as u8; - unsafe { dst_bytes.add(i).write(byte) }; - } - } + if remainder > 0 { + let mut remainder_u64 = 0u64; + let src_bytes = src_ptr as *const u8; + let dst_bytes = dst_ptr as *mut u8; + for i in 0..remainder { + let byte = unsafe { src_bytes.add(i).read() }; + remainder_u64 |= (byte as u64) << (i * 8); + } + let remainder_u64 = op(remainder_u64); + for i in 0..remainder { + let byte = ((remainder_u64 >> (i * 8)) & 0xFF) as u8; + unsafe { dst_bytes.add(i).write(byte) }; + } + } + + // SAFETY: we wrote exactly src.len() bytes into the spare capacity. + unsafe { dst.set_len(src.len()) }; + BitBuffer::new_with_offset(dst.freeze(), len, offset) +} - // SAFETY: we wrote exactly src.len() bytes into the spare capacity. - unsafe { dst.set_len(src.len()) }; - BitBuffer::new_with_offset(dst.freeze(), len, offset) +/// Apply a unary operation to an owned [`BitBuffer`], mutating in-place when possible. +/// +/// Tries to get exclusive access via `try_into_mut`. If the backing storage is shared +/// (Arc refcount > 1), falls back to [`bitwise_unary_op_copy`]. +#[inline] +pub(super) fn bitwise_unary_op u64>(buffer: BitBuffer, op: F) -> BitBuffer { + match buffer.try_into_mut() { + Ok(mut buf) => { + bitwise_unary_op_mut(&mut buf, op); + buf.freeze() } + Err(buffer) => bitwise_unary_op_copy(&buffer, op), } } @@ -87,6 +98,57 @@ pub(super) fn bitwise_unary_op_mut u64>(buffer: &mut BitBufferM } } +/// Apply a binary operation with an owned left operand, mutating in-place when possible. +/// +/// Tries `try_into_mut` on the left operand. If the backing storage has exclusive access, +/// the operation is performed in-place (zero allocation). Otherwise, falls back to +/// [`bitwise_binary_op`] which allocates a new buffer. +pub(super) fn bitwise_binary_op_lhs_owned u64>( + left: BitBuffer, + right: &BitBuffer, + mut op: F, +) -> BitBuffer { + assert_eq!(left.len(), right.len()); + + match left.try_into_mut() { + Ok(mut buf) => { + let right_slice = right.inner().as_slice(); + let left_slice = buf.as_mut_slice(); + + let u64_len = left_slice.len().min(right_slice.len()) / 8; + let remainder = left_slice.len().min(right_slice.len()) % 8; + + let mut l_ptr = left_slice.as_mut_ptr() as *mut u64; + let mut r_ptr = right_slice.as_ptr() as *const u64; + for _ in 0..u64_len { + let lv = unsafe { l_ptr.read_unaligned() }; + let rv = unsafe { r_ptr.read_unaligned() }; + unsafe { l_ptr.write_unaligned(op(lv, rv)) }; + l_ptr = unsafe { l_ptr.add(1) }; + r_ptr = unsafe { r_ptr.add(1) }; + } + + if remainder > 0 { + let l_bytes = l_ptr as *mut u8; + let r_bytes = r_ptr as *const u8; + let mut l_u64 = 0u64; + let mut r_u64 = 0u64; + for i in 0..remainder { + l_u64 |= (unsafe { l_bytes.add(i).read() } as u64) << (i * 8); + r_u64 |= (unsafe { r_bytes.add(i).read() } as u64) << (i * 8); + } + let result = op(l_u64, r_u64); + for i in 0..remainder { + unsafe { l_bytes.add(i).write(((result >> (i * 8)) & 0xFF) as u8) }; + } + } + + buf.freeze() + } + Err(left) => bitwise_binary_op(&left, right, op), + } +} + pub(super) fn bitwise_binary_op u64>( left: &BitBuffer, right: &BitBuffer, diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index 29627e2b4fd..40695c0f2dd 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -145,7 +145,8 @@ impl LayoutReader for FlatReader { array = array.slice(row_range.clone())?; } - let array_mask = if mask.density() < EXPR_EVAL_THRESHOLD { + let mask_density = mask.density(); + let array_mask = if mask_density < EXPR_EVAL_THRESHOLD { // We have the choice to apply the filter or the expression first, we apply the // expression first so that it can try pushing down itself and then the filter // after this. @@ -168,7 +169,7 @@ impl LayoutReader for FlatReader { "Flat mask evaluation {} - {} (mask = {}) => {}", name, expr, - mask.density(), + mask_density, array_mask.density(), ); diff --git a/vortex-layout/src/layouts/zoned/reader.rs b/vortex-layout/src/layouts/zoned/reader.rs index 23118e8ce5c..5ed2918877a 100644 --- a/vortex-layout/src/layouts/zoned/reader.rs +++ b/vortex-layout/src/layouts/zoned/reader.rs @@ -288,6 +288,7 @@ impl LayoutReader for ZonedReader { assert_eq!(stats_mask.len(), mask.len(), "Mask length mismatch"); // Intersect the masks. + let mask_density = mask.density(); let mut stats_mask = mask.bitand(&stats_mask); // Forward to data child for further pruning. @@ -300,7 +301,7 @@ impl LayoutReader for ZonedReader { "Stats evaluation approx {} - {} (mask = {}) => {}", name, expr, - mask.density(), + mask_density, stats_mask.density(), ); diff --git a/vortex-mask/src/bitops.rs b/vortex-mask/src/bitops.rs index 3ad681db161..b8364135a31 100644 --- a/vortex-mask/src/bitops.rs +++ b/vortex-mask/src/bitops.rs @@ -28,6 +28,26 @@ impl BitAnd for &Mask { } } +impl BitAnd<&Mask> for Mask { + type Output = Mask; + + /// Owned-left AND: can reuse the left buffer in-place when possible. + fn bitand(self, rhs: &Mask) -> Self::Output { + if self.len() != rhs.len() { + vortex_panic!("Masks must have the same length"); + } + + match (self.bit_buffer(), rhs.bit_buffer()) { + (AllOr::All, _) => rhs.clone(), + (AllOr::None, _) | (_, AllOr::None) => Mask::new_false(self.len()), + (_, AllOr::All) => self, + (AllOr::Some(_), AllOr::Some(rhs_buf)) => { + Mask::from_buffer(self.into_bit_buffer() & rhs_buf) + } + } + } +} + impl BitOr for &Mask { type Output = Mask; @@ -46,6 +66,26 @@ impl BitOr for &Mask { } } +impl BitOr<&Mask> for Mask { + type Output = Mask; + + /// Owned-left OR: can reuse the left buffer in-place when possible. + fn bitor(self, rhs: &Mask) -> Self::Output { + if self.len() != rhs.len() { + vortex_panic!("Masks must have the same length"); + } + + match (self.bit_buffer(), rhs.bit_buffer()) { + (AllOr::All, _) | (_, AllOr::All) => Mask::new_true(self.len()), + (AllOr::None, _) => rhs.clone(), + (_, AllOr::None) => self, + (AllOr::Some(_), AllOr::Some(rhs_buf)) => { + Mask::from_buffer(self.into_bit_buffer() | rhs_buf) + } + } + } +} + impl Mask { /// Computes `self & !rhs` (AND NOT), equivalent to set difference. pub fn bitand_not(self, rhs: &Mask) -> Mask { @@ -56,7 +96,9 @@ impl Mask { (AllOr::None, _) | (_, AllOr::All) => Mask::new_false(self.len()), (_, AllOr::None) => self, (AllOr::All, _) => !rhs, - (AllOr::Some(lhs), AllOr::Some(rhs)) => Mask::from_buffer(lhs.bitand_not(rhs)), + (AllOr::Some(_), AllOr::Some(rhs_buf)) => { + Mask::from_buffer(self.into_bit_buffer().into_bitand_not(rhs_buf)) + } } } }