Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 3 additions & 13 deletions encodings/bytebool/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -76,32 +76,22 @@ pub struct vortex_bytebool::ByteBoolData

impl vortex_bytebool::ByteBoolData

pub fn vortex_bytebool::ByteBoolData::as_slice(&self) -> &[bool]

pub fn vortex_bytebool::ByteBoolData::buffer(&self) -> &vortex_array::buffer::BufferHandle

pub fn vortex_bytebool::ByteBoolData::from_vec<V: core::convert::Into<vortex_array::validity::Validity>>(data: alloc::vec::Vec<bool>, validity: V) -> Self

pub fn vortex_bytebool::ByteBoolData::is_empty(&self) -> bool

pub fn vortex_bytebool::ByteBoolData::len(&self) -> usize

pub fn vortex_bytebool::ByteBoolData::new(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> Self
pub fn vortex_bytebool::ByteBoolData::new(buffer: vortex_array::buffer::BufferHandle) -> Self

pub fn vortex_bytebool::ByteBoolData::truthy_bytes(&self) -> &[u8]

pub fn vortex_bytebool::ByteBoolData::validate(buffer: &vortex_array::buffer::BufferHandle, validity: &vortex_array::validity::Validity, dtype: &vortex_array::dtype::DType, len: usize) -> vortex_error::VortexResult<()>

impl core::clone::Clone for vortex_bytebool::ByteBoolData

pub fn vortex_bytebool::ByteBoolData::clone(&self) -> vortex_bytebool::ByteBoolData

impl core::convert::From<alloc::vec::Vec<bool>> for vortex_bytebool::ByteBoolData

pub fn vortex_bytebool::ByteBoolData::from(value: alloc::vec::Vec<bool>) -> Self

impl core::convert::From<alloc::vec::Vec<core::option::Option<bool>>> for vortex_bytebool::ByteBoolData

pub fn vortex_bytebool::ByteBoolData::from(value: alloc::vec::Vec<core::option::Option<bool>>) -> Self

impl core::fmt::Debug for vortex_bytebool::ByteBoolData

pub fn vortex_bytebool::ByteBoolData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
Expand Down
91 changes: 30 additions & 61 deletions encodings/bytebool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use vortex_array::vtable::VTable;
use vortex_array::vtable::ValidityVTable;
use vortex_array::vtable::child_to_validity;
use vortex_array::vtable::validity_to_child;
use vortex_buffer::BitBuffer;
use vortex_buffer::BitBufferMut;
use vortex_buffer::ByteBuffer;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
Expand Down Expand Up @@ -131,7 +131,7 @@ impl VTable for ByteBool {
}
let buffer = buffers[0].clone();

let data = ByteBoolData::new(buffer, validity.clone());
let data = ByteBoolData::new(buffer);
let slots = ByteBoolData::make_slots(&validity, len);
Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
}
Expand All @@ -149,7 +149,8 @@ impl VTable for ByteBool {
}

fn execute(array: Array<Self>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
let boolean_buffer = BitBuffer::from(array.as_slice());
// convert truthy values to set/unset bits
let boolean_buffer = BitBufferMut::from(array.truthy_bytes()).freeze();
let validity = array.validity()?;
Ok(ExecutionResult::done(
BoolArray::new(boolean_buffer, validity).into_array(),
Expand Down Expand Up @@ -198,9 +199,17 @@ pub struct ByteBool;

impl ByteBool {
pub fn new(buffer: BufferHandle, validity: Validity) -> ByteBoolArray {
if let Some(len) = validity.maybe_len() {
assert_eq!(
buffer.len(),
len,
"ByteBool validity and bytes must have same length"
);
}
let dtype = DType::Bool(validity.nullability());

let slots = ByteBoolData::make_slots(&validity, buffer.len());
let data = ByteBoolData::new(buffer, validity);
let data = ByteBoolData::new(buffer);
let len = data.len();
unsafe {
Array::from_parts_unchecked(
Expand All @@ -212,29 +221,22 @@ impl ByteBool {
/// Construct a [`ByteBoolArray`] from a `Vec<bool>` and validity.
pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> ByteBoolArray {
let validity = validity.into();
let data = ByteBoolData::from_vec(data, validity.clone());
let dtype = DType::Bool(validity.nullability());
let len = data.len();
let slots = ByteBoolData::make_slots(&validity, len);
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ByteBool, dtype, len, data).with_slots(slots),
)
}
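// NOTE: `bool` and `u8` have the same size and alignment, so this `collect` can reuse the
// input `Vec`'s allocation in place; it is not expected to allocate on release builds.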
let bytes: Vec<u8> = data.into_iter().map(|b| b as u8).collect();
let handle = BufferHandle::new_host(ByteBuffer::from(bytes));
ByteBool::new(handle, validity)
}

/// Construct a [`ByteBoolArray`] from optional bools.
pub fn from_option_vec(data: Vec<Option<bool>>) -> ByteBoolArray {
let validity = Validity::from_iter(data.iter().map(|v| v.is_some()));
let data = ByteBoolData::from(data);
let dtype = DType::Bool(validity.nullability());
let len = data.len();
let slots = ByteBoolData::make_slots(&validity, len);
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(ByteBool, dtype, len, data).with_slots(slots),
)
}
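// NOTE: `Option<bool>` and `u8` are both a single byte, so this `collect` can reuse the
// input `Vec`'s allocation in place; it is not expected to allocate on release builds.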
let bytes: Vec<u8> = data
.into_iter()
.map(|b| b.unwrap_or_default() as u8)
.collect();
let handle = BufferHandle::new_host(ByteBuffer::from(bytes));
ByteBool::new(handle, validity)
}
}

Expand Down Expand Up @@ -265,17 +267,7 @@ impl ByteBoolData {
vec![validity_to_child(validity, len)]
}

pub fn new(buffer: BufferHandle, validity: Validity) -> Self {
let length = buffer.len();
if let Some(vlen) = validity.maybe_len()
&& length != vlen
{
vortex_panic!(
"Buffer length ({}) does not match validity length ({})",
length,
vlen
);
}
pub fn new(buffer: BufferHandle) -> Self {
Self { buffer }
}

Expand All @@ -289,21 +281,15 @@ impl ByteBoolData {
self.buffer.len() == 0
}

// TODO(ngates): deprecate construction from vec
pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> Self {
let validity = validity.into();
// SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
let data: Vec<u8> = unsafe { std::mem::transmute(data) };
Self::new(BufferHandle::new_host(ByteBuffer::from(data)), validity)
}

pub fn buffer(&self) -> &BufferHandle {
&self.buffer
}

pub fn as_slice(&self) -> &[bool] {
// Safety: The internal buffer contains byte-sized bools
unsafe { std::mem::transmute(self.buffer().as_host().as_slice()) }
/// Borrows the underlying buffer as raw 8-bit truthy values, one byte per element.
///
/// A byte equal to zero means `false`; every non-zero byte means `true`.
pub fn truthy_bytes(&self) -> &[u8] {
    // NOTE(review): this goes through `as_host()`, so it presumably requires the buffer to be
    // host-resident — confirm the behavior for non-host buffers.
    let host = self.buffer().as_host();
    host.as_slice()
}
}

Expand All @@ -326,23 +312,6 @@ impl OperationsVTable<ByteBool> for ByteBool {
}
}

impl From<Vec<bool>> for ByteBoolData {
fn from(value: Vec<bool>) -> Self {
Self::from_vec(value, Validity::AllValid)
}
}

impl From<Vec<Option<bool>>> for ByteBoolData {
fn from(value: Vec<Option<bool>>) -> Self {
let validity = Validity::from_iter(value.iter().map(|v| v.is_some()));

// This doesn't reallocate, and the compiler even vectorizes it
let data = value.into_iter().map(Option::unwrap_or_default).collect();

Self::from_vec(data, validity)
}
}

#[cfg(test)]
mod tests {
use vortex_array::ArrayContext;
Expand Down
14 changes: 9 additions & 5 deletions encodings/bytebool/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::dict::TakeExecute;
use vortex_array::buffer::BufferHandle;
use vortex_array::dtype::DType;
use vortex_array::match_each_integer_ptype;
use vortex_array::scalar_fn::fns::cast::CastReduce;
use vortex_array::scalar_fn::fns::mask::MaskReduce;
use vortex_array::validity::Validity;
use vortex_buffer::ByteBuffer;
use vortex_error::VortexResult;

use super::ByteBool;
Expand Down Expand Up @@ -58,23 +60,25 @@ impl TakeExecute for ByteBool {
ctx: &mut ExecutionCtx,
) -> VortexResult<Option<ArrayRef>> {
let indices = indices.clone().execute::<PrimitiveArray>(ctx)?;
let bools = array.as_slice();
let values = array.truthy_bytes();

// This handles combining validity from both source array and nullable indices
let validity = array.validity()?.take(&indices.clone().into_array())?;

let taken_bools = match_each_integer_ptype!(indices.ptype(), |I| {
let taken = match_each_integer_ptype!(indices.ptype(), |I| {
indices
.as_slice::<I>()
.iter()
.map(|&idx| {
let idx: usize = idx.as_();
bools[idx]
values[idx]
})
.collect::<Vec<bool>>()
.collect::<ByteBuffer>()
});

Ok(Some(ByteBool::from_vec(taken_bools, validity).into_array()))
Ok(Some(
ByteBool::new(BufferHandle::new_host(taken), validity).into_array(),
))
}
}

Expand Down
39 changes: 39 additions & 0 deletions encodings/bytebool/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,45 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! A Vortex encoding that mirrors Arrow's [8-bit Boolean canonical extension type][spec].
//!
//! Each element is stored as a single byte. The zero byte represents `false` and any
//! non-zero byte represents `true`, matching the truthy semantics of the Arrow spec. This
//! costs 8x the storage of the bit-packed `Bool` layout in exchange for cheaper per-element
//! access, which is useful when data arrives from a C ABI or another source that already
//! emits byte-wide booleans. On execution the array materializes into the standard
//! bit-packed [`BoolArray`][vortex_array::arrays::BoolArray].
//!
//! # Examples
//!
//! Any non-zero byte in the backing buffer is treated as `true` when the array executes
//! to a canonical [`BoolArray`][vortex_array::arrays::BoolArray]:
//!
//! ```
//! # use vortex_array::{IntoArray, LEGACY_SESSION, VortexSessionExecute};
//! # use vortex_array::arrays::BoolArray;
//! # use vortex_array::arrays::bool::BoolArrayExt;
//! # use vortex_array::buffer::BufferHandle;
//! # use vortex_array::validity::Validity;
//! # use vortex_buffer::ByteBuffer;
//! # use vortex_bytebool::ByteBool;
//! # use vortex_error::VortexResult;
//! # fn main() -> VortexResult<()> {
//! # let mut ctx = LEGACY_SESSION.create_execution_ctx();
//! let handle = BufferHandle::new_host(ByteBuffer::from(vec![0u8, 1, 42, 0]));
//! let array = ByteBool::new(handle, Validity::NonNullable);
//!
//! let bits = array.into_array().execute::<BoolArray>(&mut ctx)?.to_bit_buffer();
//! assert!(!bits.value(0));
//! assert!(bits.value(1));
//! assert!(bits.value(2)); // byte 42 is truthy
//! assert!(!bits.value(3));
//! # Ok(())
//! # }
//! ```
//!
//! [spec]: https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean

pub use array::*;

mod array;
Expand Down
4 changes: 4 additions & 0 deletions vortex-buffer/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,10 @@ impl core::convert::From<&[bool]> for vortex_buffer::BitBufferMut

pub fn vortex_buffer::BitBufferMut::from(value: &[bool]) -> Self

impl core::convert::From<&[u8]> for vortex_buffer::BitBufferMut

pub fn vortex_buffer::BitBufferMut::from(value: &[u8]) -> Self

impl core::convert::From<alloc::vec::Vec<bool>> for vortex_buffer::BitBufferMut

pub fn vortex_buffer::BitBufferMut::from(value: alloc::vec::Vec<bool>) -> Self
Expand Down
7 changes: 7 additions & 0 deletions vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,13 @@ impl From<&[bool]> for BitBufferMut {
}
}

// allow building a buffer from a set of truthy byte values.
impl From<&[u8]> for BitBufferMut {
fn from(value: &[u8]) -> Self {
BitBufferMut::collect_bool(value.len(), |i| value[i] > 0)
}
}

impl From<Vec<bool>> for BitBufferMut {
fn from(value: Vec<bool>) -> Self {
value.as_slice().into()
Expand Down
Loading