diff --git a/encodings/alp/src/alp/plugin.rs b/encodings/alp/src/alp/plugin.rs index 3b2c36ebd02..2ac3ac85c54 100644 --- a/encodings/alp/src/alp/plugin.rs +++ b/encodings/alp/src/alp/plugin.rs @@ -133,7 +133,7 @@ mod tests { let array = alp_encoded.as_array(); - let metadata = array.metadata(&SESSION)?.unwrap_or_default(); + let metadata = SESSION.array_serialize(array)?.unwrap(); let children = array.children(); let buffers = array .buffers() @@ -182,7 +182,7 @@ mod tests { let array = alp_encoded.as_array(); - let metadata = array.metadata(&SESSION)?.unwrap_or_default(); + let metadata = SESSION.array_serialize(array)?.unwrap(); let children = array.children(); let buffers = array .buffers() @@ -213,7 +213,7 @@ mod tests { fn primitive_array_returns_error() { let array = PrimitiveArray::from_iter([1.0f64, 2.0, 3.0]).into_array(); - let metadata = array.metadata(&SESSION).unwrap().unwrap_or_default(); + let metadata = SESSION.array_serialize(&array).unwrap().unwrap(); let children = array.children(); let buffers = array .buffers() diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 28205d973be..e98c5bd656d 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -352,7 +352,6 @@ impl From>> for ByteBoolData { mod tests { use vortex_array::ArrayContext; use vortex_array::IntoArray; - use vortex_array::LEGACY_SESSION; use vortex_array::assert_arrays_eq; use vortex_array::serde::SerializeOptions; use vortex_array::serde::SerializedArray; @@ -407,7 +406,7 @@ mod tests { let serialized = array .clone() .into_array() - .serialize(&ctx, &LEGACY_SESSION, &SerializeOptions::default()) + .serialize(&ctx, &session, &SerializeOptions::default()) .unwrap(); let mut concat = ByteBufferMut::empty(); diff --git a/encodings/fastlanes/src/bitpacking/plugin.rs b/encodings/fastlanes/src/bitpacking/plugin.rs index fec101ff895..49511a10748 100644 --- a/encodings/fastlanes/src/bitpacking/plugin.rs +++ 
b/encodings/fastlanes/src/bitpacking/plugin.rs @@ -133,7 +133,7 @@ mod tests { let array = bitpacked.as_array(); - let metadata = array.metadata(&SESSION)?.unwrap_or_default(); + let metadata = SESSION.array_serialize(array)?.unwrap(); let children = array.children(); let buffers = array .buffers() @@ -182,7 +182,7 @@ mod tests { let array = bitpacked.as_array(); - let metadata = array.metadata(&SESSION)?.unwrap_or_default(); + let metadata = SESSION.array_serialize(array)?.unwrap(); let children = array.children(); let buffers = array .buffers() @@ -212,7 +212,7 @@ mod tests { fn primitive_array_returns_error() -> VortexResult<()> { let array = PrimitiveArray::from_iter([1i32, 2, 3]).into_array(); - let metadata = array.metadata(&SESSION)?.unwrap_or_default(); + let metadata = SESSION.array_serialize(&array)?.unwrap(); let children = array.children(); let buffers = array .buffers() diff --git a/encodings/parquet-variant/src/vtable.rs b/encodings/parquet-variant/src/vtable.rs index 7dc3b3c90b2..75ea8036c9a 100644 --- a/encodings/parquet-variant/src/vtable.rs +++ b/encodings/parquet-variant/src/vtable.rs @@ -244,13 +244,13 @@ mod tests { use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::arrays::VarBinViewArray; - use vortex_array::arrays::Variant; use vortex_array::arrays::VariantArray; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; use vortex_array::serde::SerializeOptions; use vortex_array::serde::SerializedArray; + use vortex_array::session::ArraySession; use vortex_array::session::ArraySessionExt; use vortex_array::validity::Validity; use vortex_buffer::BitBuffer; @@ -261,11 +261,14 @@ mod tests { use crate::ParquetVariant; use crate::array::ParquetVariantArrayExt; + fn roundtrip(array: ArrayRef) -> ArrayRef { let dtype = array.dtype().clone(); let len = array.len(); - let session = VortexSession::empty().with::(); + let session = VortexSession::empty().with::(); + 
session.arrays().register(ParquetVariant); + let ctx = ArrayContext::empty(); let serialized = array .serialize(&ctx, &session, &SerializeOptions::default()) @@ -276,8 +279,6 @@ mod tests { concat.extend_from_slice(buf.as_ref()); } let concat = concat.freeze(); - session.arrays().register(ParquetVariant); - session.arrays().register(Variant); let parts = SerializedArray::try_from(concat).unwrap(); parts diff --git a/encodings/pco/src/lib.rs b/encodings/pco/src/lib.rs index 924322c2a7e..fcf9a9397fb 100644 --- a/encodings/pco/src/lib.rs +++ b/encodings/pco/src/lib.rs @@ -5,8 +5,6 @@ mod array; mod compute; mod rules; mod slice; -#[cfg(test)] -mod test; pub use array::*; @@ -35,3 +33,6 @@ pub struct PcoMetadata { #[prost(message, repeated, tag = "2")] pub chunks: Vec, } + +#[cfg(test)] +mod tests; diff --git a/encodings/pco/src/test.rs b/encodings/pco/src/tests.rs similarity index 99% rename from encodings/pco/src/test.rs rename to encodings/pco/src/tests.rs index f674fb7d1bb..d694a0efdbe 100644 --- a/encodings/pco/src/test.rs +++ b/encodings/pco/src/tests.rs @@ -184,7 +184,7 @@ fn test_serde() -> VortexResult<()> { let bytes = pco .serialize( &context, - &LEGACY_SESSION, + &SESSION, &SerializeOptions { offset: 0, include_padding: true, diff --git a/encodings/runend/src/arrow.rs b/encodings/runend/src/arrow.rs index 564c1d06cb2..206a014fa7f 100644 --- a/encodings/runend/src/arrow.rs +++ b/encodings/runend/src/arrow.rs @@ -90,6 +90,7 @@ mod tests { use vortex_array::search_sorted::SearchSorted; use vortex_array::search_sorted::SearchSortedSide; use vortex_array::session::ArraySession; + use vortex_array::session::ArraySessionExt; use vortex_array::validity::Validity; use vortex_buffer::Buffer; use vortex_buffer::buffer; @@ -99,8 +100,11 @@ mod tests { use crate::RunEnd; use crate::ops::find_slice_end_index; - static SESSION: LazyLock = - LazyLock::new(|| VortexSession::empty().with::()); + static SESSION: LazyLock = LazyLock::new(|| { + let session = 
VortexSession::empty().with::(); + session.arrays().register(RunEnd); + session + }); fn decode_run_array( array: &RunArray, diff --git a/encodings/zigzag/public-api.lock b/encodings/zigzag/public-api.lock index 299344043ad..e655f900189 100644 --- a/encodings/zigzag/public-api.lock +++ b/encodings/zigzag/public-api.lock @@ -2,122 +2,46 @@ pub mod vortex_zigzag pub struct vortex_zigzag::ZigZag -impl vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::try_new(encoded: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult - impl core::clone::Clone for vortex_zigzag::ZigZag pub fn vortex_zigzag::ZigZag::clone(&self) -> vortex_zigzag::ZigZag -impl core::fmt::Debug for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl vortex_array::array::vtable::VTable for vortex_zigzag::ZigZag - -pub type vortex_zigzag::ZigZag::ArrayData = vortex_zigzag::ZigZagData - -pub type vortex_zigzag::ZigZag::OperationsVTable = vortex_zigzag::ZigZag - -pub type vortex_zigzag::ZigZag::ValidityVTable = vortex_array::array::vtable::validity::ValidityVTableFromChild - -pub fn vortex_zigzag::ZigZag::buffer(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> vortex_array::buffer::BufferHandle - -pub fn vortex_zigzag::ZigZag::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> core::option::Option - -pub fn vortex_zigzag::ZigZag::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::execute(array: vortex_array::array::typed::Array, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult - -pub fn vortex_zigzag::ZigZag::execute_parent(array: vortex_array::array::view::ArrayView<'_, Self>, parent: 
&vortex_array::array::erased::ArrayRef, child_idx: usize, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::id(&self) -> vortex_array::array::ArrayId - -pub fn vortex_zigzag::ZigZag::nbuffers(_array: vortex_array::array::view::ArrayView<'_, Self>) -> usize - -pub fn vortex_zigzag::ZigZag::reduce_parent(array: vortex_array::array::view::ArrayView<'_, Self>, parent: &vortex_array::array::erased::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> - -pub fn vortex_zigzag::ZigZag::serialize(_array: vortex_array::array::view::ArrayView<'_, Self>, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> - -pub fn vortex_zigzag::ZigZag::slot_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> alloc::string::String - -pub fn vortex_zigzag::ZigZag::validate(&self, _data: &Self::ArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> - -impl vortex_array::array::vtable::operations::OperationsVTable for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::scalar_at(array: vortex_array::array::view::ArrayView<'_, vortex_zigzag::ZigZag>, index: usize, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult - -impl vortex_array::array::vtable::validity::ValidityChild for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::validity_child(array: vortex_array::array::view::ArrayView<'_, vortex_zigzag::ZigZag>) -> vortex_array::array::erased::ArrayRef - -impl vortex_array::arrays::dict::take::TakeExecute for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::take(array: vortex_array::array::view::ArrayView<'_, Self>, indices: &vortex_array::array::erased::ArrayRef, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> - -impl vortex_array::arrays::filter::kernel::FilterReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::filter(array: 
vortex_array::array::view::ArrayView<'_, Self>, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> - -impl vortex_array::arrays::slice::SliceReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::slice(array: vortex_array::array::view::ArrayView<'_, Self>, range: core::ops::range::Range) -> vortex_error::VortexResult> - -impl vortex_array::scalar_fn::fns::cast::kernel::CastReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::cast(array: vortex_array::array::view::ArrayView<'_, Self>, dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult> - -impl vortex_array::scalar_fn::fns::mask::kernel::MaskReduce for vortex_zigzag::ZigZag - -pub fn vortex_zigzag::ZigZag::mask(array: vortex_array::array::view::ArrayView<'_, Self>, mask: &vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult> - -pub struct vortex_zigzag::ZigZagData - -impl vortex_zigzag::ZigZagData - -pub fn vortex_zigzag::ZigZagData::new() -> Self - -pub fn vortex_zigzag::ZigZagData::try_new(encoded_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult - -impl core::clone::Clone for vortex_zigzag::ZigZagData - -pub fn vortex_zigzag::ZigZagData::clone(&self) -> vortex_zigzag::ZigZagData +impl vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable for vortex_zigzag::ZigZag -impl core::default::Default for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], children: &dyn vortex_array::serde::ArrayChildren, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult> -pub fn vortex_zigzag::ZigZagData::default() -> Self +pub fn vortex_zigzag::ZigZag::serialize(&self, _view: &vortex_array::arrays::scalar_fn::vtable::ScalarFnArrayView<'_, Self>, _session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> -impl core::fmt::Debug for vortex_zigzag::ZigZagData +impl vortex_array::scalar_fn::vtable::ScalarFnVTable for 
vortex_zigzag::ZigZag -pub fn vortex_zigzag::ZigZagData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub type vortex_zigzag::ZigZag::Options = vortex_array::scalar_fn::vtable::EmptyOptions -impl core::fmt::Display for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::arity(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> vortex_array::scalar_fn::vtable::Arity -pub fn vortex_zigzag::ZigZagData::fmt(&self, _f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_zigzag::ZigZag::child_name(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, child_idx: usize) -> vortex_array::scalar_fn::vtable::ChildName -impl vortex_array::hash::ArrayEq for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_zigzag::ZigZagData::array_eq(&self, _other: &Self, _precision: vortex_array::hash::Precision) -> bool +pub fn vortex_zigzag::ZigZag::execute(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, args: &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult -impl vortex_array::hash::ArrayHash for vortex_zigzag::ZigZagData +pub fn vortex_zigzag::ZigZag::fmt_sql(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, expr: &vortex_array::expr::expression::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -pub fn vortex_zigzag::ZigZagData::array_hash(&self, _state: &mut H, _precision: vortex_array::hash::Precision) +pub fn vortex_zigzag::ZigZag::id(&self) -> vortex_array::scalar_fn::ScalarFnId -pub trait vortex_zigzag::ZigZagArrayExt: vortex_array::array::typed::TypedArrayRef +pub fn vortex_zigzag::ZigZag::is_fallible(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> bool -pub fn vortex_zigzag::ZigZagArrayExt::encoded(&self) -> 
&vortex_array::array::erased::ArrayRef +pub fn vortex_zigzag::ZigZag::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> bool -pub fn vortex_zigzag::ZigZagArrayExt::ptype(&self) -> vortex_array::dtype::ptype::PType +pub fn vortex_zigzag::ZigZag::return_dtype(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult -impl> vortex_zigzag::ZigZagArrayExt for T +pub fn vortex_zigzag::ZigZag::serialize(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions) -> vortex_error::VortexResult>> -pub fn T::encoded(&self) -> &vortex_array::array::erased::ArrayRef +pub fn vortex_zigzag::ZigZag::validity(&self, _options: &vortex_array::scalar_fn::vtable::EmptyOptions, expression: &vortex_array::expr::expression::Expression) -> vortex_error::VortexResult> -pub fn T::ptype(&self) -> vortex_array::dtype::ptype::PType +pub fn vortex_zigzag::initialize(session: &vortex_session::VortexSession) pub fn vortex_zigzag::zigzag_decode(parray: vortex_array::arrays::primitive::vtable::PrimitiveArray) -> vortex_array::arrays::primitive::vtable::PrimitiveArray -pub fn vortex_zigzag::zigzag_encode(parray: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::primitive::vtable::Primitive>) -> vortex_error::VortexResult +pub fn vortex_zigzag::zigzag_encode(parray: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::primitive::vtable::Primitive>) -> vortex_error::VortexResult -pub type vortex_zigzag::ZigZagArray = vortex_array::array::typed::Array +pub fn vortex_zigzag::zigzag_try_new(encoded: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs deleted file mode 100644 index bc10192ce78..00000000000 --- a/encodings/zigzag/src/array.rs +++ /dev/null @@ -1,321 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - 
-use std::fmt::Display; -use std::fmt::Formatter; -use std::hash::Hasher; - -use vortex_array::Array; -use vortex_array::ArrayEq; -use vortex_array::ArrayHash; -use vortex_array::ArrayId; -use vortex_array::ArrayParts; -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::ExecutionCtx; -use vortex_array::ExecutionResult; -use vortex_array::IntoArray; -use vortex_array::Precision; -use vortex_array::TypedArrayRef; -use vortex_array::buffer::BufferHandle; -use vortex_array::dtype::DType; -use vortex_array::dtype::PType; -use vortex_array::match_each_unsigned_integer_ptype; -use vortex_array::scalar::Scalar; -use vortex_array::serde::ArrayChildren; -use vortex_array::vtable::OperationsVTable; -use vortex_array::vtable::VTable; -use vortex_array::vtable::ValidityChild; -use vortex_array::vtable::ValidityVTableFromChild; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; -use vortex_error::vortex_panic; -use vortex_session::VortexSession; -use vortex_session::registry::CachedId; -use zigzag::ZigZag as ExternalZigZag; - -use crate::compute::ZigZagEncoded; -use crate::kernel::PARENT_KERNELS; -use crate::rules::RULES; -use crate::zigzag_decode; - -/// A [`ZigZag`]-encoded Vortex array. 
-pub type ZigZagArray = Array; - -impl VTable for ZigZag { - type ArrayData = ZigZagData; - - type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromChild; - - fn id(&self) -> ArrayId { - static ID: CachedId = CachedId::new("vortex.zigzag"); - *ID - } - - fn validate( - &self, - _data: &Self::ArrayData, - dtype: &DType, - len: usize, - slots: &[Option], - ) -> VortexResult<()> { - let encoded = slots[ENCODED_SLOT] - .as_ref() - .vortex_expect("ZigZagArray encoded slot"); - let expected_dtype = ZigZagData::dtype_from_encoded_dtype(encoded.dtype())?; - vortex_ensure!( - dtype == &expected_dtype, - "expected dtype {expected_dtype}, got {dtype}" - ); - vortex_ensure!( - encoded.len() == len, - "expected len {len}, got {}", - encoded.len() - ); - Ok(()) - } - - fn nbuffers(_array: ArrayView<'_, Self>) -> usize { - 0 - } - - fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle { - vortex_panic!("ZigZagArray buffer index {idx} out of bounds") - } - - fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option { - vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds") - } - - fn serialize( - _array: ArrayView<'_, Self>, - _session: &VortexSession, - ) -> VortexResult>> { - Ok(Some(vec![])) - } - - fn deserialize( - &self, - dtype: &DType, - len: usize, - metadata: &[u8], - _buffers: &[BufferHandle], - children: &dyn ArrayChildren, - _session: &VortexSession, - ) -> VortexResult> { - if !metadata.is_empty() { - vortex_bail!( - "ZigZagArray expects empty metadata, got {} bytes", - metadata.len() - ); - } - if children.len() != 1 { - vortex_bail!("Expected 1 child, got {}", children.len()); - } - - let ptype = PType::try_from(dtype)?; - let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); - - let encoded = children.get(0, &encoded_type, len)?; - let slots = vec![Some(encoded.clone())]; - let data = ZigZagData::try_new(encoded.dtype())?; - Ok(ArrayParts::new(self.clone(), dtype.clone(), len, 
data).with_slots(slots)) - } - - fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { - SLOT_NAMES[idx].to_string() - } - - fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionResult::done( - zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(), - )) - } - - fn reduce_parent( - array: ArrayView<'_, Self>, - parent: &ArrayRef, - child_idx: usize, - ) -> VortexResult> { - RULES.evaluate(array, parent, child_idx) - } - - fn execute_parent( - array: ArrayView<'_, Self>, - parent: &ArrayRef, - child_idx: usize, - ctx: &mut ExecutionCtx, - ) -> VortexResult> { - PARENT_KERNELS.execute(array, parent, child_idx, ctx) - } -} - -impl ArrayHash for ZigZagData { - fn array_hash(&self, _state: &mut H, _precision: Precision) {} -} - -impl ArrayEq for ZigZagData { - fn array_eq(&self, _other: &Self, _precision: Precision) -> bool { - true - } -} - -/// The zigzag-encoded values (signed integers mapped to unsigned). -pub(super) const ENCODED_SLOT: usize = 0; -pub(super) const NUM_SLOTS: usize = 1; -pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["encoded"]; - -#[derive(Clone, Debug)] -pub struct ZigZagData {} - -impl Display for ZigZagData { - fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { - Ok(()) - } -} - -pub trait ZigZagArrayExt: TypedArrayRef { - fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] - .as_ref() - .vortex_expect("ZigZagArray encoded slot") - } - - fn ptype(&self) -> PType { - PType::try_from(self.encoded().dtype()) - .vortex_expect("ZigZagArray encoded dtype") - .to_signed() - } -} - -impl> ZigZagArrayExt for T {} - -#[derive(Clone, Debug)] -pub struct ZigZag; - -impl ZigZag { - /// Construct a new [`ZigZagArray`] from an encoded unsigned integer array. 
- pub fn try_new(encoded: ArrayRef) -> VortexResult { - let dtype = ZigZagData::dtype_from_encoded_dtype(encoded.dtype())?; - let len = encoded.len(); - let slots = vec![Some(encoded.clone())]; - let data = ZigZagData::try_new(encoded.dtype())?; - Ok(unsafe { - Array::from_parts_unchecked(ArrayParts::new(ZigZag, dtype, len, data).with_slots(slots)) - }) - } -} - -impl ZigZagData { - fn dtype_from_encoded_dtype(encoded_dtype: &DType) -> VortexResult { - Ok(DType::from(PType::try_from(encoded_dtype)?.to_signed()) - .with_nullability(encoded_dtype.nullability())) - } - - pub fn new() -> Self { - Self {} - } - - pub fn try_new(encoded_dtype: &DType) -> VortexResult { - if !encoded_dtype.is_unsigned_int() { - vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype); - } - - Self::dtype_from_encoded_dtype(encoded_dtype)?; - - Ok(Self {}) - } -} - -impl Default for ZigZagData { - fn default() -> Self { - Self::new() - } -} - -impl OperationsVTable for ZigZag { - fn scalar_at( - array: ArrayView<'_, ZigZag>, - index: usize, - _ctx: &mut ExecutionCtx, - ) -> VortexResult { - let scalar = array.encoded().scalar_at(index)?; - if scalar.is_null() { - return scalar.primitive_reinterpret_cast(ZigZagArrayExt::ptype(&array)); - } - - let pscalar = scalar.as_primitive(); - Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| { - Scalar::primitive( - <
<P as ZigZagEncoded>::Int>::decode( - pscalar - .typed_value::<P>
() - .vortex_expect("zigzag corruption"), - ), - array.dtype().nullability(), - ) - })) - } -} - -impl ValidityChild for ZigZag { - fn validity_child(array: ArrayView<'_, ZigZag>) -> ArrayRef { - array.encoded().clone() - } -} - -#[cfg(test)] -mod test { - use vortex_array::IntoArray; - use vortex_array::ToCanonical; - use vortex_array::scalar::Scalar; - use vortex_buffer::buffer; - - use super::*; - use crate::zigzag_encode; - - #[test] - fn test_compute_statistics() -> VortexResult<()> { - let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10] - .into_array() - .to_primitive(); - let zigzag = zigzag_encode(array.as_view())?; - - assert_eq!( - zigzag.statistics().compute_max::(), - array.statistics().compute_max::() - ); - assert_eq!( - zigzag.statistics().compute_null_count(), - array.statistics().compute_null_count() - ); - assert_eq!( - zigzag.statistics().compute_is_constant(), - array.statistics().compute_is_constant() - ); - - let sliced = zigzag.slice(0..2).unwrap(); - let sliced = sliced.as_::(); - assert_eq!( - sliced.array().scalar_at(sliced.len() - 1).unwrap(), - Scalar::from(-5i32) - ); - - assert_eq!( - sliced.statistics().compute_min::(), - array.statistics().compute_min::() - ); - assert_eq!( - sliced.statistics().compute_null_count(), - array.statistics().compute_null_count() - ); - assert_eq!( - sliced.statistics().compute_is_constant(), - array.statistics().compute_is_constant() - ); - Ok(()) - } -} diff --git a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index 3c96c8f5778..5ac69466087 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::IntoArray; use vortex_array::arrays::Primitive; @@ -15,12 +16,14 @@ use vortex_error::vortex_bail; use vortex_error::vortex_panic; use zigzag::ZigZag as 
ExternalZigZag; -use crate::ZigZag; -use crate::ZigZagArray; -pub fn zigzag_encode(parray: ArrayView<'_, Primitive>) -> VortexResult { - let parray = parray.into_owned(); +use crate::vtable::zigzag_try_new; + +pub fn zigzag_encode(parray: ArrayView<'_, Primitive>) -> VortexResult { let validity = parray.validity()?; - let encoded = match parray.ptype() { + let ptype = parray.ptype(); + let parray = parray.into_owned(); + + let encoded = match ptype { PType::I8 => zigzag_encode_primitive::(parray.into_buffer_mut(), validity), PType::I16 => zigzag_encode_primitive::(parray.into_buffer_mut(), validity), PType::I32 => zigzag_encode_primitive::(parray.into_buffer_mut(), validity), @@ -30,7 +33,8 @@ pub fn zigzag_encode(parray: ArrayView<'_, Primitive>) -> VortexResult( @@ -77,19 +81,15 @@ where #[cfg(test)] mod test { - use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::assert_arrays_eq; use super::*; - use crate::ZigZag; #[test] fn test_compress_i8() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i8..100).as_view()) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i8..100).as_view()).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i8..100) @@ -97,10 +97,8 @@ mod test { } #[test] fn test_compress_i16() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i16..100).as_view()) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i16..100).as_view()).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i16..100) @@ -108,10 +106,8 @@ mod test { } #[test] fn test_compress_i32() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i32..100).as_view()) - .unwrap() 
- .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i32..100).as_view()).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i32..100) @@ -119,10 +115,8 @@ mod test { } #[test] fn test_compress_i64() { - let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i64..100).as_view()) - .unwrap() - .into_array(); - assert!(compressed.is::()); + let compressed = zigzag_encode(PrimitiveArray::from_iter(-100_i64..100).as_view()).unwrap(); + assert_eq!(compressed.encoding_id().as_ref(), "vortex.zigzag"); assert_arrays_eq!( compressed.to_primitive(), PrimitiveArray::from_iter(-100_i64..100) diff --git a/encodings/zigzag/src/compute/cast.rs b/encodings/zigzag/src/compute/cast.rs deleted file mode 100644 index a71eee7a838..00000000000 --- a/encodings/zigzag/src/compute/cast.rs +++ /dev/null @@ -1,132 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::IntoArray; -use vortex_array::builtins::ArrayBuiltins; -use vortex_array::dtype::DType; -use vortex_array::scalar_fn::fns::cast::CastReduce; -use vortex_error::VortexResult; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; -impl CastReduce for ZigZag { - fn cast(array: ArrayView<'_, Self>, dtype: &DType) -> VortexResult> { - if !dtype.is_signed_int() { - return Ok(None); - } - - let new_encoded_dtype = - DType::Primitive(dtype.as_ptype().to_unsigned(), dtype.nullability()); - let new_encoded = array.encoded().cast(new_encoded_dtype)?; - Ok(Some(ZigZag::try_new(new_encoded)?.into_array())) - } -} - -#[cfg(test)] -mod tests { - use rstest::rstest; - use vortex_array::IntoArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::builtins::ArrayBuiltins; - use 
vortex_array::compute::conformance::cast::test_cast_conformance; - use vortex_array::dtype::DType; - use vortex_array::dtype::Nullability; - use vortex_array::dtype::PType; - - use crate::ZigZagArray; - use crate::zigzag_encode; - - #[test] - fn test_cast_zigzag_i32_to_i64() { - let values = PrimitiveArray::from_iter([-100i32, -1, 0, 1, 100]); - let zigzag = zigzag_encode(values.as_view()).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted.dtype(), - &DType::Primitive(PType::I64, Nullability::NonNullable) - ); - - // Verify the result is still a ZigZagArray (not decoded) - // Note: The result might be wrapped, so let's check the encoding ID - assert_eq!( - casted.encoding_id().as_ref(), - "vortex.zigzag", - "Cast should preserve ZigZag encoding" - ); - - assert_arrays_eq!(casted, PrimitiveArray::from_iter([-100i64, -1, 0, 1, 100])); - } - - #[test] - fn test_cast_zigzag_width_changes() { - // Test i32 to i16 (narrowing) - let values = PrimitiveArray::from_iter([100i32, -50, 0, 25, -100]); - let zigzag = zigzag_encode(values.as_view()).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I16, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted.encoding_id().as_ref(), - "vortex.zigzag", - "Should remain ZigZag encoded" - ); - - assert_arrays_eq!( - casted, - PrimitiveArray::from_iter([100i16, -50, 0, 25, -100]) - ); - - // Test i16 to i64 (widening) - let values16 = PrimitiveArray::from_iter([1000i16, -500, 0, 250, -1000]); - let zigzag16 = zigzag_encode(values16.as_view()).unwrap(); - - let casted64 = zigzag16 - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::NonNullable)) - .unwrap(); - assert_eq!( - casted64.encoding_id().as_ref(), - "vortex.zigzag", - "Should remain ZigZag encoded" - ); - - assert_arrays_eq!( - casted64, - PrimitiveArray::from_iter([1000i64, -500, 0, 250, -1000]) - ); - } - - #[test] - fn 
test_cast_zigzag_nullable() { - let values = - PrimitiveArray::from_option_iter([Some(-10i32), None, Some(0), Some(10), None]); - let zigzag = zigzag_encode(values.as_view()).unwrap(); - - let casted = zigzag - .into_array() - .cast(DType::Primitive(PType::I64, Nullability::Nullable)) - .unwrap(); - assert_eq!( - casted.dtype(), - &DType::Primitive(PType::I64, Nullability::Nullable) - ); - } - - #[rstest] - #[case(zigzag_encode(PrimitiveArray::from_iter([-100i32, -50, -1, 0, 1, 50, 100]).as_view()).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_iter([-1000i64, -1, 0, 1, 1000]).as_view()).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_option_iter([Some(-5i16), None, Some(0), Some(5), None]).as_view()).unwrap())] - #[case(zigzag_encode(PrimitiveArray::from_iter([i32::MIN, -1, 0, 1, i32::MAX]).as_view()).unwrap())] - fn test_cast_zigzag_conformance(#[case] array: ZigZagArray) { - test_cast_conformance(&array.into_array()); - } -} diff --git a/encodings/zigzag/src/compute/mod.rs b/encodings/zigzag/src/compute/mod.rs deleted file mode 100644 index c038e98d629..00000000000 --- a/encodings/zigzag/src/compute/mod.rs +++ /dev/null @@ -1,222 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -mod cast; - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::ExecutionCtx; -use vortex_array::IntoArray; -use vortex_array::arrays::dict::TakeExecute; -use vortex_array::arrays::filter::FilterReduce; -use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt; -use vortex_array::scalar_fn::EmptyOptions; -use vortex_array::scalar_fn::fns::mask::Mask as MaskExpr; -use vortex_array::scalar_fn::fns::mask::MaskReduce; -use vortex_error::VortexResult; -use vortex_mask::Mask; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; - -impl FilterReduce for ZigZag { - fn filter(array: ArrayView<'_, Self>, mask: &Mask) -> VortexResult> { - let encoded = array.encoded().filter(mask.clone())?; - 
Ok(Some(ZigZag::try_new(encoded)?.into_array())) - } -} - -impl TakeExecute for ZigZag { - fn take( - array: ArrayView<'_, Self>, - indices: &ArrayRef, - _ctx: &mut ExecutionCtx, - ) -> VortexResult> { - let encoded = array.encoded().take(indices.clone())?; - Ok(Some(ZigZag::try_new(encoded)?.into_array())) - } -} - -impl MaskReduce for ZigZag { - fn mask(array: ArrayView<'_, Self>, mask: &ArrayRef) -> VortexResult> { - let masked_encoded = MaskExpr.try_new_array( - array.encoded().len(), - EmptyOptions, - [array.encoded().clone(), mask.clone()], - )?; - Ok(Some(ZigZag::try_new(masked_encoded)?.into_array())) - } -} - -pub(crate) trait ZigZagEncoded { - type Int: zigzag::ZigZag; -} - -impl ZigZagEncoded for u8 { - type Int = i8; -} - -impl ZigZagEncoded for u16 { - type Int = i16; -} - -impl ZigZagEncoded for u32 { - type Int = i32; -} - -impl ZigZagEncoded for u64 { - type Int = i64; -} - -#[cfg(test)] -mod tests { - use rstest::rstest; - use vortex_array::ArrayRef; - use vortex_array::IntoArray; - use vortex_array::ToCanonical; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::conformance::binary_numeric::test_binary_numeric_array; - use vortex_array::compute::conformance::consistency::test_array_consistency; - use vortex_array::dtype::Nullability; - use vortex_array::scalar::Scalar; - use vortex_array::validity::Validity; - use vortex_buffer::BitBuffer; - use vortex_buffer::buffer; - use vortex_error::VortexResult; - - use crate::ZigZagArray; - use crate::zigzag_encode; - - #[test] - pub fn nullable_scalar_at() -> VortexResult<()> { - let zigzag = zigzag_encode( - PrimitiveArray::new(buffer![-189, -160, 1], Validity::AllValid).as_view(), - )?; - assert_eq!( - zigzag.scalar_at(1)?, - Scalar::primitive(-160, Nullability::Nullable) - ); - Ok(()) - } - - #[test] - fn take_zigzag() -> VortexResult<()> { - let zigzag = zigzag_encode( - PrimitiveArray::new(buffer![-189, -160, 1], 
Validity::AllValid).as_view(), - )?; - - let indices = buffer![0, 2].into_array(); - let actual = zigzag.take(indices).unwrap(); - let expected = - zigzag_encode(PrimitiveArray::new(buffer![-189, 1], Validity::AllValid).as_view())? - .into_array(); - assert_arrays_eq!(actual, expected); - Ok(()) - } - - #[test] - fn filter_zigzag() -> VortexResult<()> { - let zigzag = zigzag_encode( - PrimitiveArray::new(buffer![-189, -160, 1], Validity::AllValid).as_view(), - )?; - - let filter_mask = BitBuffer::from(vec![true, false, true]).into(); - let actual = zigzag.filter(filter_mask).unwrap(); - let expected = - zigzag_encode(PrimitiveArray::new(buffer![-189, 1], Validity::AllValid).as_view())? - .into_array(); - assert_arrays_eq!(actual, expected); - Ok(()) - } - - #[test] - fn test_filter_conformance() -> VortexResult<()> { - use vortex_array::compute::conformance::filter::test_filter_conformance; - - // Test with i32 values - let zigzag = zigzag_encode( - PrimitiveArray::new(buffer![-189i32, -160, 1, 42, -73], Validity::AllValid).as_view(), - )?; - test_filter_conformance(&zigzag.into_array()); - - // Test with i64 values - let zigzag = zigzag_encode( - PrimitiveArray::new( - buffer![1000i64, -2000, 3000, -4000, 5000], - Validity::AllValid, - ) - .as_view(), - )?; - test_filter_conformance(&zigzag.into_array()); - - // Test with nullable values - let array = - PrimitiveArray::from_option_iter([Some(-10i16), None, Some(20), Some(-30), None]); - let zigzag = zigzag_encode(array.as_view())?; - test_filter_conformance(&zigzag.into_array()); - Ok(()) - } - - #[test] - fn test_mask_conformance() -> VortexResult<()> { - use vortex_array::compute::conformance::mask::test_mask_conformance; - - // Test with i32 values - let zigzag = zigzag_encode( - PrimitiveArray::new(buffer![-100i32, 200, -300, 400, -500], Validity::AllValid) - .as_view(), - )?; - test_mask_conformance(&zigzag.into_array()); - - // Test with i8 values - let zigzag = zigzag_encode( - 
PrimitiveArray::new(buffer![-127i8, 0, 127, -1, 1], Validity::AllValid).as_view(), - )?; - test_mask_conformance(&zigzag.into_array()); - Ok(()) - } - - #[rstest] - #[case(buffer![-189i32, -160, 1, 42, -73].into_array())] - #[case(buffer![1000i64, -2000, 3000, -4000, 5000].into_array())] - #[case(PrimitiveArray::from_option_iter([Some(-10i16), None, Some(20), Some(-30), None]).into_array() - )] - #[case(buffer![42i32].into_array())] - fn test_take_zigzag_conformance(#[case] array: ArrayRef) -> VortexResult<()> { - use vortex_array::compute::conformance::take::test_take_conformance; - - let zigzag = zigzag_encode(array.to_primitive().as_view())?; - test_take_conformance(&zigzag.into_array()); - Ok(()) - } - - #[rstest] - // Basic ZigZag arrays - #[case::zigzag_i8(zigzag_encode(PrimitiveArray::from_iter([-128i8, -1, 0, 1, 127]).as_view()).unwrap())] - #[case::zigzag_i16(zigzag_encode(PrimitiveArray::from_iter([-1000i16, -100, 0, 100, 1000]).as_view()).unwrap())] - #[case::zigzag_i32(zigzag_encode(PrimitiveArray::from_iter([-100000i32, -1000, 0, 1000, 100000]).as_view()).unwrap())] - #[case::zigzag_i64(zigzag_encode(PrimitiveArray::from_iter([-1000000i64, -10000, 0, 10000, 1000000]).as_view()).unwrap())] - // Nullable arrays - #[case::zigzag_nullable_i32(zigzag_encode(PrimitiveArray::from_option_iter([Some(-100i32), None, Some(0), Some(100), None]).as_view()).unwrap())] - #[case::zigzag_nullable_i64(zigzag_encode(PrimitiveArray::from_option_iter([Some(-1000i64), None, Some(0), Some(1000), None]).as_view()).unwrap())] - // Edge cases - #[case::zigzag_single(zigzag_encode(PrimitiveArray::from_iter([-42i32]).as_view()).unwrap())] - #[case::zigzag_alternating(zigzag_encode(PrimitiveArray::from_iter([-1i32, 1, -2, 2, -3, 3]).as_view()).unwrap())] - // Large arrays - #[case::zigzag_large_i32(zigzag_encode(PrimitiveArray::from_iter(-500..500).as_view()).unwrap())] - #[case::zigzag_large_i64(zigzag_encode(PrimitiveArray::from_iter((-1000..1000).map(|i| i as i64 * 
100)).as_view()).unwrap())] - fn test_zigzag_consistency(#[case] array: ZigZagArray) { - test_array_consistency(&array.into_array()); - } - - #[rstest] - #[case::zigzag_i8_basic(zigzag_encode(PrimitiveArray::from_iter([-10i8, -5, 0, 5, 10]).as_view()).unwrap())] - #[case::zigzag_i16_basic(zigzag_encode(PrimitiveArray::from_iter([-100i16, -50, 0, 50, 100]).as_view()).unwrap())] - #[case::zigzag_i32_basic(zigzag_encode(PrimitiveArray::from_iter([-1000i32, -500, 0, 500, 1000]).as_view()).unwrap())] - #[case::zigzag_i64_basic(zigzag_encode(PrimitiveArray::from_iter([-10000i64, -5000, 0, 5000, 10000]).as_view()).unwrap())] - #[case::zigzag_i32_large(zigzag_encode(PrimitiveArray::from_iter((-50..50).map(|i| i * 10)).as_view()).unwrap())] - fn test_zigzag_binary_numeric(#[case] array: ZigZagArray) { - test_binary_numeric_array(array.into_array()); - } -} diff --git a/encodings/zigzag/src/kernel.rs b/encodings/zigzag/src/kernel.rs deleted file mode 100644 index d0096abaae1..00000000000 --- a/encodings/zigzag/src/kernel.rs +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::arrays::dict::TakeExecuteAdaptor; -use vortex_array::kernel::ParentKernelSet; - -use crate::ZigZag; - -pub(crate) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(ZigZag))]); diff --git a/encodings/zigzag/src/lib.rs b/encodings/zigzag/src/lib.rs index 89da8bd6069..7d321d28ae0 100644 --- a/encodings/zigzag/src/lib.rs +++ b/encodings/zigzag/src/lib.rs @@ -1,12 +1,17 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -pub use array::*; pub use compress::*; +pub use vtable::*; -mod array; mod compress; -mod compute; -mod kernel; -mod rules; -mod slice; +mod vtable; + +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayPlugin; +use vortex_array::session::ArraySessionExt; +use 
vortex_session::VortexSession; + +/// Initialize ZigZag encoding in the given session. +pub fn initialize(session: &VortexSession) { + session.arrays().register(ScalarFnArrayPlugin::new(ZigZag)); +} diff --git a/encodings/zigzag/src/rules.rs b/encodings/zigzag/src/rules.rs deleted file mode 100644 index c3b612d101d..00000000000 --- a/encodings/zigzag/src/rules.rs +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::arrays::filter::FilterReduceAdaptor; -use vortex_array::arrays::slice::SliceReduceAdaptor; -use vortex_array::optimizer::rules::ParentRuleSet; -use vortex_array::scalar_fn::fns::cast::CastReduceAdaptor; -use vortex_array::scalar_fn::fns::mask::MaskReduceAdaptor; - -use crate::ZigZag; - -pub(crate) static RULES: ParentRuleSet = ParentRuleSet::new(&[ - ParentRuleSet::lift(&CastReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&FilterReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&MaskReduceAdaptor(ZigZag)), - ParentRuleSet::lift(&SliceReduceAdaptor(ZigZag)), -]); diff --git a/encodings/zigzag/src/slice.rs b/encodings/zigzag/src/slice.rs deleted file mode 100644 index 6a7128d5111..00000000000 --- a/encodings/zigzag/src/slice.rs +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::ops::Range; - -use vortex_array::ArrayRef; -use vortex_array::ArrayView; -use vortex_array::IntoArray; -use vortex_array::arrays::slice::SliceReduce; -use vortex_error::VortexResult; - -use crate::ZigZag; -use crate::array::ZigZagArrayExt; - -impl SliceReduce for ZigZag { - fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { - Ok(Some( - ZigZag::try_new(array.encoded().slice(range)?)?.into_array(), - )) - } -} diff --git a/encodings/zigzag/src/vtable.rs b/encodings/zigzag/src/vtable.rs new file mode 100644 index 00000000000..4c8e9571026 --- /dev/null +++
b/encodings/zigzag/src/vtable.rs @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Formatter; + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt; +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; +use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; +use vortex_array::dtype::DType; +use vortex_array::dtype::PType; +use vortex_array::expr::Expression; +use vortex_array::scalar_fn::Arity; +use vortex_array::scalar_fn::ChildName; +use vortex_array::scalar_fn::EmptyOptions; +use vortex_array::scalar_fn::ExecutionArgs; +use vortex_array::scalar_fn::ScalarFnId; +use vortex_array::scalar_fn::ScalarFnVTable; +use vortex_array::serde::ArrayChildren; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_session::VortexSession; + +use crate::compress::zigzag_decode; + +/// ZigZag encoding maps signed integers to unsigned integers so that small absolute values +/// have small encoded values. 
+#[derive(Clone)] +pub struct ZigZag; + +impl ScalarFnVTable for ZigZag { + type Options = EmptyOptions; + + fn id(&self) -> ScalarFnId { + ScalarFnId::new("vortex.zigzag") + } + + fn serialize(&self, _options: &EmptyOptions) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize( + &self, + _metadata: &[u8], + _session: &VortexSession, + ) -> VortexResult { + Ok(EmptyOptions) + } + + fn arity(&self, _options: &EmptyOptions) -> Arity { + Arity::Exact(1) + } + + fn child_name(&self, _options: &EmptyOptions, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("encoded"), + _ => unreachable!("Invalid child index {child_idx} for ZigZag"), + } + } + + fn fmt_sql( + &self, + _options: &EmptyOptions, + expr: &Expression, + f: &mut Formatter<'_>, + ) -> std::fmt::Result { + write!(f, "zigzag_decode(")?; + expr.children()[0].fmt_sql(f)?; + write!(f, ")") + } + + fn return_dtype(&self, _options: &EmptyOptions, arg_dtypes: &[DType]) -> VortexResult { + let encoded_dtype = &arg_dtypes[0]; + let ptype = PType::try_from(encoded_dtype)?; + vortex_ensure!( + ptype.is_unsigned_int(), + "ZigZag encoded child must be unsigned integer, got {encoded_dtype}" + ); + Ok(DType::from(ptype.to_signed()).with_nullability(encoded_dtype.nullability())) + } + + fn execute( + &self, + _options: &EmptyOptions, + args: &dyn ExecutionArgs, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let encoded = args.get(0)?; + let decoded = zigzag_decode(encoded.execute::(ctx)?); + Ok(decoded.into_array()) + } + + fn validity( + &self, + _options: &EmptyOptions, + expression: &Expression, + ) -> VortexResult> { + Ok(Some(expression.child(0).validity()?)) + } + + fn is_null_sensitive(&self, _options: &EmptyOptions) -> bool { + false + } + + fn is_fallible(&self, _options: &EmptyOptions) -> bool { + false + } +} + +impl ScalarFnArrayVTable for ZigZag { + fn serialize( + &self, + _view: &ScalarFnArrayView, + _session: &VortexSession, + ) -> VortexResult>> { + Ok(Some(vec![])) + } 
+ + fn deserialize( + &self, + dtype: &DType, + len: usize, + metadata: &[u8], + children: &dyn ArrayChildren, + _session: &VortexSession, + ) -> VortexResult> { + vortex_ensure!( + metadata.is_empty(), + "ZigZag expects empty metadata, got {} bytes", + metadata.len() + ); + vortex_ensure!( + children.len() == 1, + "ZigZag expects 1 child, got {}", + children.len() + ); + + let ptype = PType::try_from(dtype)?; + let encoded_dtype = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); + let encoded = children.get(0, &encoded_dtype, len)?; + + Ok(ScalarFnArrayParts { + options: EmptyOptions, + children: vec![encoded], + }) + } +} + +/// Construct a ZigZag-encoded array from an unsigned encoded child. +pub fn zigzag_try_new(encoded: ArrayRef) -> VortexResult { + let len = encoded.len(); + ZigZag.try_new_array(len, EmptyOptions, [encoded]) +} diff --git a/encodings/zstd/src/zstd_buffers.rs b/encodings/zstd/src/zstd_buffers.rs index ffeb70210ae..5d00fd5a6a4 100644 --- a/encodings/zstd/src/zstd_buffers.rs +++ b/encodings/zstd/src/zstd_buffers.rs @@ -59,9 +59,9 @@ impl ZstdBuffers { session: &VortexSession, ) -> VortexResult { let encoding_id = array.encoding_id(); - let metadata = array - .metadata(session)? - .ok_or_else(|| vortex_err!("Array does not support serialization"))?; + let metadata = session + .array_serialize(array)? 
+ .ok_or_else(|| vortex_err!("[ZstdBuffers]: Array does not support serialization"))?; let buffer_handles = array.buffer_handles(); let children = array.children(); diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 8f26230791a..25736c6df84 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -22024,8 +22024,6 @@ pub fn vortex_array::ArrayRef::is_valid(&self, index: usize) -> vortex_error::Vo pub fn vortex_array::ArrayRef::len(&self) -> usize -pub fn vortex_array::ArrayRef::metadata(&self, session: &vortex_session::VortexSession) -> vortex_error::VortexResult>> - pub fn vortex_array::ArrayRef::metadata_fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result pub fn vortex_array::ArrayRef::named_buffers(&self) -> alloc::vec::Vec<(alloc::string::String, vortex_array::buffer::BufferHandle)> diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index cfefe316ca4..f951f9423ae 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -15,7 +15,6 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; use vortex_mask::Mask; -use vortex_session::VortexSession; use crate::AnyCanonical; use crate::Array; @@ -561,11 +560,6 @@ impl ArrayRef { self.0.slot_name(self, idx) } - /// Returns the serialized metadata of the array. - pub fn metadata(&self, session: &VortexSession) -> VortexResult>> { - self.0.metadata(self, session) - } - /// Formats a human-readable metadata description. 
pub fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.0.metadata_fmt(f) diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index fae2d486ab6..eada9e79a0a 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -13,7 +13,6 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; -use vortex_session::VortexSession; use vortex_session::registry::Id; use crate::ExecutionCtx; @@ -132,10 +131,6 @@ pub(crate) trait DynArray: 'static + private::Sealed + Send + Sync + Debug { /// Returns the name of the slot at the given index. fn slot_name(&self, this: &ArrayRef, idx: usize) -> String; - /// Returns the serialized metadata of the array, or `None` if the array does not - /// support serialization. - fn metadata(&self, this: &ArrayRef, session: &VortexSession) -> VortexResult>>; - /// Formats a human-readable metadata description. fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result; @@ -341,11 +336,6 @@ impl DynArray for ArrayInner { V::slot_name(view, idx) } - fn metadata(&self, this: &ArrayRef, session: &VortexSession) -> VortexResult>> { - let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; - V::serialize(view, session) - } - fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(&self.data, f) } diff --git a/vortex-array/src/arrow/executor/run_end.rs b/vortex-array/src/arrow/executor/run_end.rs index 69aca62a994..b9424bc866b 100644 --- a/vortex-array/src/arrow/executor/run_end.rs +++ b/vortex-array/src/arrow/executor/run_end.rs @@ -24,6 +24,7 @@ use crate::IntoArray; use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::arrow::ArrowArrayExecutor; +use crate::session::ArraySessionExt; /// The encoding ID used by `vortex-runend`. We match on this string to avoid a crate dependency. 
const VORTEX_RUNEND_ID: &str = "vortex.runend"; @@ -79,8 +80,9 @@ fn run_end_to_arrow( ctx: &mut ExecutionCtx, ) -> VortexResult { let length = array.len(); - let metadata_bytes = array - .metadata(ctx.session())? + let metadata_bytes = ctx + .session() + .array_serialize(&array)? .ok_or_else(|| vortex_err!("RunEndArray missing metadata"))?; let metadata = RunEndMetadata::decode(&*metadata_bytes) .map_err(|e| vortex_err!("Failed to decode RunEndMetadata: {e}"))?; diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index 946a40518bc..c560aa9fb7e 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -170,16 +170,6 @@ impl<'a> ArrayNodeFlatBuffer<'a> { session: &'a VortexSession, array: &'a ArrayRef, ) -> VortexResult { - // Depth-first traversal of the array to ensure it supports serialization. - // FIXME(ngates): this serializes the metadata and throws it away! - for child in array.depth_first_traversal() { - if child.metadata(session)?.is_none() { - vortex_bail!( - "Array {} does not support serialization", - child.encoding_id() - ); - } - } let n_buffers_recursive = array.nbuffers_recursive(); if n_buffers_recursive > u16::MAX as usize { vortex_bail!( @@ -210,13 +200,13 @@ impl<'a> ArrayNodeFlatBuffer<'a> { ) })?; - let metadata = self.array.metadata(self.session)?.ok_or_else(|| { + let metadata_bytes = self.session.array_serialize(self.array)?.ok_or_else(|| { vortex_err!( "Array {} does not support serialization", self.array.encoding_id() ) })?; - let metadata = Some(fbb.create_vector(metadata.as_slice())); + let metadata = Some(fbb.create_vector(metadata_bytes.as_slice())); // Assign buffer indices for all child arrays. let nbuffers = u16::try_from(self.array.nbuffers()) @@ -701,101 +691,3 @@ impl TryFrom for SerializedArray { Self::try_from(value.try_to_host_sync()?) 
} } - -#[cfg(test)] -mod tests { - use std::sync::LazyLock; - - use flatbuffers::FlatBufferBuilder; - use vortex_session::VortexSession; - use vortex_session::registry::ReadContext; - - use super::SerializeOptions; - use super::SerializedArray; - use crate::ArrayContext; - use crate::array::ArrayId; - use crate::dtype::DType; - use crate::dtype::Nullability; - use crate::flatbuffers as fba; - use crate::session::ArraySession; - - static SESSION: LazyLock = LazyLock::new(VortexSession::empty); - - #[test] - fn unknown_array_encoding_allow_unknown() { - let mut fbb = FlatBufferBuilder::new(); - - let child_metadata = fbb.create_vector(&[9u8]); - let child = fba::ArrayNode::create( - &mut fbb, - &fba::ArrayNodeArgs { - encoding: 1, - metadata: Some(child_metadata), - children: None, - buffers: None, - stats: None, - }, - ); - - let children = fbb.create_vector(&[child]); - let metadata = fbb.create_vector(&[1u8, 2, 3]); - let root = fba::ArrayNode::create( - &mut fbb, - &fba::ArrayNodeArgs { - encoding: 0, - metadata: Some(metadata), - children: Some(children), - buffers: None, - stats: None, - }, - ); - let array = fba::Array::create( - &mut fbb, - &fba::ArrayArgs { - root: Some(root), - buffers: None, - }, - ); - fbb.finish_minimal(array); - let (buf, start) = fbb.collapse(); - let tree = vortex_buffer::ByteBuffer::from(buf).slice(start..); - - let ser = SerializedArray::from_array_tree(tree).unwrap(); - let ctx = ReadContext::new([ - ArrayId::new("vortex.test.foreign_array"), - ArrayId::new("vortex.test.foreign_child"), - ]); - let session = VortexSession::empty() - .with::() - .allow_unknown(); - - let decoded = ser - .decode(&DType::Variant(Nullability::Nullable), 5, &ctx, &session) - .unwrap(); - assert_eq!(decoded.encoding_id().as_ref(), "vortex.test.foreign_array"); - assert_eq!(decoded.nchildren(), 1); - assert_eq!( - decoded.nth_child(0).unwrap().encoding_id().as_ref(), - "vortex.test.foreign_child" - ); - 
assert_eq!(decoded.metadata(&SESSION).unwrap().unwrap(), vec![1, 2, 3]); - assert_eq!( - decoded - .nth_child(0) - .unwrap() - .metadata(&SESSION) - .unwrap() - .unwrap(), - vec![9] - ); - - let serialized = decoded - .serialize( - &ArrayContext::default(), - &SESSION, - &SerializeOptions::default(), - ) - .unwrap(); - assert!(!serialized.is_empty()); - } -} diff --git a/vortex-array/src/session/mod.rs b/vortex-array/src/session/mod.rs index 62ed46cb34c..034a369694f 100644 --- a/vortex-array/src/session/mod.rs +++ b/vortex-array/src/session/mod.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_session::Ref; use vortex_session::SessionExt; use vortex_session::registry::Registry; @@ -15,6 +16,7 @@ use crate::arrays::Bool; use crate::arrays::Chunked; use crate::arrays::Constant; use crate::arrays::Decimal; +use crate::arrays::Dict; use crate::arrays::Extension; use crate::arrays::FixedSizeList; use crate::arrays::List; @@ -26,6 +28,7 @@ use crate::arrays::Primitive; use crate::arrays::Struct; use crate::arrays::VarBin; use crate::arrays::VarBinView; +use crate::arrays::Variant; pub type ArrayRegistry = Registry; @@ -68,11 +71,13 @@ impl Default for ArraySession { this.register(ListView); this.register(FixedSizeList); this.register(Struct); + this.register(Variant); this.register(Extension); // Register the utility encodings. this.register(Chunked); this.register(Constant); + this.register(Dict); this.register(List); this.register(Masked); this.register(Patched); @@ -92,8 +97,12 @@ pub trait ArraySessionExt: SessionExt { /// Serialize an array using a plugin from the registry. 
fn array_serialize(&self, array: &ArrayRef) -> VortexResult>> { let Some(plugin) = self.arrays().registry.find(&array.encoding_id()) else { - return Ok(None); + vortex_bail!( + "Array {} is not registered for serializations", + array.encoding_id() + ); }; + plugin.serialize(array, &self.session()) } } diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs index 47fc52225aa..68f997eef87 100644 --- a/vortex-btrblocks/src/schemes/integer.rs +++ b/vortex-btrblocks/src/schemes/integer.rs @@ -42,9 +42,8 @@ use vortex_runend::RunEnd; use vortex_runend::compress::runend_encode; use vortex_sequence::sequence_encode; use vortex_sparse::Sparse; -use vortex_zigzag::ZigZag; -use vortex_zigzag::ZigZagArrayExt; use vortex_zigzag::zigzag_encode; +use vortex_zigzag::zigzag_try_new; use crate::ArrayAndStats; use crate::CascadingCompressor; @@ -290,13 +289,13 @@ impl Scheme for ZigZagScheme { ) -> VortexResult { // Zigzag encode the values, then recursively compress the inner values. 
let zag = zigzag_encode(data.array_as_primitive())?; - let encoded = zag.encoded().to_primitive(); + let encoded = zag.nth_child(0).vortex_expect("ZigZag should have 1 child"); - let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?; + let compressed = compressor.compress_child(&encoded, &ctx, self.id(), 0)?; tracing::debug!("zigzag output: {}", compressed.encoding_id()); - Ok(ZigZag::try_new(compressed)?.into_array()) + zigzag_try_new(compressed) } } diff --git a/vortex-cuda/src/dynamic_dispatch/mod.rs b/vortex-cuda/src/dynamic_dispatch/mod.rs index 2bc12a536d0..63701684d44 100644 --- a/vortex-cuda/src/dynamic_dispatch/mod.rs +++ b/vortex-cuda/src/dynamic_dispatch/mod.rs @@ -515,7 +515,7 @@ mod tests { use vortex::encodings::fastlanes::FoR; use vortex::encodings::fastlanes::FoRArrayExt; use vortex::encodings::runend::RunEnd; - use vortex::encodings::zigzag::ZigZag; + use vortex::encodings::zigzag::zigzag_try_new; use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::session::VortexSession; @@ -912,10 +912,10 @@ mod tests { let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); let bp = BitPacked::encode(&prim.into_array(), bit_width)?; - let zz = ZigZag::try_new(bp.into_array())?; + let zz = zigzag_try_new(bp.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = dispatch_plan(&zz.into_array(), &cuda_ctx)?; + let plan = dispatch_plan(&zz, &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; @@ -1215,9 +1215,9 @@ mod tests { let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); let bp = BitPacked::encode(&prim.into_array(), bit_width)?; - let zz = ZigZag::try_new(bp.into_array())?; + let zz = zigzag_try_new(bp.into_array())?; - let sliced = zz.into_array().slice(slice_start..slice_end)?; + let sliced = zz.slice(slice_start..slice_end)?; let expected: Vec = 
all_decoded[slice_start..slice_end].to_vec(); let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index bec93d4336f..ac4acf8502b 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -30,10 +30,11 @@ use vortex::encodings::runend::RunEnd; use vortex::encodings::runend::RunEndArrayExt; use vortex::encodings::sequence::Sequence; use vortex::encodings::zigzag::ZigZag; -use vortex::encodings::zigzag::ZigZagArrayExt; use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; +use vortex::scalar_fn::ScalarFnPlugin; +use vortex_error::VortexExpect; use super::CudaDispatchPlan; use super::MaterializedStage; @@ -546,8 +547,10 @@ impl FusedPlan { array: ArrayRef, pending_subtrees: &mut Vec, ) -> VortexResult { - let zz = array.as_::(); - let encoded = zz.encoded().clone(); + let encoded = array + .nth_child(0) + .vortex_expect("ZigZag should have 1 child"); + let output_ptype = ptype_to_tag(PType::try_from(array.dtype()).map_err(|_| { vortex_err!("ZigZag must have primitive dtype, got {:?}", array.dtype()) })?); diff --git a/vortex-cuda/src/hybrid_dispatch/mod.rs b/vortex-cuda/src/hybrid_dispatch/mod.rs index 0845a46fdc3..7fed8add88e 100644 --- a/vortex-cuda/src/hybrid_dispatch/mod.rs +++ b/vortex-cuda/src/hybrid_dispatch/mod.rs @@ -241,6 +241,7 @@ mod tests { let session = VortexSession::empty(); fastlanes::initialize(&session); session.arrays().register(ZstdBuffers); + let mut ctx = CudaSession::create_execution_ctx(&session).vortex_expect("ctx"); let num_values: u32 = 64; @@ -257,8 +258,7 @@ mod tests { 0u32.into(), ) .vortex_expect("for"); - let vals = ZstdBuffers::compress(&vals.into_array(), 3, &VortexSession::empty()) - .vortex_expect("zstd"); + let vals = ZstdBuffers::compress(&vals.into_array(), 3, &session).vortex_expect("zstd"); 
// codes = FoR(BitPacked) let codes = PrimitiveArray::new( diff --git a/vortex-cuda/src/kernel/encodings/zigzag.rs b/vortex-cuda/src/kernel/encodings/zigzag.rs index f13a19bf0e1..fb2a5d3776c 100644 --- a/vortex-cuda/src/kernel/encodings/zigzag.rs +++ b/vortex-cuda/src/kernel/encodings/zigzag.rs @@ -10,13 +10,15 @@ use tracing::instrument; use vortex::array::ArrayRef; use vortex::array::Canonical; use vortex::array::arrays::PrimitiveArray; +use vortex::array::arrays::ScalarFnVTable; use vortex::array::arrays::primitive::PrimitiveDataParts; +use vortex::array::arrays::scalar_fn::ExactScalarFn; +use vortex::array::arrays::scalar_fn::ScalarFnArrayExt; use vortex::array::match_each_unsigned_integer_ptype; +use vortex::array::matcher::Matcher; use vortex::dtype::NativePType; use vortex::dtype::PType; use vortex::encodings::zigzag::ZigZag; -use vortex::encodings::zigzag::ZigZagArray; -use vortex::encodings::zigzag::ZigZagArrayExt; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; @@ -31,8 +33,8 @@ use crate::executor::CudaExecutionCtx; pub(crate) struct ZigZagExecutor; impl ZigZagExecutor { - fn try_specialize(array: ArrayRef) -> Option { - array.try_downcast::().ok() + fn try_specialize(array: &ArrayRef) -> bool { + ExactScalarFn::::matches(array) } } @@ -44,32 +46,36 @@ impl CudaExecute for ZigZagExecutor { array: ArrayRef, ctx: &mut CudaExecutionCtx, ) -> VortexResult { - let array = - Self::try_specialize(array).ok_or_else(|| vortex_err!("Expected ZigZagArray"))?; + if !Self::try_specialize(&array) { + return Err(vortex_err!("Expected ZigZag ScalarFnArray")); + } + + let sfn_view = array.as_::(); // The encoded array is unsigned, we decode to signed of the same width. 
- let encoded_ptype = array.encoded().dtype().as_ptype(); + let encoded = sfn_view.child_at(0); + let encoded_ptype = encoded.dtype().as_ptype(); let output_ptype = PType::try_from(array.dtype())?; match_each_unsigned_integer_ptype!(encoded_ptype, |U| { - decode_zigzag::(array, output_ptype, ctx).await + decode_zigzag::(encoded, output_ptype, ctx).await }) } } async fn decode_zigzag( - array: ZigZagArray, + encoded: &ArrayRef, output_ptype: PType, ctx: &mut CudaExecutionCtx, ) -> VortexResult where U: NativePType + DeviceRepr + Send + Sync + 'static, { - let array_len = array.encoded().len(); + let array_len = encoded.len(); vortex_ensure!(array_len > 0, "ZigZag array must not be empty"); // Execute child and copy to device - let canonical = array.encoded().clone().execute_cuda(ctx).await?; + let canonical = encoded.clone().execute_cuda(ctx).await?; let primitive = canonical.into_primitive(); let PrimitiveDataParts { buffer, validity, .. @@ -103,7 +109,7 @@ mod tests { use vortex::array::assert_arrays_eq; use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; - use vortex::encodings::zigzag::ZigZag; + use vortex::encodings::zigzag::zigzag_try_new; use vortex::error::VortexExpect; use vortex::session::VortexSession; @@ -120,14 +126,14 @@ mod tests { // So encoded [0, 2, 4, 1, 3] should decode to [0, 1, 2, -1, -2] let encoded_data: Vec = vec![0, 2, 4, 1, 3]; - let zigzag_array = ZigZag::try_new( + let zigzag_array = zigzag_try_new( PrimitiveArray::new(Buffer::from(encoded_data), NonNullable).into_array(), )?; let cpu_result = zigzag_array.to_canonical()?; let gpu_result = ZigZagExecutor - .execute(zigzag_array.into_array(), &mut cuda_ctx) + .execute(zigzag_array, &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/lib.rs b/vortex-cuda/src/lib.rs index d23cbac1403..773a85402fa 100644 --- a/vortex-cuda/src/lib.rs +++ b/vortex-cuda/src/lib.rs @@ -76,6 +76,7 @@ use 
vortex::encodings::zigzag::ZigZag; use vortex::encodings::zstd::Zstd; #[cfg(feature = "unstable_encodings")] use vortex::encodings::zstd::ZstdBuffers; +use vortex::scalar_fn::ScalarFnPlugin; #[cfg(test)] use vortex_cuda_macros::test; pub use vortex_nvcomp as nvcomp; diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs index 3cc3b6d0858..f68bbb85446 100644 --- a/vortex-file/src/lib.rs +++ b/vortex-file/src/lib.rs @@ -116,7 +116,6 @@ use vortex_fsst::FSST; use vortex_pco::Pco; use vortex_session::VortexSession; use vortex_sparse::Sparse; -use vortex_zigzag::ZigZag; pub use writer::*; /// The current version of the Vortex file format @@ -163,7 +162,7 @@ pub fn register_default_encodings(session: &VortexSession) { arrays.register(FSST); arrays.register(Pco); arrays.register(Sparse); - arrays.register(ZigZag); + #[cfg(feature = "zstd")] arrays.register(vortex_zstd::Zstd); #[cfg(all(feature = "zstd", feature = "unstable_encodings"))] @@ -178,6 +177,7 @@ pub fn register_default_encodings(session: &VortexSession) { vortex_fastlanes::initialize(session); vortex_runend::initialize(session); vortex_sequence::initialize(session); + vortex_zigzag::initialize(session); #[cfg(feature = "unstable_encodings")] vortex_tensor::initialize(session); diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 0a3e31f32e3..96f619d78a5 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -28,6 +28,7 @@ use vortex_array::arrays::VarBin; use vortex_array::arrays::VarBinView; use vortex_array::arrays::patched::USE_EXPERIMENTAL_PATCHES; use vortex_array::dtype::FieldPath; +use vortex_array::scalar_fn::ScalarFnVTable; use vortex_btrblocks::BtrBlocksCompressorBuilder; use vortex_btrblocks::SchemeExt; use vortex_btrblocks::schemes::integer::IntDictScheme; diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index 0d804ee6fb8..98b2f670f9c 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ 
b/vortex-layout/src/layouts/flat/writer.rs @@ -468,27 +468,23 @@ mod tests { let ctx = ArrayContext::empty(); // Write the array into a byte buffer. - let (layout, _segments) = { - let segments = Arc::new(TestSegments::default()); - let (ptr, eof) = SequenceId::root().split(); - // Only allow the dict encoding; canonical primitive children remain permitted. - let mut allowed = HashSet::default(); - allowed.insert(Dict.id()); - let layout = FlatLayoutStrategy::default() - .with_allow_encodings(allowed) - .write_stream( - ctx, - Arc::::clone(&segments), - dict.into_array().to_array_stream().sequenced(ptr), - eof, - &session, - ) - .await; - (layout, segments) - }; + let segments = Arc::new(TestSegments::default()); + let (ptr, eof) = SequenceId::root().split(); + // Only allow the dict encoding; canonical primitive children remain permitted. + let mut allowed = HashSet::default(); + allowed.insert(Dict.id()); - assert!(layout.is_ok()); + FlatLayoutStrategy::default() + .with_allow_encodings(allowed) + .write_stream( + ctx, + Arc::::clone(&segments), + dict.into_array().to_array_stream().sequenced(ptr), + eof, + &session, + ) + .await?; Ok(()) }) diff --git a/vortex-python/src/arrays/compressed.rs b/vortex-python/src/arrays/compressed.rs index fd21c54fe69..f5045e6a6e0 100644 --- a/vortex-python/src/arrays/compressed.rs +++ b/vortex-python/src/arrays/compressed.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use pyo3::prelude::*; -use vortex::array::IntoArray; use vortex::array::ToCanonical; use vortex::array::arrays::Dict; use vortex::encodings::alp::ALP; @@ -12,7 +11,6 @@ use vortex::encodings::fsst::FSST; use vortex::encodings::runend::RunEnd; use vortex::encodings::sequence::Sequence; use vortex::encodings::sparse::Sparse; -use vortex::encodings::zigzag::ZigZag; use vortex::encodings::zigzag::zigzag_encode; use crate::PyVortex; @@ -81,17 +79,13 @@ impl EncodingSubclass for PySparseArray { #[pyclass(name = "ZigZagArray", module = 
"vortex", extends=PyNativeArray, frozen)] pub(crate) struct PyZigZagArray; -impl EncodingSubclass for PyZigZagArray { - type VTable = ZigZag; -} - #[pymethods] impl PyZigZagArray { #[staticmethod] pub fn encode(array: PyArrayRef) -> PyVortexResult { - Ok(PyVortex( - zigzag_encode(array.inner().clone().to_primitive().as_view())?.into_array(), - )) + Ok(PyVortex(zigzag_encode( + array.inner().clone().to_primitive().as_view(), + )?)) } } diff --git a/vortex-python/src/arrays/native.rs b/vortex-python/src/arrays/native.rs index 2ff7945fc55..f6c8c9b16a2 100644 --- a/vortex-python/src/arrays/native.rs +++ b/vortex-python/src/arrays/native.rs @@ -31,7 +31,6 @@ use vortex::encodings::fsst::FSST; use vortex::encodings::runend::RunEnd; use vortex::encodings::sequence::Sequence; use vortex::encodings::sparse::Sparse; -use vortex::encodings::zigzag::ZigZag; use vortex::error::VortexExpect; use crate::arrays::PyArray; @@ -56,7 +55,6 @@ use crate::arrays::compressed::PyFsstArray; use crate::arrays::compressed::PyRunEndArray; use crate::arrays::compressed::PySequenceArray; use crate::arrays::compressed::PySparseArray; -use crate::arrays::compressed::PyZigZagArray; use crate::arrays::fastlanes::PyFastLanesBitPackedArray; use crate::arrays::fastlanes::PyFastLanesDeltaArray; use crate::arrays::fastlanes::PyFastLanesFoRArray; @@ -153,10 +151,6 @@ impl PyNativeArray { return Self::with_subclass(py, array, PyRunEndArray); } - if array.is::() { - return Self::with_subclass(py, array, PyZigZagArray); - } - if array.is::() { return Self::with_subclass(py, array, PyFastLanesBitPackedArray); } diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index edf1061188d..1d1cb63cce7 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -280,7 +280,7 @@ impl PyVortexWriteOptions { /// >>> vx.io.VortexWriteOptions.default().write(sprl, "chonky.vortex") /// >>> import os /// >>> os.path.getsize('chonky.vortex') - /// 216004 + /// 216036 /// ``` /// /// Wow, Vortex manages to 
use about two bytes per integer! So advanced. So tiny. @@ -292,7 +292,7 @@ impl PyVortexWriteOptions { /// ```python /// >>> vx.io.VortexWriteOptions.compact().write(sprl, "tiny.vortex") /// >>> os.path.getsize('tiny.vortex') - /// 55120 + /// 55152 /// ``` /// /// Random numbers are not (usually) composed of random bytes! diff --git a/vortex-python/test/test_compress.py b/vortex-python/test/test_compress.py index 5909390d623..77b47f091f0 100644 --- a/vortex-python/test/test_compress.py +++ b/vortex-python/test/test_compress.py @@ -32,13 +32,6 @@ def test_arrange_encode(): assert compressed.nbytes < a.nbytes -def test_zigzag_encode(): - a = vortex.array(pa.array([-1, -1, 0, -1, 1, -1])) - zarr = vortex.ZigZagArray.encode(a) - assert isinstance(zarr, vortex.ZigZagArray) - # TODO(ngates): support decoding once we have decompressor. - - def test_chunked_encode(): chunked = pa.chunked_array([pa.array([0, 1, 2]), pa.array([3, 4, 5])]) encoded = vortex.array(chunked) diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs index 16e553854a0..045c17fb5af 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/zigzag.rs @@ -12,6 +12,7 @@ use vortex::array::validity::Validity; use vortex::encodings::zigzag::ZigZag; use vortex::encodings::zigzag::zigzag_encode; use vortex::error::VortexResult; +use vortex::scalar_fn::ScalarFnVTable; use super::N; use crate::fixtures::FlatLayoutFixture; @@ -81,17 +82,17 @@ impl FlatLayoutFixture for ZigZagFixture { "head_tail_nulls", ]), vec![ - zigzag_encode(alternating_i32.as_view())?.into_array(), - zigzag_encode(small_i64.as_view())?.into_array(), - zigzag_encode(deltas_i32.as_view())?.into_array(), - zigzag_encode(small_i16.as_view())?.into_array(), - zigzag_encode(small_i8.as_view())?.into_array(), - 
zigzag_encode(nullable_zigzag.as_view())?.into_array(), - zigzag_encode(extremes_i32.as_view())?.into_array(), - zigzag_encode(zero_heavy_outliers.as_view())?.into_array(), - zigzag_encode(repeated_negative.as_view())?.into_array(), - zigzag_encode(zero_crossing.as_view())?.into_array(), - zigzag_encode(head_tail_nulls.as_view())?.into_array(), + zigzag_encode(alternating_i32.as_view())?, + zigzag_encode(small_i64.as_view())?, + zigzag_encode(deltas_i32.as_view())?, + zigzag_encode(small_i16.as_view())?, + zigzag_encode(small_i8.as_view())?, + zigzag_encode(nullable_zigzag.as_view())?, + zigzag_encode(extremes_i32.as_view())?, + zigzag_encode(zero_heavy_outliers.as_view())?, + zigzag_encode(repeated_negative.as_view())?, + zigzag_encode(zero_crossing.as_view())?, + zigzag_encode(head_tail_nulls.as_view())?, ], N, Validity::NonNullable, diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 4cedf23c07d..160b55fb32e 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -222,7 +222,7 @@ fn bench_zigzag_compress_i32(bencher: Bencher) { #[divan::bench(name = "zigzag_decompress_i32")] fn bench_zigzag_decompress_i32(bencher: Bencher) { let (_, int_array, _) = setup_primitive_arrays(); - let compressed = zigzag_encode(int_array.as_view()).unwrap().into_array(); + let compressed = zigzag_encode(int_array.as_view()).unwrap(); with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &compressed)