From 0cd4f368baecda5d1aae0cfa04ca593b698fac29 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Wed, 8 Apr 2026 14:21:15 +0100 Subject: [PATCH 1/7] fix: avoid rebuild in ListView take_reduce for dense selections When the selection density is above the threshold, skip take_reduce so we don't rebuild the ListView for dense takes. Signed-off-by: Dimitar Dimitrov Signed-off-by: Dimitar Dimitrov --- vortex-array/public-api.lock | 12 ++ vortex-array/src/arrays/dict/execute.rs | 24 +++- .../src/arrays/filter/execute/listview.rs | 12 -- .../src/arrays/listview/compute/take.rs | 131 +++++++++++------- 4 files changed, 109 insertions(+), 70 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 687b9d33fef..dff84ded1b7 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2200,6 +2200,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::List pub fn vortex_array::arrays::List::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::List>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Primitive pub fn vortex_array::arrays::Primitive::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Primitive>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -2960,6 +2964,10 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -5976,6 +5984,10 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> diff --git a/vortex-array/src/arrays/dict/execute.rs b/vortex-array/src/arrays/dict/execute.rs index d95cd239a44..66f398397bf 100644 --- a/vortex-array/src/arrays/dict/execute.rs +++ b/vortex-array/src/arrays/dict/execute.rs @@ -46,7 +46,7 @@ pub fn take_canonical( Canonical::Primitive(a) => Canonical::Primitive(take_primitive(&a, codes, ctx)), Canonical::Decimal(a) => Canonical::Decimal(take_decimal(&a, codes, ctx)), Canonical::VarBinView(a) => Canonical::VarBinView(take_varbinview(&a, codes, ctx)), - Canonical::List(a) => Canonical::List(take_listview(&a, codes)), + Canonical::List(a) => Canonical::List(take_listview(&a, codes, ctx)), Canonical::FixedSizeList(a) => { Canonical::FixedSizeList(take_fixed_size_list(&a, codes, ctx)) } @@ -123,14 +123,24 @@ fn take_varbinview( .into_owned() } -fn take_listview(array: &ListViewArray, codes: &PrimitiveArray) -> ListViewArray { +fn take_listview( + array: &ListViewArray, + codes: &PrimitiveArray, + ctx: &mut ExecutionCtx, +) -> ListViewArray { let codes_ref = codes.clone().into_array(); let array = array.as_view(); - ::take(array, &codes_ref) - .vortex_expect("take listview array") - .vortex_expect("take listview should not return None") - .as_::() - .into_owned() + // Try the cheap metadata-only path first; fall back to the rebuilding execute path when the + // reduce impl declines (see `ListView::TakeReduce` for the density heuristic). + let taken = match ::take(array, &codes_ref) + .vortex_expect("take listview reduce") + { + Some(taken) => taken, + None => ::take(array, &codes_ref, ctx) + .vortex_expect("take listview execute") + .vortex_expect("ListView TakeExecute should not return None"), + }; + taken.as_::().into_owned() } fn take_fixed_size_list( diff --git a/vortex-array/src/arrays/filter/execute/listview.rs b/vortex-array/src/arrays/filter/execute/listview.rs index e6c30416e17..25dd08b11ef 100644 --- a/vortex-array/src/arrays/filter/execute/listview.rs +++ b/vortex-array/src/arrays/filter/execute/listview.rs @@ -12,18 +12,6 @@ use crate::arrays::filter::execute::values_to_mask; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; -// TODO(connor)[ListView]: Make use of this threshold after we start migrating operators. -/// The threshold for triggering a rebuild of the [`ListViewArray`]. -/// -/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it -/// can be potentially expensive to reorganize the array based on what views we have into it. -/// -/// However, we also do not want to carry around a large amount of garbage data. Below this -/// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing -/// any garbage data. -#[allow(unused)] -const REBUILD_DENSITY_THRESHOLD: f64 = 0.1; - /// [`ListViewArray`] filter implementation. /// /// This implementation is deliberately simple and read-optimized. We just filter the `offsets` and diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index 5cfadd40214..28c8b7f80d2 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -5,10 +5,12 @@ use num_traits::Zero; use vortex_error::VortexResult; use crate::ArrayRef; +use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; use crate::arrays::ListView; use crate::arrays::ListViewArray; +use crate::arrays::dict::TakeExecute; use crate::arrays::dict::TakeReduce; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; @@ -17,72 +19,99 @@ use crate::dtype::Nullability; use crate::match_each_integer_ptype; use crate::scalar::Scalar; -// TODO(connor)[ListView]: Make use of this threshold after we start migrating operators. -/// The threshold for triggering a rebuild of the [`ListViewArray`]. +/// The threshold below which we return `None` from [`TakeReduce`] so callers fall back to +/// [`TakeExecute`] and rebuild the underlying `elements` buffer. /// -/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it -/// can be potentially expensive to reorganize the array based on what views we have into it. -/// -/// However, we also do not want to carry around a large amount of garbage data. Below this -/// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing -/// any garbage data. -#[allow(unused)] -const REBUILD_DENSITY_THRESHOLD: f64 = 0.1; +/// We don't touch `elements` on the metadata-only path since reorganizing it can be expensive. +/// However, we also don't want to drag around a large amount of garbage data when the selection +/// is sparse. Below this fraction of list rows retained, the rebuild is worth it. +const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; -/// [`ListViewArray`] take implementation. +/// Metadata-only take for [`ListViewArray`]. /// /// This implementation is deliberately simple and read-optimized. We just take the `offsets` and -/// `sizes` at the requested indices and reuse the original `elements` array. This works because -/// `ListView` (unlike `List`) allows non-contiguous and out-of-order lists. +/// `sizes` at the requested indices and reuse the original `elements` buffer as-is. This works +/// because `ListView` (unlike `List`) allows non-contiguous and out-of-order lists. /// /// We don't slice the `elements` array because it would require computing min/max offsets and -/// adjusting all offsets accordingly, which is not really worth the small potential memory we would -/// be able to get back. +/// adjusting all offsets accordingly, which is not really worth the small potential memory we +/// would be able to get back. +/// +/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable +/// since we're optimizing for read performance and the data isn't being copied. /// -/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable since -/// we're optimizing for read performance and the data isn't being copied. +/// When the selection density drops below `REBUILD_DENSITY_THRESHOLD`, we return `None` so +/// callers can fall back to [`TakeExecute`], which compacts `elements` via a rebuild. Dense +/// selections keep the cheap metadata-only path. impl TakeReduce for ListView { fn take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult> { - let elements = array.elements(); - let offsets = array.offsets(); - let sizes = array.sizes(); - - // Compute the new validity by combining the array's validity with the indices' validity. - let new_validity = array.validity()?.take(indices)?; - - // Take the offsets and sizes arrays at the requested indices. - // Take can reorder offsets, create gaps, and may introduce overlaps if the `indices` - // contain duplicates. - let nullable_new_offsets = offsets.take(indices.clone())?; - let nullable_new_sizes = sizes.take(indices.clone())?; + // Approximate element density by the fraction of list rows retained. Assumes roughly + // uniform list sizes; good enough to decide whether dragging along the full `elements` + // buffer is worth avoiding a rebuild. + let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32; + if kept_row_fraction < REBUILD_DENSITY_THRESHOLD { + return Ok(None); + } - // Since `take` returns nullable arrays, we simply cast it back to non-nullable (filled with - // zeros to represent null lists). - let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { - nullable_new_offsets - .fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? - }); - let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { - nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? - }); - // SAFETY: Take operation maintains all `ListViewArray` invariants: - // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. - // - `new_offsets` and `new_sizes` are non-nullable. - // - `new_offsets` and `new_sizes` have the same length (both taken with the same - // `indices`). - // - Validity correctly reflects the combination of array and indices validity. - let new_array = unsafe { - ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) - }; + Ok(Some(apply_take(array, indices)?.into_array())) + } +} +/// Execution-path take for [`ListViewArray`]. +/// +/// This does the same metadata-only take as [`TakeReduce`], then unconditionally rebuilds the +/// result via [`ListViewRebuildMode::MakeZeroCopyToList`] so the output does not carry +/// unreferenced elements from the source. Callers reach this path when [`TakeReduce`] returns +/// `None` (sparse selections) or during `Dict` canonicalization, where we want to materialize a +/// compacted result. +impl TakeExecute for ListView { + fn take( + array: ArrayView<'_, ListView>, + indices: &ArrayRef, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` - // compute functions have run, at the "top" of the operator tree. However, we cannot do this - // right now, so we will just rebuild every time (similar to `ListArray`). - + // compute functions have run, at the "top" of the operator tree. However, we cannot do + // this right now, so we will just rebuild every time (similar to `ListArray`). + let taken = apply_take(array, indices)?; Ok(Some( - new_array + taken .rebuild(ListViewRebuildMode::MakeZeroCopyToList)? .into_array(), )) } } + +/// Shared metadata-only take: take `offsets`, `sizes` and `validity` at `indices` while reusing +/// the original `elements` buffer as-is. +fn apply_take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult { + let elements = array.elements(); + let offsets = array.offsets(); + let sizes = array.sizes(); + + // Combine the array's validity with the indices' validity. + let new_validity = array.validity()?.take(indices)?; + + // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain + // duplicates. + let nullable_new_offsets = offsets.take(indices.clone())?; + let nullable_new_sizes = sizes.take(indices.clone())?; + + // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to represent + // the null lists — the validity mask tracks nullness separately). + let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { + nullable_new_offsets.fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? + }); + let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { + nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? + }); + + // SAFETY: Take operation maintains all `ListViewArray` invariants: + // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. + // - `new_offsets` and `new_sizes` are non-nullable. + // - `new_offsets` and `new_sizes` have the same length (both taken with the same `indices`). + // - Validity correctly reflects the combination of array and indices validity. + Ok(unsafe { + ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) + }) +} From 9694e13a72e8911d9615a08fda475facd9bb5b58 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 9 Apr 2026 17:58:30 +0100 Subject: [PATCH 2/7] Only sometimes rebuild Signed-off-by: Dimitar Dimitrov --- vortex-array/public-api.lock | 12 -- vortex-array/src/arrays/dict/execute.rs | 24 +--- .../src/arrays/listview/compute/take.rs | 127 +++++++----------- 3 files changed, 58 insertions(+), 105 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6b86cfd3663..08dffbab678 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2208,10 +2208,6 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::List pub fn vortex_array::arrays::List::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::List>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView - -pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> - impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Primitive pub fn vortex_array::arrays::Primitive::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Primitive>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -2978,10 +2974,6 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult -impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView - -pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> - impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -5926,10 +5918,6 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult -impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView - -pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> - impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> diff --git a/vortex-array/src/arrays/dict/execute.rs b/vortex-array/src/arrays/dict/execute.rs index 66f398397bf..d95cd239a44 100644 --- a/vortex-array/src/arrays/dict/execute.rs +++ b/vortex-array/src/arrays/dict/execute.rs @@ -46,7 +46,7 @@ pub fn take_canonical( Canonical::Primitive(a) => Canonical::Primitive(take_primitive(&a, codes, ctx)), Canonical::Decimal(a) => Canonical::Decimal(take_decimal(&a, codes, ctx)), Canonical::VarBinView(a) => Canonical::VarBinView(take_varbinview(&a, codes, ctx)), - Canonical::List(a) => Canonical::List(take_listview(&a, codes, ctx)), + Canonical::List(a) => Canonical::List(take_listview(&a, codes)), Canonical::FixedSizeList(a) => { Canonical::FixedSizeList(take_fixed_size_list(&a, codes, ctx)) } @@ -123,24 +123,14 @@ fn take_varbinview( .into_owned() } -fn take_listview( - array: &ListViewArray, - codes: &PrimitiveArray, - ctx: &mut ExecutionCtx, -) -> ListViewArray { +fn take_listview(array: &ListViewArray, codes: &PrimitiveArray) -> ListViewArray { let codes_ref = codes.clone().into_array(); let array = array.as_view(); - // Try the cheap metadata-only path first; fall back to the rebuilding execute path when the - // reduce impl declines (see `ListView::TakeReduce` for the density heuristic). - let taken = match ::take(array, &codes_ref) - .vortex_expect("take listview reduce") - { - Some(taken) => taken, - None => ::take(array, &codes_ref, ctx) - .vortex_expect("take listview execute") - .vortex_expect("ListView TakeExecute should not return None"), - }; - taken.as_::().into_owned() + ::take(array, &codes_ref) + .vortex_expect("take listview array") + .vortex_expect("take listview should not return None") + .as_::() + .into_owned() } fn take_fixed_size_list( diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index 28c8b7f80d2..c828980843d 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -5,12 +5,10 @@ use num_traits::Zero; use vortex_error::VortexResult; use crate::ArrayRef; -use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; use crate::arrays::ListView; use crate::arrays::ListViewArray; -use crate::arrays::dict::TakeExecute; use crate::arrays::dict::TakeReduce; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; @@ -19,99 +17,76 @@ use crate::dtype::Nullability; use crate::match_each_integer_ptype; use crate::scalar::Scalar; -/// The threshold below which we return `None` from [`TakeReduce`] so callers fall back to -/// [`TakeExecute`] and rebuild the underlying `elements` buffer. +/// The density threshold at which we skip rebuilding the underlying `elements` buffer. /// -/// We don't touch `elements` on the metadata-only path since reorganizing it can be expensive. -/// However, we also don't want to drag around a large amount of garbage data when the selection -/// is sparse. Below this fraction of list rows retained, the rebuild is worth it. +/// Rebuilding `elements` can be expensive, so we avoid it when the selection keeps most of +/// the list rows. However, we also don't want to drag around a large amount of garbage data +/// when the selection is sparse — below this fraction of list rows retained, the rebuild is +/// worth it. const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; -/// Metadata-only take for [`ListViewArray`]. +/// [`ListViewArray`] take implementation. /// -/// This implementation is deliberately simple and read-optimized. We just take the `offsets` and -/// `sizes` at the requested indices and reuse the original `elements` buffer as-is. This works -/// because `ListView` (unlike `List`) allows non-contiguous and out-of-order lists. +/// We always take the `offsets` and `sizes` at the requested indices. Whether we also rebuild +/// the `elements` buffer depends on the selection density: /// -/// We don't slice the `elements` array because it would require computing min/max offsets and -/// adjusting all offsets accordingly, which is not really worth the small potential memory we -/// would be able to get back. +/// - **Dense selections** (above [`REBUILD_DENSITY_THRESHOLD`]): reuse the original `elements` +/// buffer as-is. This is the cheap, read-optimized path. We may keep some unreferenced +/// elements in memory, but this is acceptable since we're not copying the data. +/// - **Sparse selections**: rebuild via [`ListViewRebuildMode::MakeZeroCopyToList`] so the +/// result does not carry a large amount of garbage data. /// -/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable -/// since we're optimizing for read performance and the data isn't being copied. -/// -/// When the selection density drops below `REBUILD_DENSITY_THRESHOLD`, we return `None` so -/// callers can fall back to [`TakeExecute`], which compacts `elements` via a rebuild. Dense -/// selections keep the cheap metadata-only path. +/// This works because `ListView` (unlike `List`) allows non-contiguous and out-of-order lists, +/// so the metadata-only take is valid without touching `elements`. impl TakeReduce for ListView { fn take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult> { + let elements = array.elements(); + let offsets = array.offsets(); + let sizes = array.sizes(); + + // Combine the array's validity with the indices' validity. + let new_validity = array.validity()?.take(indices)?; + + // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain + // duplicates. + let nullable_new_offsets = offsets.take(indices.clone())?; + let nullable_new_sizes = sizes.take(indices.clone())?; + + // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to + // represent the null lists — the validity mask tracks nullness separately). + let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { + nullable_new_offsets + .fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? + }); + let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { + nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? + }); + + // SAFETY: Take operation maintains all `ListViewArray` invariants: + // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. + // - `new_offsets` and `new_sizes` are non-nullable. + // - `new_offsets` and `new_sizes` have the same length (both taken with the same + // `indices`). + // - Validity correctly reflects the combination of array and indices validity. + let new_array = unsafe { + ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) + }; + // Approximate element density by the fraction of list rows retained. Assumes roughly // uniform list sizes; good enough to decide whether dragging along the full `elements` // buffer is worth avoiding a rebuild. let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32; - if kept_row_fraction < REBUILD_DENSITY_THRESHOLD { - return Ok(None); + if kept_row_fraction >= REBUILD_DENSITY_THRESHOLD { + return Ok(Some(new_array.into_array())); } - Ok(Some(apply_take(array, indices)?.into_array())) - } -} - -/// Execution-path take for [`ListViewArray`]. -/// -/// This does the same metadata-only take as [`TakeReduce`], then unconditionally rebuilds the -/// result via [`ListViewRebuildMode::MakeZeroCopyToList`] so the output does not carry -/// unreferenced elements from the source. Callers reach this path when [`TakeReduce`] returns -/// `None` (sparse selections) or during `Dict` canonicalization, where we want to materialize a -/// compacted result. -impl TakeExecute for ListView { - fn take( - array: ArrayView<'_, ListView>, - indices: &ArrayRef, - _ctx: &mut ExecutionCtx, - ) -> VortexResult> { // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` // compute functions have run, at the "top" of the operator tree. However, we cannot do - // this right now, so we will just rebuild every time (similar to `ListArray`). - let taken = apply_take(array, indices)?; + // this right now, so we rebuild here for sparse selections. Ok(Some( - taken + new_array .rebuild(ListViewRebuildMode::MakeZeroCopyToList)? .into_array(), )) } } - -/// Shared metadata-only take: take `offsets`, `sizes` and `validity` at `indices` while reusing -/// the original `elements` buffer as-is. -fn apply_take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult { - let elements = array.elements(); - let offsets = array.offsets(); - let sizes = array.sizes(); - - // Combine the array's validity with the indices' validity. - let new_validity = array.validity()?.take(indices)?; - - // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain - // duplicates. - let nullable_new_offsets = offsets.take(indices.clone())?; - let nullable_new_sizes = sizes.take(indices.clone())?; - - // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to represent - // the null lists — the validity mask tracks nullness separately). - let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { - nullable_new_offsets.fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? - }); - let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { - nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? - }); - - // SAFETY: Take operation maintains all `ListViewArray` invariants: - // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. - // - `new_offsets` and `new_sizes` are non-nullable. - // - `new_offsets` and `new_sizes` have the same length (both taken with the same `indices`). - // - Validity correctly reflects the combination of array and indices validity. - Ok(unsafe { - ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) - }) -} From 6a7b4b99966b18cbabaff27abdb31b5cb6c7a5d9 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 9 Apr 2026 20:16:58 +0100 Subject: [PATCH 3/7] fix: avoid private intra-doc link to REBUILD_DENSITY_THRESHOLD Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/listview/compute/take.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index c828980843d..f6b35855354 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -30,7 +30,7 @@ const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; /// We always take the `offsets` and `sizes` at the requested indices. Whether we also rebuild /// the `elements` buffer depends on the selection density: /// -/// - **Dense selections** (above [`REBUILD_DENSITY_THRESHOLD`]): reuse the original `elements` +/// - **Dense selections** (above `REBUILD_DENSITY_THRESHOLD`): reuse the original `elements` /// buffer as-is. This is the cheap, read-optimized path. We may keep some unreferenced /// elements in memory, but this is acceptable since we're not copying the data. /// - **Sparse selections**: rebuild via [`ListViewRebuildMode::MakeZeroCopyToList`] so the From e0a30c5f2e4f97b095fc84f5c00be42c76ec52da Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 10 Apr 2026 11:48:47 +0100 Subject: [PATCH 4/7] Always only sometimes rebuild even when we know we have to rebuild Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/dict/execute.rs | 14 +- .../src/arrays/filter/execute/listview.rs | 17 ++- .../src/arrays/listview/compute/mod.rs | 1 + .../src/arrays/listview/compute/take.rs | 139 ++++++++++-------- 4 files changed, 97 insertions(+), 74 deletions(-) diff --git a/vortex-array/src/arrays/dict/execute.rs b/vortex-array/src/arrays/dict/execute.rs index d95cd239a44..19eb6d56b3d 100644 --- a/vortex-array/src/arrays/dict/execute.rs +++ b/vortex-array/src/arrays/dict/execute.rs @@ -46,7 +46,7 @@ pub fn take_canonical( Canonical::Primitive(a) => Canonical::Primitive(take_primitive(&a, codes, ctx)), Canonical::Decimal(a) => Canonical::Decimal(take_decimal(&a, codes, ctx)), Canonical::VarBinView(a) => Canonical::VarBinView(take_varbinview(&a, codes, ctx)), - Canonical::List(a) => Canonical::List(take_listview(&a, codes)), + Canonical::List(a) => Canonical::List(take_listview(&a, codes, ctx)), Canonical::FixedSizeList(a) => { Canonical::FixedSizeList(take_fixed_size_list(&a, codes, ctx)) } @@ -123,12 +123,16 @@ fn take_varbinview( .into_owned() } -fn take_listview(array: &ListViewArray, codes: &PrimitiveArray) -> ListViewArray { +fn take_listview( + array: &ListViewArray, + codes: &PrimitiveArray, + ctx: &mut ExecutionCtx, +) -> ListViewArray { let codes_ref = codes.clone().into_array(); let array = array.as_view(); - ::take(array, &codes_ref) - .vortex_expect("take listview array") - .vortex_expect("take listview should not return None") + ::take(array, &codes_ref, ctx) + .vortex_expect("take listview execute") + .vortex_expect("ListView TakeExecute should not return None") .as_::() .into_owned() } diff --git a/vortex-array/src/arrays/filter/execute/listview.rs b/vortex-array/src/arrays/filter/execute/listview.rs index 25dd08b11ef..3e6b8a3aa6e 100644 --- a/vortex-array/src/arrays/filter/execute/listview.rs +++ b/vortex-array/src/arrays/filter/execute/listview.rs @@ -6,11 +6,11 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_mask::MaskValues; -use crate::arrays::ListViewArray; use crate::arrays::filter::execute::filter_validity; use crate::arrays::filter::execute::values_to_mask; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; +use crate::arrays::{ListViewArray, listview}; /// [`ListViewArray`] filter implementation. /// @@ -58,13 +58,14 @@ pub fn filter_listview(array: &ListViewArray, selection_mask: &Arc) ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) }; - // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` - // compute functions have run, at the "top" of the operator tree. However, we cannot do this - // right now, so we will just rebuild every time (similar to `ListArray`). - - new_array - .rebuild(ListViewRebuildMode::MakeZeroCopyToList) - .vortex_expect("ListViewArray rebuild to zero-copy List should always succeed") + let kept_row_fraction = selection_mask.true_count() as f32 / array.sizes().len() as f32; + if kept_row_fraction < listview::compute::REBUILD_DENSITY_THRESHOLD { + new_array + .rebuild(ListViewRebuildMode::MakeZeroCopyToList) + .vortex_expect("ListViewArray rebuild to zero-copy List should always succeed") + } else { + new_array + } } #[cfg(test)] diff --git a/vortex-array/src/arrays/listview/compute/mod.rs b/vortex-array/src/arrays/listview/compute/mod.rs index 9a43503c4b5..2359727395e 100644 --- a/vortex-array/src/arrays/listview/compute/mod.rs +++ b/vortex-array/src/arrays/listview/compute/mod.rs @@ -6,3 +6,4 @@ mod mask; pub(crate) mod rules; mod slice; mod take; +pub use take::REBUILD_DENSITY_THRESHOLD; diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index f6b35855354..9eeb83b6a51 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -5,10 +5,12 @@ use num_traits::Zero; use vortex_error::VortexResult; use crate::ArrayRef; +use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; use crate::arrays::ListView; use crate::arrays::ListViewArray; +use crate::arrays::dict::TakeExecute; use crate::arrays::dict::TakeReduce; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; @@ -17,76 +19,91 @@ use crate::dtype::Nullability; use crate::match_each_integer_ptype; use crate::scalar::Scalar; -/// The density threshold at which we skip rebuilding the underlying `elements` buffer. +/// The threshold below which we rebuild the elements of a listview. /// -/// Rebuilding `elements` can be expensive, so we avoid it when the selection keeps most of -/// the list rows. However, we also don't want to drag around a large amount of garbage data -/// when the selection is sparse — below this fraction of list rows retained, the rebuild is -/// worth it. -const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; - -/// [`ListViewArray`] take implementation. -/// -/// We always take the `offsets` and `sizes` at the requested indices. Whether we also rebuild -/// the `elements` buffer depends on the selection density: -/// -/// - **Dense selections** (above `REBUILD_DENSITY_THRESHOLD`): reuse the original `elements` -/// buffer as-is. This is the cheap, read-optimized path. We may keep some unreferenced -/// elements in memory, but this is acceptable since we're not copying the data. -/// - **Sparse selections**: rebuild via [`ListViewRebuildMode::MakeZeroCopyToList`] so the -/// result does not carry a large amount of garbage data. +/// We don't touch `elements` on the metadata-only path since reorganizing it can be expensive. +/// However, we also don't want to drag around a large amount of garbage data when the selection +/// is sparse. Below this fraction of list rows retained, the rebuild is worth it. +/// Rebuilding is needed when exporting the ListView's elements. /// -/// This works because `ListView` (unlike `List`) allows non-contiguous and out-of-order lists, -/// so the metadata-only take is valid without touching `elements`. +// TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` +// compute functions have run, at the "top" of the operator tree. However, we cannot do this +// right now, so we will just rebuild every time (similar to [`ListArray`]). +pub const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; + +/// Metadata-only take for [`ListViewArray`]. impl TakeReduce for ListView { fn take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult> { - let elements = array.elements(); - let offsets = array.offsets(); - let sizes = array.sizes(); - - // Combine the array's validity with the indices' validity. - let new_validity = array.validity()?.take(indices)?; - - // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain - // duplicates. - let nullable_new_offsets = offsets.take(indices.clone())?; - let nullable_new_sizes = sizes.take(indices.clone())?; - - // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to - // represent the null lists — the validity mask tracks nullness separately). - let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { - nullable_new_offsets - .fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? - }); - let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { - nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? - }); - - // SAFETY: Take operation maintains all `ListViewArray` invariants: - // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. - // - `new_offsets` and `new_sizes` are non-nullable. - // - `new_offsets` and `new_sizes` have the same length (both taken with the same - // `indices`). - // - Validity correctly reflects the combination of array and indices validity. - let new_array = unsafe { - ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) - }; - // Approximate element density by the fraction of list rows retained. Assumes roughly // uniform list sizes; good enough to decide whether dragging along the full `elements` // buffer is worth avoiding a rebuild. let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32; - if kept_row_fraction >= REBUILD_DENSITY_THRESHOLD { - return Ok(Some(new_array.into_array())); + if kept_row_fraction < REBUILD_DENSITY_THRESHOLD { + return Ok(None); } - // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` - // compute functions have run, at the "top" of the operator tree. However, we cannot do - // this right now, so we rebuild here for sparse selections. - Ok(Some( - new_array - .rebuild(ListViewRebuildMode::MakeZeroCopyToList)? - .into_array(), - )) + Ok(Some(apply_take(array, indices)?.into_array())) + } +} + +/// Execution-path take for [`ListViewArray`]. +/// +/// This does the same metadata-only take as [`TakeReduce`], but also reuilds the array if the +/// resulting array will be less dense than [`REBUILD_DENSITY_THRESHOLD`] +impl TakeExecute for ListView { + fn take( + array: ArrayView<'_, ListView>, + indices: &ArrayRef, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32; + let taken = apply_take(array, indices)?; + + if kept_row_fraction < REBUILD_DENSITY_THRESHOLD { + // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` + // compute functions have run, at the "top" of the operator tree. However, we cannot do + // this right now, so we will just rebuild every time (similar to `ListArray`). + Ok(Some( + taken + .rebuild(ListViewRebuildMode::MakeZeroCopyToList)? + .into_array(), + )) + } else { + Ok(Some(taken.into_array())) + } } } + +/// Shared metadata-only take: take `offsets`, `sizes` and `validity` at `indices` while reusing +/// the original `elements` buffer as-is. +fn apply_take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult { + let elements = array.elements(); + let offsets = array.offsets(); + let sizes = array.sizes(); + + // Combine the array's validity with the indices' validity. + let new_validity = array.validity()?.take(indices)?; + + // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain + // duplicates. + let nullable_new_offsets = offsets.take(indices.clone())?; + let nullable_new_sizes = sizes.take(indices.clone())?; + + // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to represent + // the null lists — the validity mask tracks nullness separately). + let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| { + nullable_new_offsets.fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))? + }); + let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| { + nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))? + }); + + // SAFETY: Take operation maintains all `ListViewArray` invariants: + // - `new_offsets` and `new_sizes` are derived from existing valid child arrays. + // - `new_offsets` and `new_sizes` are non-nullable. + // - `new_offsets` and `new_sizes` have the same length (both taken with the same `indices`). + // - Validity correctly reflects the combination of array and indices validity. + Ok(unsafe { + ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity) + }) +} From a108dc660bc692a142702cc90d37009433a41f6e Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 10 Apr 2026 11:57:07 +0100 Subject: [PATCH 5/7] Fix listview CI checks Signed-off-by: Dimitar Dimitrov --- vortex-array/public-api.lock | 12 ++++++++++++ vortex-array/src/arrays/filter/execute/listview.rs | 3 ++- vortex-array/src/arrays/listview/compute/take.rs | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 08dffbab678..6b86cfd3663 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2208,6 +2208,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::List pub fn vortex_array::arrays::List::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::List>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::Primitive pub fn vortex_array::arrays::Primitive::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::Primitive>, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -2974,6 +2978,10 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -5918,6 +5926,10 @@ impl vortex_array::ValidityVTable for vortex_arr pub fn vortex_array::arrays::ListView::validity(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>) -> vortex_error::VortexResult +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::ListView + +pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::arrays::dict::TakeReduce for vortex_array::arrays::ListView pub fn vortex_array::arrays::ListView::take(array: vortex_array::ArrayView<'_, vortex_array::arrays::ListView>, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> diff --git a/vortex-array/src/arrays/filter/execute/listview.rs b/vortex-array/src/arrays/filter/execute/listview.rs index 3e6b8a3aa6e..6f782a50531 100644 --- a/vortex-array/src/arrays/filter/execute/listview.rs +++ b/vortex-array/src/arrays/filter/execute/listview.rs @@ -6,11 +6,12 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_mask::MaskValues; +use crate::arrays::ListViewArray; use crate::arrays::filter::execute::filter_validity; use crate::arrays::filter::execute::values_to_mask; +use crate::arrays::listview; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewRebuildMode; -use crate::arrays::{ListViewArray, listview}; /// [`ListViewArray`] filter implementation. /// diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index 9eeb83b6a51..2ab43486a20 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -48,8 +48,8 @@ impl TakeReduce for ListView { /// Execution-path take for [`ListViewArray`]. /// -/// This does the same metadata-only take as [`TakeReduce`], but also reuilds the array if the -/// resulting array will be less dense than [`REBUILD_DENSITY_THRESHOLD`] +/// This does the same metadata-only take as [`TakeReduce`], but also rebuilds the array if the +/// resulting array will be less dense than `REBUILD_DENSITY_THRESHOLD`. impl TakeExecute for ListView { fn take( array: ArrayView<'_, ListView>, From 3baac2068a1192f6095c2ac43ca240cd2b0fd0a4 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 10 Apr 2026 15:36:13 +0100 Subject: [PATCH 6/7] pub(crate) Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/listview/compute/mod.rs | 2 +- vortex-array/src/arrays/listview/compute/take.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/listview/compute/mod.rs b/vortex-array/src/arrays/listview/compute/mod.rs index 2359727395e..9321e82c769 100644 --- a/vortex-array/src/arrays/listview/compute/mod.rs +++ b/vortex-array/src/arrays/listview/compute/mod.rs @@ -6,4 +6,4 @@ mod mask; pub(crate) mod rules; mod slice; mod take; -pub use take::REBUILD_DENSITY_THRESHOLD; +pub(crate) use take::REBUILD_DENSITY_THRESHOLD; diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index 2ab43486a20..8068c8e674f 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -29,7 +29,7 @@ use crate::scalar::Scalar; // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` // compute functions have run, at the "top" of the operator tree. However, we cannot do this // right now, so we will just rebuild every time (similar to [`ListArray`]). -pub const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; +pub(crate) const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; /// Metadata-only take for [`ListViewArray`]. impl TakeReduce for ListView { From 2eb3db2b2a55ca402db60096c4897618170f17bc Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 16 Apr 2026 16:46:36 +0100 Subject: [PATCH 7/7] refactor: move listview rebuild density threshold to compute mod Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/listview/compute/mod.rs | 13 ++++++++++++- vortex-array/src/arrays/listview/compute/take.rs | 13 +------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/vortex-array/src/arrays/listview/compute/mod.rs b/vortex-array/src/arrays/listview/compute/mod.rs index 9321e82c769..3ea82cafb33 100644 --- a/vortex-array/src/arrays/listview/compute/mod.rs +++ b/vortex-array/src/arrays/listview/compute/mod.rs @@ -6,4 +6,15 @@ mod mask; pub(crate) mod rules; mod slice; mod take; -pub(crate) use take::REBUILD_DENSITY_THRESHOLD; + +/// The threshold below which we rebuild the elements of a listview. +/// +/// We don't touch `elements` on the metadata-only path since reorganizing it can be expensive. +/// However, we also don't want to drag around a large amount of garbage data when the selection +/// is sparse. Below this fraction of list rows retained, the rebuild is worth it. +/// Rebuilding is needed when exporting the ListView's elements. +/// +// TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` +// compute functions have run, at the "top" of the operator tree. However, we cannot do this +// right now, so we will just rebuild every time (similar to [`ListArray`]). +pub(crate) const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; diff --git a/vortex-array/src/arrays/listview/compute/take.rs b/vortex-array/src/arrays/listview/compute/take.rs index 8068c8e674f..04e404a846e 100644 --- a/vortex-array/src/arrays/listview/compute/take.rs +++ b/vortex-array/src/arrays/listview/compute/take.rs @@ -4,6 +4,7 @@ use num_traits::Zero; use vortex_error::VortexResult; +use super::REBUILD_DENSITY_THRESHOLD; use crate::ArrayRef; use crate::ExecutionCtx; use crate::IntoArray; @@ -19,18 +20,6 @@ use crate::dtype::Nullability; use crate::match_each_integer_ptype; use crate::scalar::Scalar; -/// The threshold below which we rebuild the elements of a listview. -/// -/// We don't touch `elements` on the metadata-only path since reorganizing it can be expensive. -/// However, we also don't want to drag around a large amount of garbage data when the selection -/// is sparse. Below this fraction of list rows retained, the rebuild is worth it. -/// Rebuilding is needed when exporting the ListView's elements. -/// -// TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter` -// compute functions have run, at the "top" of the operator tree. However, we cannot do this -// right now, so we will just rebuild every time (similar to [`ListArray`]). -pub(crate) const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; - /// Metadata-only take for [`ListViewArray`]. impl TakeReduce for ListView { fn take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult> {