From e8ec3b2afdd0cf4fb17554d12df71ee664b45584 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Tue, 7 Apr 2026 20:48:37 +0200 Subject: [PATCH 1/2] vortex-array: Add test for prune scan --- vortex-array/src/arrow/executor/struct_.rs | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/vortex-array/src/arrow/executor/struct_.rs b/vortex-array/src/arrow/executor/struct_.rs index a85005c0a20..97faa4be941 100644 --- a/vortex-array/src/arrow/executor/struct_.rs +++ b/vortex-array/src/arrow/executor/struct_.rs @@ -320,6 +320,53 @@ mod tests { Ok(()) } + /// Test that converting a struct to Arrow with fewer target fields (field + /// pruning) works by falling through to the cast path. + #[test] + fn struct_to_arrow_with_field_pruning() -> VortexResult<()> { + let array = StructArray::from_fields( + vec![ + ( + "a", + PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllValid).into_array(), + ), + ( + "b", + arrays::varbinview::VarBinViewArray::from_iter_str(vec!["x", "y", "z"]) + .into_array(), + ), + ( + "c", + PrimitiveArray::new(buffer![10i64, 20, 30], Validity::AllValid).into_array(), + ), + ] + .as_slice(), + )?; + + // Request only field "b" — fewer fields than the struct has. 
+ let target_fields = vec![Field::new("b", DataType::Utf8View, true)]; + let arrow_dt = DataType::Struct(target_fields.into()); + + let result = array.into_array().into_arrow(&arrow_dt)?; + let struct_arr = result + .as_any() + .downcast_ref::() + .expect("should be a StructArray"); + + assert_eq!(struct_arr.num_columns(), 1); + assert_eq!(struct_arr.column_names(), vec!["b"]); + let col = struct_arr + .column(0) + .as_any() + .downcast_ref::() + .expect("should be StringViewArray"); + assert_eq!(col.value(0), "x"); + assert_eq!(col.value(1), "y"); + assert_eq!(col.value(2), "z"); + + Ok(()) + } + #[test] fn to_arrow_with_non_nullable_fields() -> VortexResult<()> { let array = StructArray::from_fields( From 5fd74d7fb29523045c79c1a68149c2734b513aef Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Tue, 7 Apr 2026 20:50:27 +0200 Subject: [PATCH 2/2] vortex-array: Allow to_arrow_struct with a subset of fields --- vortex-array/src/arrow/executor/struct_.rs | 48 +++++++++++++++------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/vortex-array/src/arrow/executor/struct_.rs b/vortex-array/src/arrow/executor/struct_.rs index 97faa4be941..58fe30c99ca 100644 --- a/vortex-array/src/arrow/executor/struct_.rs +++ b/vortex-array/src/arrow/executor/struct_.rs @@ -47,23 +47,41 @@ pub(super) fn to_arrow_struct( Err(array) => array, }; - // Attempt to short-circuit if the array is already a Struct: + // Attempt to short-circuit if the array is already a Struct and the target + // has at least as many fields as the struct. When target_fields has fewer + // fields (e.g., due to nested field pruning), we skip the fast path and fall + // through to the cast path, which can handle field selection. let array = match array.try_downcast::() { Ok(array) => { - let StructDataParts { - validity, - fields, - struct_fields, - ..
- } = array.into_data_parts(); - let validity = to_arrow_null_buffer(validity, len, ctx)?; - return create_from_fields( - target_fields.ok_or_else(|| struct_fields.names().clone()), - &fields, - validity, - len, - ctx, - ); + let n_struct_fields = match array.dtype() { + DType::Struct(sf, _) => sf.nfields(), + _ => 0, + }; + // Skip the fast path only when the target has strictly fewer fields + // (nested field pruning). When the target has the same number of + // fields or more, use the fast path, which will validate the count. + let can_fast_path = match target_fields { + None => true, + Some(fields) => fields.len() >= n_struct_fields, + }; + if can_fast_path { + let StructDataParts { + validity, + fields, + struct_fields, + .. + } = array.into_data_parts(); + let validity = to_arrow_null_buffer(validity, len, ctx)?; + return create_from_fields( + target_fields.ok_or_else(|| struct_fields.names().clone()), + &fields, + validity, + len, + ctx, + ); + } + // Target requests fewer fields than the struct has — fall through to the cast path. + array.into_array() } Err(array) => array, };