diff --git a/vortex-tensor/Cargo.toml b/vortex-tensor/Cargo.toml index b14e82287bb..2f92ce5a107 100644 --- a/vortex-tensor/Cargo.toml +++ b/vortex-tensor/Cargo.toml @@ -37,8 +37,3 @@ rand = { workspace = true } rand_distr = { workspace = true } rstest = { workspace = true } vortex-btrblocks = { path = "../vortex-btrblocks" } - -[[bench]] -name = "similarity_search" -harness = false -test = false diff --git a/vortex-tensor/benches/similarity_search.rs b/vortex-tensor/benches/similarity_search.rs deleted file mode 100644 index 1112eda11b2..00000000000 --- a/vortex-tensor/benches/similarity_search.rs +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! End-to-end similarity-search execution benchmark. -//! -//! For each of three compression strategies (uncompressed, default BtrBlocks, TurboQuant), this -//! bench: -//! -//! 1. Generates a deterministic random `Vector` batch. -//! 2. Applies the compression strategy *outside* the timed region. -//! 3. Builds the lazy -//! `Binary(Gt, [CosineSimilarity(data, query), threshold])` -//! tree *outside* the timed region. -//! 4. Times *only* `tree.execute::(&mut ctx)`. -//! -//! Run with: `cargo bench -p vortex-tensor --bench similarity_search` - -use divan::Bencher; -use mimalloc::MiMalloc; -use vortex_array::VortexSessionExecute; -use vortex_array::arrays::BoolArray; -use vortex_error::VortexExpect; - -#[path = "similarity_search_common/mod.rs"] -mod common; - -use common::Variant; -use common::build_similarity_search_tree; -use common::extract_row_as_query; -use common::generate_random_vectors; - -#[global_allocator] -static GLOBAL: MiMalloc = MiMalloc; - -/// Number of vectors in the benchmark dataset. -const NUM_ROWS: usize = 100_000; - -/// Dimensionality of each vector. Must be `>= vortex_tensor::encodings::turboquant::MIN_DIMENSION` -/// (128) for the TurboQuant variant to work. 
-const DIM: u32 = 768; - -/// Deterministic PRNG seed for the generated dataset. -const SEED: u64 = 0xC0FFEE; - -/// Cosine similarity threshold for the "greater than" filter. Random f32 vectors from N(0, 1) at -/// this dimension have near-zero pairwise similarity, so picking a row of the dataset as the -/// query guarantees at least that row matches. -const THRESHOLD: f32 = 0.8; - -fn main() { - divan::main(); -} - -/// Runs one end-to-end execution of the similarity-search tree for the given variant. All dataset -/// generation and tree construction happens in the bench setup closure so only the execution of -/// the lazy tree is timed. -fn bench_variant(bencher: Bencher<'_, '_>, variant: Variant) { - bencher - .with_inputs(|| { - let mut ctx = common::SESSION.create_execution_ctx(); - - // Use row 0 of the uncompressed data as the query so we always have at least one - // match. Keeping the query extraction separate from the compressed-data build keeps - // the query identical across all three variants. - let raw = generate_random_vectors(NUM_ROWS, DIM, SEED); - let query = extract_row_as_query(&raw, 0, DIM); - let data = match variant { - Variant::Uncompressed => raw, - Variant::DefaultCompression => { - common::compress_default(raw).vortex_expect("default compression succeeds") - } - Variant::TurboQuant => common::compress_turboquant(raw, &mut ctx) - .vortex_expect("turboquant compression succeeds"), - }; - - // println!( - // "\n\n{}: {}\n\n", - // variant, - // data.display_tree_encodings_only() - // ); - - let tree = build_similarity_search_tree(data, &query, THRESHOLD) - .vortex_expect("tree construction succeeds"); - - (tree, ctx) - }) - .bench_values(|(tree, mut ctx)| { - // Hot path: only the .execute() call is timed. The result is a BoolArray of length - // NUM_ROWS with true at positions where cosine_similarity > THRESHOLD. 
- tree.execute::(&mut ctx) - .vortex_expect("similarity search tree executes to a BoolArray") - }); -} - -#[divan::bench] -fn execute_uncompressed(bencher: Bencher) { - bench_variant(bencher, Variant::Uncompressed); -} - -#[divan::bench] -fn execute_default_compression(bencher: Bencher) { - bench_variant(bencher, Variant::DefaultCompression); -} - -#[divan::bench] -fn execute_turboquant(bencher: Bencher) { - bench_variant(bencher, Variant::TurboQuant); -} diff --git a/vortex-tensor/benches/similarity_search_common/mod.rs b/vortex-tensor/benches/similarity_search_common/mod.rs deleted file mode 100644 index c22cb5a9f08..00000000000 --- a/vortex-tensor/benches/similarity_search_common/mod.rs +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Shared helpers for the similarity-search benchmark and example. -//! -//! This module is included from both `vortex-tensor/benches/similarity_search.rs` and -//! `vortex-tensor/examples/similarity_search.rs` via an explicit `#[path = ...]` so both targets -//! use the exact same array-tree builder. -//! -//! The three main entry points are: -//! -//! - [`generate_random_vectors`] to build a deterministic random [`Vector`] extension array. -//! - [`build_variant`] to take a raw vector array and apply the requested compression strategy -//! (uncompressed, default BtrBlocks, or TurboQuant). -//! - [`build_similarity_search_tree`] to wire a cosine-similarity + threshold expression on top of -//! a prepared data array and a single-row query vector. -//! -//! 
[`Vector`]: vortex_tensor::vector::Vector - -#![allow(dead_code)] - -use std::fmt; -use std::sync::LazyLock; - -use rand::SeedableRng; -use rand::rngs::StdRng; -use rand_distr::Distribution; -use rand_distr::Normal; -use vortex_array::ArrayRef; -use vortex_array::ExecutionCtx; -use vortex_array::IntoArray; -use vortex_array::VortexSessionExecute; -use vortex_array::arrays::ConstantArray; -use vortex_array::arrays::Extension; -use vortex_array::arrays::ExtensionArray; -use vortex_array::arrays::FixedSizeListArray; -use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::extension::ExtensionArrayExt; -use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; -use vortex_array::builtins::ArrayBuiltins; -use vortex_array::dtype::DType; -use vortex_array::dtype::Nullability; -use vortex_array::dtype::PType; -use vortex_array::dtype::extension::ExtDType; -use vortex_array::extension::EmptyMetadata; -use vortex_array::scalar::Scalar; -use vortex_array::scalar_fn::fns::operators::Operator; -use vortex_array::session::ArraySession; -use vortex_array::validity::Validity; -use vortex_btrblocks::BtrBlocksCompressor; -use vortex_buffer::BufferMut; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_error::vortex_panic; -use vortex_session::VortexSession; -use vortex_tensor::encodings::turboquant::TurboQuantConfig; -use vortex_tensor::encodings::turboquant::turboquant_encode_unchecked; -use vortex_tensor::scalar_fns::cosine_similarity::CosineSimilarity; -use vortex_tensor::scalar_fns::l2_denorm::L2Denorm; -use vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm; -use vortex_tensor::vector::Vector; - -/// A shared [`VortexSession`] pre-loaded with the builtin [`ArraySession`] so both bench and -/// example can create execution contexts cheaply. 
-pub static SESSION: LazyLock = - LazyLock::new(|| VortexSession::empty().with::()); - -/// The three compression strategies the benchmark and example exercise. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Variant { - /// Raw `Vector` with no compression applied. - Uncompressed, - /// `BtrBlocksCompressor::default()` walks into the extension array and compresses the - /// underlying FSL storage child with the default scheme set (no TurboQuant). - DefaultCompression, - /// TurboQuant: normalize, quantize to `FSL(Dict)`, wrap in SORF + `L2Denorm`. - TurboQuant, -} - -impl fmt::Display for Variant { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Uncompressed => f.write_str("Uncompressed"), - Self::DefaultCompression => f.write_str("DefaultCompression"), - Self::TurboQuant => f.write_str("TurboQuant"), - } - } -} - -/// Generate `num_rows` random f32 vectors of dimension `dim`, wrapped in a [`Vector`] extension -/// array. The values are drawn from a standard normal distribution seeded by `seed` so results -/// are reproducible across runs. -/// -/// [`Vector`]: vortex_tensor::vector::Vector -pub fn generate_random_vectors(num_rows: usize, dim: u32, seed: u64) -> ArrayRef { - let mut rng = StdRng::seed_from_u64(seed); - // `Normal::new(0, 1)` is infallible for these parameters. `rand_distr::NormalError` does - // not implement `Into`, so we cannot use `vortex_expect` here; fall back to - // `vortex_panic!` on the (impossible) error path instead. 
- let normal = - Normal::new(0.0f32, 1.0).unwrap_or_else(|_| vortex_panic!("Normal(0, 1) is well-defined")); - - let dim_usize = dim as usize; - let mut buf = BufferMut::::with_capacity(num_rows * dim_usize); - for _ in 0..(num_rows * dim_usize) { - buf.push(normal.sample(&mut rng)); - } - - let elements = PrimitiveArray::new::(buf.freeze(), Validity::NonNullable); - let fsl = - FixedSizeListArray::try_new(elements.into_array(), dim, Validity::NonNullable, num_rows) - .vortex_expect("FSL with valid shape and matching children length"); - - let ext_dtype = ExtDType::::try_new(EmptyMetadata, fsl.dtype().clone()) - .vortex_expect("Vector extension dtype is valid for an f32 FSL") - .erased(); - ExtensionArray::new(ext_dtype, fsl.into_array()).into_array() -} - -/// Pull the `row`-th vector out of a `Vector` extension array as a plain `Vec`. -/// -/// Used to extract a single query vector from a batch of generated data. The input must already -/// be fully materialized (no lazy scalar-fn wrappers); pass a raw array from -/// [`generate_random_vectors`], not a compressed variant. -pub fn extract_row_as_query(vectors: &ArrayRef, row: usize, dim: u32) -> Vec { - let ext = vectors - .as_opt::() - .vortex_expect("data must be a Vector extension array"); - - let mut ctx = SESSION.create_execution_ctx(); - let fsl: FixedSizeListArray = ext - .storage_array() - .clone() - .execute(&mut ctx) - .vortex_expect("storage array executes to an FSL"); - let elements: PrimitiveArray = fsl - .elements() - .clone() - .execute(&mut ctx) - .vortex_expect("FSL elements execute to a PrimitiveArray"); - - let slice = elements.as_slice::(); - let dim_usize = dim as usize; - let start = row * dim_usize; - slice[start..start + dim_usize].to_vec() -} - -/// Build a `Vector` extension array whose storage is a [`ConstantArray`] broadcasting a -/// single query vector across `num_rows` rows. 
This is how we hand a single query vector to -/// `CosineSimilarity` on the `rhs` side -- `ScalarFnArray` requires both children to have the -/// same length, so we broadcast the query instead of hand-rolling a 1-row input. -fn build_constant_query_vector(query: &[f32], num_rows: usize) -> VortexResult { - let element_dtype = DType::Primitive(PType::F32, Nullability::NonNullable); - - let children: Vec = query - .iter() - .map(|&v| Scalar::primitive(v, Nullability::NonNullable)) - .collect(); - let storage_scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable); - - let storage = ConstantArray::new(storage_scalar, num_rows).into_array(); - - let ext_dtype = ExtDType::::try_new(EmptyMetadata, storage.dtype().clone())?.erased(); - Ok(ExtensionArray::new(ext_dtype, storage).into_array()) -} - -/// Compresses a raw `Vector` array with the default BtrBlocks pipeline. -/// -/// [`BtrBlocksCompressor`] walks into the extension array and recursively compresses the -/// underlying FSL storage child. TurboQuant is *not* exercised by this path -- it is not -/// registered in the default scheme set -- so this measures "generic" lossless compression -/// applied to float vectors. -pub fn compress_default(data: ArrayRef) -> VortexResult { - BtrBlocksCompressor::default().compress(&data) -} - -/// Compresses a raw `Vector` array with the TurboQuant pipeline by hand, producing the -/// same tree shape that -/// [`vortex_tensor::encodings::turboquant::TurboQuantScheme`] would: -/// -/// ```text -/// L2Denorm(SorfTransform(FSL(Dict(codes, centroids))), norms) -/// ``` -/// -/// Calling the encode helpers directly (instead of going through -/// `BtrBlocksCompressorBuilder::with_turboquant()`) lets this example avoid depending on the -/// `unstable_encodings` feature flag. -/// -/// See `vortex-tensor/src/encodings/turboquant/tests/mod.rs::normalize_and_encode` for the same -/// canonical recipe. 
-pub fn compress_turboquant(data: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - let l2_denorm = normalize_as_l2_denorm(data, ctx)?; - let normalized = l2_denorm.child_at(0).clone(); - let norms = l2_denorm.child_at(1).clone(); - let num_rows = l2_denorm.len(); - - let normalized_ext = normalized - .as_opt::() - .vortex_expect("normalized child should be an Extension array"); - - let config = TurboQuantConfig::default(); - // SAFETY: `normalize_as_l2_denorm` guarantees every row is unit-norm (or zero), which is the - // invariant `turboquant_encode_unchecked` expects. - let tq = unsafe { turboquant_encode_unchecked(normalized_ext, &config, ctx) }?; - - Ok(unsafe { L2Denorm::new_array_unchecked(tq, norms, num_rows) }?.into_array()) -} - -/// Dispatch helper that builds the data array for the requested [`Variant`], starting from a -/// single random-vector generation. Always returns an `ArrayRef` whose logical dtype is -/// `Vector`. -pub fn build_variant( - variant: Variant, - num_rows: usize, - dim: u32, - seed: u64, - ctx: &mut ExecutionCtx, -) -> VortexResult { - let raw = generate_random_vectors(num_rows, dim, seed); - match variant { - Variant::Uncompressed => Ok(raw), - Variant::DefaultCompression => compress_default(raw), - Variant::TurboQuant => compress_turboquant(raw, ctx), - } -} - -/// Build the lazy similarity-search array tree for a prepared data array and a single query -/// vector. The returned tree is a boolean array of length `data.len()` where position `i` is -/// `true` iff `cosine_similarity(data[i], query) > threshold`. -/// -/// The tree shape is: -/// -/// ```text -/// Binary(Gt, [ -/// CosineSimilarity([data, ConstantArray(query_vec, n)]), -/// ConstantArray(threshold, n), -/// ]) -/// ``` -/// -/// This function does no execution; it is safe to call inside a benchmark setup closure. 
-pub fn build_similarity_search_tree( - data: ArrayRef, - query: &[f32], - threshold: f32, -) -> VortexResult { - let num_rows = data.len(); - let query_vec = build_constant_query_vector(query, num_rows)?; - - let cosine = CosineSimilarity::try_new_array(data, query_vec, num_rows)?.into_array(); - - let threshold_scalar = Scalar::primitive(threshold, Nullability::NonNullable); - let threshold_array = ConstantArray::new(threshold_scalar, num_rows).into_array(); - - cosine.binary(threshold_array, Operator::Gt) -} diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock index bec8df1cb29..4f6c2dddbfb 100644 --- a/vortex-tensor/public-api.lock +++ b/vortex-tensor/public-api.lock @@ -550,4 +550,12 @@ impl core::marker::Copy for vortex_tensor::vector::VectorMatcherMetadata impl core::marker::StructuralPartialEq for vortex_tensor::vector::VectorMatcherMetadata +pub mod vortex_tensor::vector_search + +pub fn vortex_tensor::vector_search::build_constant_query_vector>(query: &[T], num_rows: usize) -> vortex_error::VortexResult + +pub fn vortex_tensor::vector_search::build_similarity_search_tree>(data: vortex_array::array::erased::ArrayRef, query: &[T], threshold: T) -> vortex_error::VortexResult + +pub fn vortex_tensor::vector_search::compress_turboquant(data: vortex_array::array::erased::ArrayRef, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult + pub fn vortex_tensor::initialize(session: &vortex_session::VortexSession) diff --git a/vortex-tensor/src/lib.rs b/vortex-tensor/src/lib.rs index 3d3563aa8e4..b3cf6c21695 100644 --- a/vortex-tensor/src/lib.rs +++ b/vortex-tensor/src/lib.rs @@ -25,6 +25,8 @@ pub mod vector; pub mod encodings; +pub mod vector_search; + mod utils; /// Initialize the Vortex tensor library with a Vortex session. 
diff --git a/vortex-tensor/src/scalar_fns/inner_product.rs b/vortex-tensor/src/scalar_fns/inner_product.rs index c883cf00982..0d6b2bf76aa 100644 --- a/vortex-tensor/src/scalar_fns/inner_product.rs +++ b/vortex-tensor/src/scalar_fns/inner_product.rs @@ -527,18 +527,9 @@ impl InnerProduct { let values: &[f32] = values_prim.as_slice::(); debug_assert_eq!(codes.len(), len * padded_dim); - // Direct codebook lookup in the hot loop. See the function doc comment for why this - // beats an explicit product table here. - let mut out = BufferMut::::with_capacity(len); - for row in 0..len { - let row_codes = &codes[row * padded_dim..(row + 1) * padded_dim]; - let mut acc = 0.0f32; - for j in 0..padded_dim { - acc += q[j] * values[row_codes[j] as usize]; - } - // SAFETY: we reserved `len` slots above and push exactly once per row. - unsafe { out.push_unchecked(acc) }; - } + // The hot loop is extracted into [`execute_dict_constant_inner_product`] with + // chunked iteration so the compiler can vectorize the inner gather-accumulate. + let out = execute_dict_constant_inner_product(q, values, codes, len, padded_dim); // SAFETY: the buffer length equals `len`, which matches the validity length. let result = unsafe { PrimitiveArray::new_unchecked(out.freeze(), validity) }.into_array(); @@ -556,6 +547,49 @@ fn inner_product_row(a: &[T], b: &[T]) -> T { .fold(T::zero(), |acc, v| acc + v) } +/// Compute inner products between a constant query vector and dictionary-encoded rows. +/// +/// For each row, computes `sum(q[j] * values[codes[row * dim + j]])` using the codebook +/// `values` directly instead of decoding the dictionary into dense vectors. +/// +/// The inner loop uses eight independent accumulators so the CPU can pipeline FP additions +/// instead of waiting for each `fadd` to retire before starting the next. 
+fn execute_dict_constant_inner_product( + q: &[f32], + values: &[f32], + codes: &[u8], + num_rows: usize, + dim: usize, +) -> BufferMut { + let mut out = BufferMut::::with_capacity(num_rows); + + const PARTIAL_SUMS: usize = 8; + + for row_codes in codes.chunks_exact(dim) { + let mut acc = [0.0f32; PARTIAL_SUMS]; + + let code_chunks = row_codes.chunks_exact(PARTIAL_SUMS); + let q_chunks = q.chunks_exact(PARTIAL_SUMS); + let code_rem = code_chunks.remainder(); + let q_rem = q_chunks.remainder(); + + for (cc, qd) in code_chunks.zip(q_chunks) { + for i in 0..PARTIAL_SUMS { + acc[i] = qd[i].mul_add(values[cc[i] as usize], acc[i]); + } + } + + for (&code, &q_val) in code_rem.iter().zip(q_rem.iter()) { + acc[0] = q_val.mul_add(values[code as usize], acc[0]); + } + + // SAFETY: we reserved `num_rows` slots and push exactly once per row. + unsafe { out.push_unchecked(acc.iter().sum::()) }; + } + + out +} + #[cfg(test)] mod tests { use std::sync::LazyLock; diff --git a/vortex-tensor/src/vector_search.rs b/vortex-tensor/src/vector_search.rs new file mode 100644 index 00000000000..81a379683db --- /dev/null +++ b/vortex-tensor/src/vector_search.rs @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Reusable helpers for building brute-force vector similarity search expressions over +//! [`Vector`] extension arrays. +//! +//! This module exposes three small building blocks that together make it straightforward to +//! stand up a cosine-similarity-plus-threshold scan on top of a prepared data array: +//! +//! - [`compress_turboquant`] applies the canonical TurboQuant encoding pipeline +//! (`L2Denorm(SorfTransform(FSL(Dict(codes, centroids))), norms)`) to a raw +//! `Vector` array without requiring the caller to plumb the +//! `unstable_encodings` feature flag on the `vortex` facade. +//! - [`build_constant_query_vector`] wraps a single query vector into a +//! 
[`Vector`] extension array whose storage is a [`ConstantArray`] broadcast +//! across `num_rows` rows. This is the shape expected by +//! [`CosineSimilarity::try_new_array`] for the RHS of a database-vs-query scan. +//! - [`build_similarity_search_tree`] wires everything together into a lazy +//! `Binary(Gt, [CosineSimilarity(data, query), threshold])` expression. +//! +//! Executing the tree from [`build_similarity_search_tree`] into a +//! [`BoolArray`](vortex_array::arrays::BoolArray) yields one boolean per row indicating whether +//! that row's cosine similarity to the query exceeds `threshold`. +//! +//! # Example +//! +//! ```ignore +//! use vortex_array::{ArrayRef, VortexSessionExecute}; +//! use vortex_array::arrays::BoolArray; +//! use vortex_session::VortexSession; +//! use vortex_tensor::vector_search::{build_similarity_search_tree, compress_turboquant}; +//! +//! fn run(session: &VortexSession, data: ArrayRef, query: &[f32]) -> anyhow::Result<()> { +//! let mut ctx = session.create_execution_ctx(); +//! let data = compress_turboquant(data, &mut ctx)?; +//! let tree = build_similarity_search_tree(data, query, 0.8)?; +//! let _matches: BoolArray = tree.execute(&mut ctx)?; +//! Ok(()) +//! } +//! ``` +//! +//! [`Vector`]: crate::vector::Vector +//! 
[`CosineSimilarity::try_new_array`]: crate::scalar_fns::cosine_similarity::CosineSimilarity::try_new_array + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::Extension; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::dtype::DType; +use vortex_array::dtype::NativePType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::extension::ExtDType; +use vortex_array::extension::EmptyMetadata; +use vortex_array::scalar::PValue; +use vortex_array::scalar::Scalar; +use vortex_array::scalar_fn::fns::operators::Operator; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; + +use crate::encodings::turboquant::TurboQuantConfig; +use crate::encodings::turboquant::turboquant_encode_unchecked; +use crate::scalar_fns::cosine_similarity::CosineSimilarity; +use crate::scalar_fns::l2_denorm::L2Denorm; +use crate::scalar_fns::l2_denorm::normalize_as_l2_denorm; +use crate::vector::Vector; + +/// Apply the canonical TurboQuant encoding pipeline to a `Vector` array. +/// +/// The returned array has the shape +/// `L2Denorm(SorfTransform(FSL(Dict(codes, centroids))), norms)` — exactly what +/// [`crate::encodings::turboquant::TurboQuantScheme`] produces when invoked through +/// `BtrBlocksCompressorBuilder::with_turboquant()`, but without requiring callers to enable +/// the `unstable_encodings` feature on the `vortex` facade. +/// +/// The input `data` must be a [`Vector`] extension array whose element type is `f32` and whose +/// dimensionality is at least +/// [`turboquant::MIN_DIMENSION`](crate::encodings::turboquant::MIN_DIMENSION). The TurboQuant +/// configuration used is [`TurboQuantConfig::default()`] (8-bit codes, 3 SORF rounds, seed 42). 
+/// +/// # Errors +/// +/// Returns an error if `data` is not a [`Vector`] extension array, if normalization fails, or +/// if the underlying TurboQuant encoder rejects the input shape. +pub fn compress_turboquant(data: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { + let l2_denorm = normalize_as_l2_denorm(data, ctx)?; + let normalized = l2_denorm.child_at(0).clone(); + let norms = l2_denorm.child_at(1).clone(); + let num_rows = l2_denorm.len(); + + let Some(normalized_ext) = normalized.as_opt::() else { + vortex_bail!("normalize_as_l2_denorm must produce an Extension array child"); + }; + + let config = TurboQuantConfig::default(); + // SAFETY: `normalize_as_l2_denorm` guarantees every row is unit-norm (or zero), which is + // the invariant `turboquant_encode_unchecked` expects. + let tq = unsafe { turboquant_encode_unchecked(normalized_ext, &config, ctx) }?; + + Ok(unsafe { L2Denorm::new_array_unchecked(tq, norms, num_rows) }?.into_array()) +} + +/// Build a [`Vector`] extension array whose storage is a [`ConstantArray`] broadcasting a single +/// query vector across `num_rows` rows. +/// +/// The element type is inferred from `T` (e.g. `f32` or `f64`). This is the shape expected for +/// the RHS of a database-vs-query [`CosineSimilarity`] scan: the `ScalarFnArray` contract +/// requires both children to have the same length, so rather than hand-rolling a 1-row input we +/// broadcast the query across the whole database. +/// +/// # Errors +/// +/// Returns an error if the [`Vector`] extension dtype rejects the constructed storage dtype. 
+pub fn build_constant_query_vector>( + query: &[T], + num_rows: usize, +) -> VortexResult { + let element_dtype = DType::Primitive(T::PTYPE, Nullability::NonNullable); + + let children: Vec = query + .iter() + .map(|&v| Scalar::primitive(v, Nullability::NonNullable)) + .collect(); + let storage_scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable); + + let storage = ConstantArray::new(storage_scalar, num_rows).into_array(); + + let ext_dtype = ExtDType::::try_new(EmptyMetadata, storage.dtype().clone())?.erased(); + Ok(ExtensionArray::new(ext_dtype, storage).into_array()) +} + +/// Build the lazy similarity-search expression tree for a prepared database array and a +/// single query vector. +/// +/// The returned array is a lazy boolean expression of length `data.len()` whose position `i` +/// is `true` iff `cosine_similarity(data[i], query) > threshold`. Executing it into a +/// [`BoolArray`](vortex_array::arrays::BoolArray) runs the full scan. +/// +/// The tree shape is: +/// +/// ```text +/// Binary(Gt, [ +/// CosineSimilarity([data, ConstantArray(query_vec, n)]), +/// ConstantArray(threshold, n), +/// ]) +/// ``` +/// +/// The element type is inferred from `T` and must match the element type of `data`'s +/// [`Vector`] extension dtype. +/// +/// This function performs no execution; it is safe to call inside a benchmark setup closure. +/// +/// # Errors +/// +/// Returns an error if `query` has a length incompatible with `data`'s vector dimension, or +/// if any of the intermediate array constructors fails. 
+pub fn build_similarity_search_tree>( + data: ArrayRef, + query: &[T], + threshold: T, +) -> VortexResult { + let num_rows = data.len(); + let query_vec = build_constant_query_vector(query, num_rows)?; + + let cosine = CosineSimilarity::try_new_array(data, query_vec, num_rows)?.into_array(); + + let threshold_scalar = Scalar::primitive(threshold, Nullability::NonNullable); + let threshold_array = ConstantArray::new(threshold_scalar, num_rows).into_array(); + + cosine.binary(threshold_array, Operator::Gt) +} + +#[cfg(test)] +mod tests { + use vortex_array::ArrayRef; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::Extension; + use vortex_array::arrays::ExtensionArray; + use vortex_array::arrays::FixedSizeListArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::arrays::bool::BoolArrayExt; + use vortex_array::dtype::extension::ExtDType; + use vortex_array::extension::EmptyMetadata; + use vortex_array::session::ArraySession; + use vortex_array::validity::Validity; + use vortex_buffer::BufferMut; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use super::build_constant_query_vector; + use super::build_similarity_search_tree; + use super::compress_turboquant; + use crate::vector::Vector; + + /// Build a `Vector` extension array from a flat f32 slice. Each contiguous + /// group of `DIM` values becomes one row. 
+ fn vector_array(dim: u32, values: &[f32]) -> VortexResult { + let dim_usize = dim as usize; + assert_eq!(values.len() % dim_usize, 0); + let num_rows = values.len() / dim_usize; + + let mut buf = BufferMut::::with_capacity(values.len()); + for &v in values { + buf.push(v); + } + let elements = PrimitiveArray::new::(buf.freeze(), Validity::NonNullable); + let fsl = FixedSizeListArray::try_new( + elements.into_array(), + dim, + Validity::NonNullable, + num_rows, + )?; + + let ext_dtype = ExtDType::::try_new(EmptyMetadata, fsl.dtype().clone())?.erased(); + Ok(ExtensionArray::new(ext_dtype, fsl.into_array()).into_array()) + } + + fn test_session() -> VortexSession { + VortexSession::empty().with::() + } + + #[test] + fn constant_query_vector_has_vector_extension_dtype() -> VortexResult<()> { + let query = vec![1.0f32, 0.0, 0.0, 0.0]; + let rhs = build_constant_query_vector(&query, 5)?; + + assert_eq!(rhs.len(), 5); + assert!(rhs.as_opt::().is_some()); + Ok(()) + } + + #[test] + fn similarity_search_tree_executes_to_bool_array() -> VortexResult<()> { + // 4 rows of 3-dim vectors; the first and last match the query [1, 0, 0]. + let data = vector_array( + 3, + &[ + 1.0, 0.0, 0.0, // + 0.0, 1.0, 0.0, // + 0.0, 0.0, 1.0, // + 1.0, 0.0, 0.0, // + ], + )?; + let query = [1.0f32, 0.0, 0.0]; + + let tree = build_similarity_search_tree(data, &query, 0.5)?; + let mut ctx = test_session().create_execution_ctx(); + let result: BoolArray = tree.execute(&mut ctx)?; + + let bits = result.to_bit_buffer(); + assert_eq!(bits.len(), 4); + assert!(bits.value(0)); + assert!(!bits.value(1)); + assert!(!bits.value(2)); + assert!(bits.value(3)); + Ok(()) + } + + #[test] + fn turboquant_roundtrip_preserves_ranking() -> VortexResult<()> { + // Build 6 rows of 128-dim vectors where row 0 is highly correlated with the query. + // TurboQuant should preserve the "row 0 is the best match" ordering. 
+ const DIM: u32 = 128; + const NUM_ROWS: usize = 6; + + let mut values = Vec::::with_capacity(NUM_ROWS * DIM as usize); + let query: Vec = (0..DIM as usize) + .map(|i| ((i as f32) * 0.017).sin()) + .collect(); + + // Row 0: identical to query (cosine=1.0) + values.extend_from_slice(&query); + // Row 1: query + noise + for (i, q) in query.iter().enumerate() { + values.push(q + 0.05 * ((i as f32) * 0.03).cos()); + } + // Rows 2..6: unrelated patterns + for row in 2..NUM_ROWS { + for i in 0..DIM as usize { + values.push(((row as f32 * 1.3 + i as f32) * 0.07).sin()); + } + } + + let data = vector_array(DIM, &values)?; + let mut ctx = test_session().create_execution_ctx(); + let compressed = compress_turboquant(data, &mut ctx)?; + assert_eq!(compressed.len(), NUM_ROWS); + + // Build a tree with a low threshold so row 0 (cosine=1.0 exact) matches. + let tree = build_similarity_search_tree(compressed, &query, 0.95)?; + let result: BoolArray = tree.execute(&mut ctx)?; + let bits = result.to_bit_buffer(); + assert_eq!(bits.len(), NUM_ROWS); + assert!( + bits.value(0), + "row 0 (identical to query) must match at threshold 0.95 even after TurboQuant" + ); + Ok(()) + } +} diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index e82f5fdd87a..4cedf23c07d 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -430,6 +430,7 @@ fn bench_zstd_decompress_string(bencher: Bencher) { .bench_refs(|a| a.to_canonical()); } +// TODO(connor): Remove this. // TurboQuant vector quantization benchmarks. #[cfg(feature = "unstable_encodings")] mod turboquant_benches {