diff --git a/diskann-benchmark/Cargo.toml b/diskann-benchmark/Cargo.toml index bebaf4b8e..ecc3a53dd 100644 --- a/diskann-benchmark/Cargo.toml +++ b/diskann-benchmark/Cargo.toml @@ -63,6 +63,9 @@ scalar-quantization = [] # Enable minmax-quantization based algorithms minmax-quantization = [] +# Enable multi-vector MaxSim distance benchmarks +multi-vector = [] + # Enable Disk Index benchmarks disk-index = [ "diskann-disk/perf_test", diff --git a/diskann-benchmark/example/multi-vector.json b/diskann-benchmark/example/multi-vector.json new file mode 100644 index 000000000..af66a886d --- /dev/null +++ b/diskann-benchmark/example/multi-vector.json @@ -0,0 +1,47 @@ +{ + "search_directories": [], + "jobs": [ + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "auto", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "scalar", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "reference", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "isa": "auto", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + } + ] +} diff --git a/diskann-benchmark/perf_test_inputs/multi-vector-tolerance.json 
b/diskann-benchmark/perf_test_inputs/multi-vector-tolerance.json new file mode 100644 index 000000000..8d5997199 --- /dev/null +++ b/diskann-benchmark/perf_test_inputs/multi-vector-tolerance.json @@ -0,0 +1,16 @@ +{ + "checks": [ + { + "input": { + "type": "multi-vector-op", + "content": {} + }, + "tolerance": { + "type": "multi-vector-tolerance", + "content": { + "min_time_regression": 0.05 + } + } + } + ] +} diff --git a/diskann-benchmark/perf_test_inputs/multi-vector.json b/diskann-benchmark/perf_test_inputs/multi-vector.json new file mode 100644 index 000000000..c4ce9bb8b --- /dev/null +++ b/diskann-benchmark/perf_test_inputs/multi-vector.json @@ -0,0 +1,149 @@ +{ + "search_directories": [], + "jobs": [ + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "auto", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": 
"scalar", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "x86-64-v3", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, 
"dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "x86-64-v4", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "isa": "reference", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { 
"num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "isa": "x86-64-v3", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": 
"float16", + "isa": "x86-64-v4", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "isa": "reference", + "runs": [ + { "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "num_query_vectors": 64, 
"num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + } + ] +} diff --git a/diskann-benchmark/src/backend/mod.rs b/diskann-benchmark/src/backend/mod.rs index 8396577e8..d04bae158 100644 --- a/diskann-benchmark/src/backend/mod.rs +++ b/diskann-benchmark/src/backend/mod.rs @@ -9,11 +9,13 @@ mod disk_index; mod exhaustive; mod filters; mod index; +mod multi_vector; pub(crate) fn register_benchmarks(registry: &mut Registry) -> anyhow::Result<()> { exhaustive::register_benchmarks(registry)?; disk_index::register_benchmarks(registry)?; index::register_benchmarks(registry)?; filters::register_benchmarks(registry)?; + multi_vector::register_benchmarks(registry)?; Ok(()) } diff --git a/diskann-benchmark/src/backend/multi_vector/driver.rs b/diskann-benchmark/src/backend/multi_vector/driver.rs new file mode 100644 index 000000000..e69c70845 --- /dev/null +++ b/diskann-benchmark/src/backend/multi_vector/driver.rs @@ -0,0 +1,255 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + */ + +//! Shared benchmark infrastructure for multi-vector kernels: timing harness, +//! data fixtures, result types. None of the contents are kernel-aware. 
+ +use diskann_benchmark_runner::{ + utils::{ + fmt::Table, + num::{relative_change, NonNegativeFinite}, + percentiles, MicroSeconds, + }, + Checker, Input, +}; +use diskann_quantization::multi_vector::{Mat, MatRef, MaxSimKernel, Overflow, Standard}; +use rand::{ + distr::{Distribution, StandardUniform}, + rngs::StdRng, + SeedableRng, +}; +use serde::{Deserialize, Serialize}; + +use crate::inputs::multi_vector::Run; +use crate::utils::DisplayWrapper; + +////////////////////// +// Tolerance // +////////////////////// + +/// Tolerance thresholds for multi-vector benchmark regression detection. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub(super) struct MultiVectorTolerance { + pub(super) min_time_regression: NonNegativeFinite, +} + +impl Input for MultiVectorTolerance { + type Raw = Self; + + fn tag() -> &'static str { + "multi-vector-tolerance" + } + + fn from_raw(raw: Self::Raw, _checker: &mut Checker) -> anyhow::Result { + Ok(raw) + } + + fn serialize(&self) -> anyhow::Result { + Ok(serde_json::to_value(self)?) + } + + fn example() -> Self { + const EXAMPLE: NonNegativeFinite = match NonNegativeFinite::new(0.05) { + Ok(v) => v, + Err(_) => panic!("use a non-negative finite please"), + }; + + MultiVectorTolerance { + min_time_regression: EXAMPLE, + } + } +} + +/////////////////// +// Data fixtures // +/////////////////// + +/// Random query / doc fixture for a single benchmark run. 
+pub(super) struct Data { + pub(super) queries: Mat>, + pub(super) docs: Mat>, +} + +impl Data +where + StandardUniform: Distribution, +{ + pub(super) fn new(run: &Run) -> Result { + let mut rng = StdRng::seed_from_u64(0x12345); + let queries = Mat::from_fn( + Standard::new(run.num_query_vectors.get(), run.dim.get())?, + || StandardUniform.sample(&mut rng), + ); + let docs = Mat::from_fn( + Standard::new(run.num_doc_vectors.get(), run.dim.get())?, + || StandardUniform.sample(&mut rng), + ); + Ok(Self { queries, docs }) + } +} + +////////////////////// +// Timing harness // +////////////////////// + +pub(super) fn run_with_kernel( + run: &Run, + doc: MatRef<'_, Standard>, + kernel: &dyn MaxSimKernel, +) -> RunResult { + let mut scores = vec![0.0f32; run.num_query_vectors.get()]; + let mut latencies = Vec::with_capacity(run.num_measurements.get()); + + for _ in 0..run.num_measurements.get() { + let start = std::time::Instant::now(); + for _ in 0..run.loops_per_measurement.get() { + kernel + .compute_max_sim(doc, &mut scores) + .expect("scores.len() == kernel.nrows() by construction"); + std::hint::black_box(&mut scores); + } + latencies.push(start.elapsed().into()); + } + + let percentiles = percentiles::compute_percentiles(&mut latencies).unwrap(); + RunResult { + run: run.clone(), + latencies, + percentiles, + } +} + +////////////////////// +// Result types // +////////////////////// + +#[derive(Debug, Serialize, Deserialize)] +pub(super) struct RunResult { + /// The configuration for this run. + pub(super) run: Run, + /// Per-measurement latencies (over `loops_per_measurement` calls). + pub(super) latencies: Vec, + /// Latency percentiles. 
+ pub(super) percentiles: percentiles::Percentiles, +} + +impl RunResult { + pub(super) fn computations_per_latency(&self) -> usize { + self.run.num_query_vectors.get() + * self.run.num_doc_vectors.get() + * self.run.loops_per_measurement.get() + } +} + +impl std::fmt::Display for DisplayWrapper<'_, [RunResult]> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_empty() { + return Ok(()); + } + + writeln!( + f, + "ns/IP = time per (query, doc) inner-product call (~ linear in Dim)" + )?; + + let header = [ + "Q", + "D", + "Dim", + "Min Time (ns/IP @ Dim)", + "Mean Time (ns/IP @ Dim)", + "Loops", + "Measurements", + ]; + + let mut table = Table::new(header, self.len()); + + self.iter().enumerate().for_each(|(row, r)| { + let mut row = table.row(row); + + let min_latency = r + .latencies + .iter() + .min() + .copied() + .unwrap_or(MicroSeconds::new(u64::MAX)); + let mean_latency = r.percentiles.mean; + + let computations_per_latency = r.computations_per_latency() as f64; + let min_time = min_latency.as_f64() / computations_per_latency * 1000.0; + let mean_time = mean_latency / computations_per_latency * 1000.0; + + row.insert(r.run.num_query_vectors, 0); + row.insert(r.run.num_doc_vectors, 1); + row.insert(r.run.dim, 2); + row.insert(format!("{:.3}", min_time), 3); + row.insert(format!("{:.3}", mean_time), 4); + row.insert(r.run.loops_per_measurement, 5); + row.insert(r.run.num_measurements, 6); + }); + + table.fmt(f) + } +} + +////////////////////// +// Regression Check // +////////////////////// + +/// Per-run comparison result showing before/after percentile differences. +#[derive(Debug, Serialize)] +pub(super) struct Comparison { + pub(super) run: Run, + pub(super) tolerance: MultiVectorTolerance, + pub(super) before_min: f64, + pub(super) after_min: f64, +} + +/// Aggregated result of the regression check across all runs. 
+#[derive(Debug, Serialize)] +pub(super) struct CheckResult { + pub(super) checks: Vec, +} + +impl std::fmt::Display for CheckResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let header = [ + "Q", + "D", + "Dim", + "Min Before (ns/IP @ Dim)", + "Min After (ns/IP @ Dim)", + "Change (%)", + "Remark", + ]; + + let mut table = Table::new(header, self.checks.len()); + + for (i, c) in self.checks.iter().enumerate() { + let mut row = table.row(i); + let change = relative_change(c.before_min, c.after_min); + + row.insert(c.run.num_query_vectors, 0); + row.insert(c.run.num_doc_vectors, 1); + row.insert(c.run.dim, 2); + row.insert(format!("{:.3}", c.before_min), 3); + row.insert(format!("{:.3}", c.after_min), 4); + match change { + Ok(change) => { + row.insert(format!("{:.3} %", change * 100.0), 5); + if change > c.tolerance.min_time_regression.get() { + row.insert("FAIL", 6); + } + } + Err(err) => { + row.insert("invalid", 5); + row.insert(err, 6); + } + } + } + + table.fmt(f) + } +} diff --git a/diskann-benchmark/src/backend/multi_vector/kernels.rs b/diskann-benchmark/src/backend/multi_vector/kernels.rs new file mode 100644 index 000000000..c328c776c --- /dev/null +++ b/diskann-benchmark/src/backend/multi_vector/kernels.rs @@ -0,0 +1,173 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + */ + +//! `Benchmark` and `Regression` impls for the multi-vector MaxSim factory. +//! +//! A single generic [`Kernel`] carrier covers every element type accepted +//! by [`MaxSimElement`]; `try_match` also rejects ISAs unavailable on the +//! host so unsupported jobs fail at job-selection rather than mid-run. 
+ +use std::io::Write; +use std::marker::PhantomData; + +use diskann_benchmark_runner::{ + benchmark::{FailureScore, MatchScore, PassFail, Regression}, + utils::{datatype::AsDataType, num::relative_change}, + Benchmark, Checkpoint, Output, Registry, +}; +use diskann_quantization::multi_vector::{build_max_sim, BoxErase, MaxSimElement, MaxSimIsa}; +use rand::distr::{Distribution, StandardUniform}; + +use super::driver::{ + run_with_kernel, CheckResult, Comparison, Data, MultiVectorTolerance, RunResult, +}; +use crate::inputs::multi_vector::MultiVectorOp; +use crate::utils::DisplayWrapper; + +// ───────────────────────────────────────────────────────────────────────── +// Kernel — generic carrier registered once per element type. +// ───────────────────────────────────────────────────────────────────────── + +#[derive(Debug)] +pub(super) struct Kernel(PhantomData); + +impl Kernel { + pub(super) const fn new() -> Self { + Self(PhantomData) + } +} + +impl Benchmark for Kernel +where + T: MaxSimElement + AsDataType, + StandardUniform: Distribution, +{ + type Input = MultiVectorOp; + type Output = Vec; + + fn try_match(&self, from: &MultiVectorOp) -> Result { + let mut failscore: Option = None; + if crate::utils::match_data_type::(from.element_type).is_err() { + *failscore.get_or_insert(0) += 1; + } + let isa: MaxSimIsa = from.isa.into(); + if !isa.is_available() { + *failscore.get_or_insert(0) += 1; + } + match failscore { + None => Ok(MatchScore(0)), + Some(score) => Err(FailureScore(score)), + } + } + + fn run( + &self, + input: &MultiVectorOp, + _: Checkpoint<'_>, + mut output: &mut dyn Output, + ) -> anyhow::Result { + writeln!(output, "{}", input)?; + let mut results = Vec::with_capacity(input.runs.len()); + for run in input.runs.iter() { + let data = Data::::new(run)?; + let kernel = build_max_sim::(input.isa.into(), data.queries.as_view(), BoxErase)?; + results.push(run_with_kernel(run, data.docs.as_view(), &*kernel)); + } + writeln!(output, "\n\n{}", 
DisplayWrapper(&*results))?; + Ok(results) + } + + fn description( + &self, + f: &mut std::fmt::Formatter<'_>, + input: Option<&MultiVectorOp>, + ) -> std::fmt::Result { + match input { + None => writeln!(f, "- Element Type: {}", ::DATA_TYPE)?, + Some(input) => { + let desc = ::describe(input.element_type); + if !desc.is_match() { + writeln!(f, "\n - Mismatched element type: {}", desc)?; + } + let isa: MaxSimIsa = input.isa.into(); + if !isa.is_available() { + writeln!(f, "\n - ISA unavailable on this CPU: {}", isa)?; + } + } + } + Ok(()) + } +} + +impl Regression for Kernel +where + T: MaxSimElement + AsDataType, + StandardUniform: Distribution, +{ + type Tolerances = MultiVectorTolerance; + type Pass = CheckResult; + type Fail = CheckResult; + + fn check( + &self, + tolerance: &MultiVectorTolerance, + _input: &MultiVectorOp, + before: &Vec, + after: &Vec, + ) -> anyhow::Result> { + anyhow::ensure!( + before.len() == after.len(), + "before has {} runs but after has {}", + before.len(), + after.len(), + ); + + let mut passed = true; + let checks: Vec = std::iter::zip(before.iter(), after.iter()) + .enumerate() + .map(|(i, (b, a))| { + anyhow::ensure!(b.run == a.run, "run {i} mismatched"); + + let computations_per_latency = b.computations_per_latency() as f64; + let before_min = b.percentiles.minimum.as_f64() * 1000.0 / computations_per_latency; + let after_min = a.percentiles.minimum.as_f64() * 1000.0 / computations_per_latency; + + let comparison = Comparison { + run: b.run.clone(), + tolerance: *tolerance, + before_min, + after_min, + }; + + match relative_change(before_min, after_min) { + Ok(change) => { + if change > tolerance.min_time_regression.get() { + passed = false; + } + } + Err(_) => passed = false, + }; + + Ok(comparison) + }) + .collect::>>()?; + + Ok(if passed { + PassFail::Pass(CheckResult { checks }) + } else { + PassFail::Fail(CheckResult { checks }) + }) + } +} + +// ───────────────────────────────────────────────────────────────────────── +// 
Registration. +// ───────────────────────────────────────────────────────────────────────── + +pub(super) fn register(registry: &mut Registry) -> anyhow::Result<()> { + registry.register_regression("multi-vector-op-f32", Kernel::::new())?; + registry.register_regression("multi-vector-op-f16", Kernel::::new())?; + Ok(()) +} diff --git a/diskann-benchmark/src/backend/multi_vector/mod.rs b/diskann-benchmark/src/backend/multi_vector/mod.rs new file mode 100644 index 000000000..dfad330af --- /dev/null +++ b/diskann-benchmark/src/backend/multi_vector/mod.rs @@ -0,0 +1,206 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + */ + +//! Multi-vector MaxSim distance benchmarks with regression detection. +//! +//! One `Benchmark` is registered per element type supported by +//! [`MaxSimElement`]; the JSON `isa` field picks the kernel at run time. +//! +//! # Why two ISA enums? +//! +//! [`MaxSimIsa`] (library) and [`BenchIsa`] (this crate) are intentionally +//! separate so the library doesn't pin its public API on a serde version +//! or JSON shape. The benchmark owns its kebab-case JSON layout; the +//! library stays serde-agnostic. +//! +//! [`MaxSimIsa`]: diskann_quantization::multi_vector::MaxSimIsa +//! [`MaxSimElement`]: diskann_quantization::multi_vector::MaxSimElement +//! [`BenchIsa`]: crate::inputs::multi_vector::BenchIsa + +use diskann_benchmark_runner::Registry; + +cfg_if::cfg_if! 
{ + if #[cfg(feature = "multi-vector")] { + mod driver; + mod kernels; + + pub(super) fn register_benchmarks(registry: &mut Registry) -> anyhow::Result<()> { + kernels::register(registry) + } + } else { + crate::utils::stub_impl!("multi-vector", inputs::multi_vector::MultiVectorOp); + + pub(super) fn register_benchmarks(registry: &mut Registry) -> anyhow::Result<()> { + imp::register("multi-vector-op", registry) + } + } +} + +#[cfg(all(test, feature = "multi-vector"))] +mod tests { + use std::num::NonZeroUsize; + + use diskann_benchmark_runner::{ + benchmark::{PassFail, Regression}, + utils::{ + datatype::DataType, num::NonNegativeFinite, percentiles::compute_percentiles, + MicroSeconds, + }, + }; + + use super::driver::{CheckResult, Comparison, MultiVectorTolerance, RunResult}; + use super::kernels::Kernel; + use crate::inputs::multi_vector::{BenchIsa, MultiVectorOp, Run}; + + fn tiny_run() -> Run { + Run { + num_query_vectors: NonZeroUsize::new(2).unwrap(), + num_doc_vectors: NonZeroUsize::new(2).unwrap(), + dim: NonZeroUsize::new(4).unwrap(), + loops_per_measurement: NonZeroUsize::new(1).unwrap(), + num_measurements: NonZeroUsize::new(1).unwrap(), + } + } + + fn tiny_op() -> MultiVectorOp { + MultiVectorOp { + element_type: DataType::Float32, + isa: BenchIsa::Auto, + runs: vec![tiny_run()], + } + } + + fn tiny_result(minimum: u64) -> RunResult { + let mut latencies = vec![MicroSeconds::new(minimum)]; + let percentiles = compute_percentiles(&mut latencies).unwrap(); + RunResult { + run: tiny_run(), + latencies, + percentiles, + } + } + + fn tolerance(limit: f64) -> MultiVectorTolerance { + MultiVectorTolerance { + min_time_regression: NonNegativeFinite::new(limit).unwrap(), + } + } + + #[test] + fn check_rejects_mismatched_runs() { + let kernel = Kernel::::new(); + + // Build a result whose `run` diverges from `tiny_run()` so the + // regression check's `b.run == a.run` invariant fires. 
+ let mut latencies = vec![MicroSeconds::new(100)]; + let percentiles = compute_percentiles(&mut latencies).unwrap(); + let mismatched_result = RunResult { + run: Run { + num_query_vectors: NonZeroUsize::new(4).unwrap(), + ..tiny_run() + }, + latencies, + percentiles, + }; + + let err = kernel + .check( + &tolerance(0.0), + &tiny_op(), + &vec![tiny_result(100)], + &vec![mismatched_result], + ) + .unwrap_err(); + + assert_eq!(err.to_string(), "run 0 mismatched"); + } + + #[test] + fn check_allows_negative_relative_change() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.0), + &tiny_op(), + &vec![tiny_result(100)], + &vec![tiny_result(95)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Pass(_))); + } + + #[test] + fn check_passes_on_tolerance_boundary() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(100)], + &vec![tiny_result(105)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Pass(_))); + } + + #[test] + fn check_fails_above_tolerance_boundary() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(100)], + &vec![tiny_result(106)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Fail(_))); + } + + #[test] + fn check_result_display_includes_failure_details() { + let check = CheckResult { + checks: vec![Comparison { + run: tiny_run(), + tolerance: tolerance(0.05), + before_min: 100.0, + after_min: 106.0, + }], + }; + + let rendered = check.to_string(); + assert!(rendered.contains("Q"), "rendered = {rendered}"); + assert!(rendered.contains("Dim"), "rendered = {rendered}"); + assert!(rendered.contains("100.000"), "rendered = {rendered}"); + assert!(rendered.contains("106.000"), "rendered = {rendered}"); + assert!(rendered.contains("6.000 %"), "rendered = {rendered}"); + assert!(rendered.contains("FAIL"), "rendered = {rendered}"); + } + + /// A "before" value of 0 
means the measurement was too fast to obtain a + /// reliable signal, so we *could* be letting a regression through. We + /// require at least a non-zero value. + #[test] + fn zero_values_rejected() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(0)], + &vec![tiny_result(0)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Fail(_))); + } +} diff --git a/diskann-benchmark/src/inputs/mod.rs b/diskann-benchmark/src/inputs/mod.rs index 492f0b9c1..0d429c0c5 100644 --- a/diskann-benchmark/src/inputs/mod.rs +++ b/diskann-benchmark/src/inputs/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod disk; pub(crate) mod exhaustive; pub(crate) mod filters; pub(crate) mod graph_index; +pub(crate) mod multi_vector; pub(crate) mod save_and_load; /// Construct an example input of type `Self`. diff --git a/diskann-benchmark/src/inputs/multi_vector.rs b/diskann-benchmark/src/inputs/multi_vector.rs new file mode 100644 index 000000000..c74f9d232 --- /dev/null +++ b/diskann-benchmark/src/inputs/multi_vector.rs @@ -0,0 +1,151 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + */ + +use std::num::NonZeroUsize; + +use diskann_benchmark_runner::{utils::datatype::DataType, Checker, Input}; +use diskann_quantization::multi_vector::MaxSimIsa; +use serde::{Deserialize, Serialize}; + +//////////////// +// Enum types // +//////////////// + +/// JSON-facing shadow of [`MaxSimIsa`]. The library's enum is deliberately +/// serde-free; this owns the kebab-case JSON shape and converts via `From`. +/// Stays variant-for-variant in sync with `MaxSimIsa` manually. 
+#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +#[non_exhaustive] +pub(crate) enum BenchIsa { + #[serde(rename = "x86-64-v4")] + #[allow(non_camel_case_types)] + X86_64_V4, + #[serde(rename = "x86-64-v3")] + #[allow(non_camel_case_types)] + X86_64_V3, + Neon, + Scalar, + Reference, + Auto, +} + +impl std::fmt::Display for BenchIsa { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let st = match self { + Self::X86_64_V4 => "x86-64-v4", + Self::X86_64_V3 => "x86-64-v3", + Self::Neon => "neon", + Self::Scalar => "scalar", + Self::Reference => "reference", + Self::Auto => "auto", + }; + write!(f, "{}", st) + } +} + +impl From for MaxSimIsa { + fn from(b: BenchIsa) -> Self { + match b { + BenchIsa::X86_64_V4 => MaxSimIsa::X86_64_V4, + BenchIsa::X86_64_V3 => MaxSimIsa::X86_64_V3, + BenchIsa::Neon => MaxSimIsa::Neon, + BenchIsa::Scalar => MaxSimIsa::Scalar, + BenchIsa::Reference => MaxSimIsa::Reference, + BenchIsa::Auto => MaxSimIsa::Auto, + } + } +} + +/// One benchmark configuration: a single shape measurement. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) struct Run { + pub(crate) num_query_vectors: NonZeroUsize, + pub(crate) num_doc_vectors: NonZeroUsize, + pub(crate) dim: NonZeroUsize, + pub(crate) loops_per_measurement: NonZeroUsize, + pub(crate) num_measurements: NonZeroUsize, +} + +/////////////////////// +// Multi-Vector Op // +/////////////////////// + +/// A complete multi-vector benchmark job. 
+#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct MultiVectorOp { + pub(crate) element_type: DataType, + pub(crate) isa: BenchIsa, + pub(crate) runs: Vec, +} + +impl MultiVectorOp { + pub(crate) const fn tag() -> &'static str { + "multi-vector-op" + } +} + +impl Input for MultiVectorOp { + type Raw = Self; + + fn tag() -> &'static str { + Self::tag() + } + + fn from_raw(raw: Self::Raw, _checker: &mut Checker) -> anyhow::Result { + Ok(raw) + } + + fn serialize(&self) -> anyhow::Result { + Ok(serde_json::to_value(self)?) + } + + fn example() -> Self { + const NUM_DOC_VECTORS: NonZeroUsize = NonZeroUsize::new(64).unwrap(); + const DIM: NonZeroUsize = NonZeroUsize::new(128).unwrap(); + const LOOPS_PER_MEASUREMENT: NonZeroUsize = NonZeroUsize::new(200).unwrap(); + const NUM_MEASUREMENTS: NonZeroUsize = NonZeroUsize::new(100).unwrap(); + + let runs = vec![ + Run { + num_query_vectors: NonZeroUsize::new(32).unwrap(), + num_doc_vectors: NUM_DOC_VECTORS, + dim: DIM, + loops_per_measurement: LOOPS_PER_MEASUREMENT, + num_measurements: NUM_MEASUREMENTS, + }, + Run { + num_query_vectors: NonZeroUsize::new(64).unwrap(), + num_doc_vectors: NUM_DOC_VECTORS, + dim: DIM, + loops_per_measurement: LOOPS_PER_MEASUREMENT, + num_measurements: NUM_MEASUREMENTS, + }, + ]; + + Self { + element_type: DataType::Float32, + isa: BenchIsa::Auto, + runs, + } + } +} + +macro_rules! 
write_field { + ($f:ident, $field:tt, $($expr:tt)*) => { + writeln!($f, "{:>18}: {}", $field, $($expr)*) + } +} + +impl std::fmt::Display for MultiVectorOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Multi-Vector Operation\n")?; + write_field!(f, "tag", Self::tag())?; + write_field!(f, "element type", self.element_type)?; + write_field!(f, "isa", self.isa)?; + write_field!(f, "number of runs", self.runs.len())?; + Ok(()) + } +} diff --git a/diskann-benchmark/src/main.rs b/diskann-benchmark/src/main.rs index cc70120cd..c87a08e17 100644 --- a/diskann-benchmark/src/main.rs +++ b/diskann-benchmark/src/main.rs @@ -772,6 +772,92 @@ mod tests { assert!(!output_path.exists()); } + /////////////////// + // Multi-Vector // + /////////////////// + + #[test] + fn multi_vector_integration() { + let path = example_directory().join("multi-vector.json"); + let tempdir = tempfile::tempdir().unwrap(); + let output_path = tempdir.path().join("output.json"); + assert!(!output_path.exists()); + + let modified_input_path = tempdir.path().join("input.json"); + + let mut raw = value_from_file(&path); + prefix_search_directories(&mut raw, &root_directory()); + save_to_file(&modified_input_path, &raw); + + run_multi_vector_integration(&modified_input_path, &output_path) + } + + #[cfg(feature = "multi-vector")] + fn run_multi_vector_integration(input_path: &std::path::Path, output_path: &std::path::Path) { + let command = Commands::Run { + input_file: input_path.to_owned(), + output_file: output_path.to_owned(), + dry_run: false, + allow_debug: true, + }; + + let cli = Cli::from_commands(command, true); + let mut output = Memory::new(); + + cli.run(&mut output).unwrap(); + println!( + "output = {}", + String::from_utf8(output.into_inner()).unwrap() + ); + + // Check that the results file is generated. 
+ assert!(output_path.exists()); + } + + #[cfg(not(feature = "multi-vector"))] + fn run_multi_vector_integration(input_path: &std::path::Path, output_path: &std::path::Path) { + let command = Commands::Run { + input_file: input_path.to_owned(), + output_file: output_path.to_owned(), + dry_run: false, + allow_debug: true, + }; + let cli = Cli::from_commands(command, true); + let mut output = Memory::new(); + + let err = cli.run(&mut output).unwrap_err(); + println!("err = {:?}", err); + + let output = String::from_utf8(output.into_inner()).unwrap(); + assert!(output.contains("\"multi-vector\" feature")); + println!("output = {}", output); + + // The output file should not have been created because we failed the test. + assert!(!output_path.exists()); + } + + #[test] + #[cfg(feature = "multi-vector")] + fn multi_vector_check_verify() { + let input_path = example_directory().join("multi-vector.json"); + let tolerance_path = project_directory() + .join("perf_test_inputs") + .join("multi-vector-tolerance.json"); + + let command = Commands::Check(diskann_benchmark_runner::app::Check::Verify { + tolerances: tolerance_path, + input_file: input_path, + }); + + let cli = Cli::from_commands(command, true); + let mut output = Memory::new(); + cli.run(&mut output).unwrap(); + println!( + "output = {}", + String::from_utf8(output.into_inner()).unwrap() + ); + } + #[test] fn quiet_suppresses_check_target_warning() { let cli = Cli::from_commands(Commands::Skeleton, true); diff --git a/diskann-quantization/src/minmax/multi/max_sim.rs b/diskann-quantization/src/minmax/multi/max_sim.rs index 2fc6fa0ab..5bc0aa406 100644 --- a/diskann-quantization/src/minmax/multi/max_sim.rs +++ b/diskann-quantization/src/minmax/multi/max_sim.rs @@ -264,7 +264,7 @@ mod tests { .collect(); let mut scores = vec![0.0f32; nq]; - MaxSim::new(&mut scores).unwrap().evaluate(query, doc); + MaxSim::new(&mut scores).evaluate(query, doc); for (i, (&got, &exp)) in scores.iter().zip(expected.iter()).enumerate() { 
assert!( @@ -307,6 +307,6 @@ mod tests { let doc = MatRef::new(MinMaxMeta::<8>::new(3, dim), &doc_data).unwrap(); let mut scores = vec![0.0f32; 5]; // Wrong size - MaxSim::new(&mut scores).unwrap().evaluate(query, doc); + MaxSim::new(&mut scores).evaluate(query, doc); } } diff --git a/diskann-quantization/src/minmax/multi/mod.rs b/diskann-quantization/src/minmax/multi/mod.rs index f6e58cac1..0fb4e57e6 100644 --- a/diskann-quantization/src/minmax/multi/mod.rs +++ b/diskann-quantization/src/minmax/multi/mod.rs @@ -74,7 +74,7 @@ //! //! // Compute MaxSim: per-query-vector max similarities //! let mut scores = vec![0.0f32; num_query_vectors]; -//! MaxSim::new(&mut scores).unwrap().evaluate(query_mv.into(), doc_mv); +//! MaxSim::new(&mut scores).evaluate(query_mv.into(), doc_mv); //! // scores[i] = min over all doc vectors of distance(query[i], doc[j]) //! //! // Compute Chamfer distance (sum of MaxSim scores) diff --git a/diskann-quantization/src/multi_vector/distance/factory.rs b/diskann-quantization/src/multi_vector/distance/factory.rs new file mode 100644 index 000000000..5dcd4b8cd --- /dev/null +++ b/diskann-quantization/src/multi_vector/distance/factory.rs @@ -0,0 +1,655 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +//! Factory + concrete `MaxSimKernel` impls for the multi-vector distance +//! API. BYOTE entry point — see [`build_max_sim`]. 
+ +use diskann_utils::Reborrow; +use diskann_vector::distance::InnerProduct; +use diskann_vector::{DistanceFunctionMut, PureDistanceFunction}; +use diskann_wide::Architecture; +use diskann_wide::arch::Scalar; +#[cfg(target_arch = "aarch64")] +use diskann_wide::arch::aarch64::Neon; +#[cfg(target_arch = "x86_64")] +use diskann_wide::arch::x86_64::{V3, V4}; + +use super::isa::{MaxSimIsa, NotSupported}; +use super::kernel::{Erase, MaxSimKernel}; +use super::kernels::f16::F16Entry; +use super::kernels::f32::F32Kernel; +use super::max_sim::{MaxSim, MaxSimError}; +use crate::multi_vector::distance::QueryMatRef; +use crate::multi_vector::{BlockTransposed, BlockTransposedRef, Mat, MatRef, Standard}; + +// ───────────────────────────────────────────────────────────────────────── +// Prepared — concrete kernel for the arch-dispatched paths. +// ───────────────────────────────────────────────────────────────────────── + +#[derive(Debug)] +struct Prepared { + arch: A, + prepared: Q, +} + +impl MaxSimKernel for Prepared> +where + A: Architecture, + F32Kernel: for<'a> diskann_wide::arch::Target3< + A, + (), + BlockTransposedRef<'a, f32, GROUP>, + MatRef<'a, Standard>, + &'a mut [f32], + >, +{ + fn nrows(&self) -> usize { + self.prepared.nrows() + } + + fn compute_max_sim( + &self, + doc: MatRef<'_, Standard>, + scores: &mut [f32], + ) -> Result<(), MaxSimError> { + if scores.len() != self.nrows() { + return Err(MaxSimError::InvalidBufferLength(scores.len(), self.nrows())); + } + if doc.num_vectors() == 0 { + scores.fill(f32::MAX); + return Ok(()); + } + let mut scratch = vec![f32::MIN; self.prepared.padded_nrows()]; + self.arch.run3( + F32Kernel::, + self.prepared.reborrow(), + doc, + &mut scratch, + ); + for (dst, &src) in scores.iter_mut().zip(&scratch[..self.prepared.nrows()]) { + *dst = -src; + } + Ok(()) + } +} + +impl MaxSimKernel + for Prepared> +where + A: Architecture, + F16Entry: for<'a> diskann_wide::arch::Target3< + A, + (), + BlockTransposedRef<'a, half::f16, GROUP>, 
+ MatRef<'a, Standard>, + &'a mut [f32], + >, +{ + fn nrows(&self) -> usize { + self.prepared.nrows() + } + + fn compute_max_sim( + &self, + doc: MatRef<'_, Standard>, + scores: &mut [f32], + ) -> Result<(), MaxSimError> { + if scores.len() != self.nrows() { + return Err(MaxSimError::InvalidBufferLength(scores.len(), self.nrows())); + } + if doc.num_vectors() == 0 { + scores.fill(f32::MAX); + return Ok(()); + } + let mut scratch = vec![f32::MIN; self.prepared.padded_nrows()]; + self.arch.run3( + F16Entry::, + self.prepared.reborrow(), + doc, + &mut scratch, + ); + for (dst, &src) in scores.iter_mut().zip(&scratch[..self.prepared.nrows()]) { + *dst = -src; + } + Ok(()) + } +} + +// ───────────────────────────────────────────────────────────────────────── +// ReferenceKernel — non-SIMD fallback that wraps MaxSim::evaluate. +// ───────────────────────────────────────────────────────────────────────── + +struct ReferenceKernel { + query: Mat>, +} + +impl std::fmt::Debug for ReferenceKernel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ReferenceKernel") + .field("nrows", &self.query.num_vectors()) + .finish() + } +} + +impl ReferenceKernel { + fn new(query: MatRef<'_, Standard>) -> Self { + Self { + query: query.to_owned(), + } + } +} + +impl MaxSimKernel for ReferenceKernel +where + T: Copy + Send + Sync + std::fmt::Debug + 'static, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, +{ + fn nrows(&self) -> usize { + self.query.num_vectors() + } + + fn compute_max_sim( + &self, + doc: MatRef<'_, Standard>, + scores: &mut [f32], + ) -> Result<(), MaxSimError> { + if scores.len() != self.nrows() { + return Err(MaxSimError::InvalidBufferLength(scores.len(), self.nrows())); + } + if doc.num_vectors() == 0 { + scores.fill(f32::MAX); + return Ok(()); + } + let query: QueryMatRef<'_, Standard> = self.query.as_view().into(); + let mut max_sim = MaxSim::new(scores); + max_sim.evaluate(query, doc) + } +} + +// 
───────────────────────────────────────────────────────────────────────── +// BuildAndErase — Target1 impls used by `dispatch1_no_features` (Auto). +// ───────────────────────────────────────────────────────────────────────── + +struct BuildAndErase(E); + +// ───── f32 Target1 impls ───── + +impl> diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: Scalar, query: MatRef<'_, Standard>) -> E::Output { + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "x86_64")] +impl> diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: V3, query: MatRef<'_, Standard>) -> E::Output { + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "x86_64")] +impl> diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: V4, query: MatRef<'_, Standard>) -> E::Output { + // V4 dispatches to V3 (no V4-specific kernel). + let arch = arch.retarget(); + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "aarch64")] +impl> diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: Neon, query: MatRef<'_, Standard>) -> E::Output { + // Neon dispatches to Scalar (no Neon-specific kernel). 
+ let arch = arch.retarget(); + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +// ───── f16 Target1 impls ───── + +impl> + diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: Scalar, query: MatRef<'_, Standard>) -> E::Output { + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "x86_64")] +impl> + diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: V3, query: MatRef<'_, Standard>) -> E::Output { + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "x86_64")] +impl> + diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: V4, query: MatRef<'_, Standard>) -> E::Output { + // V4 dispatches to V3 (no V4-specific kernel). + let arch = arch.retarget(); + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +#[cfg(target_arch = "aarch64")] +impl> + diskann_wide::arch::Target1>> + for BuildAndErase +{ + fn run(self, arch: Neon, query: MatRef<'_, Standard>) -> E::Output { + // Neon dispatches to Scalar (no Neon-specific kernel). + let arch = arch.retarget(); + let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); + self.0.erase(Prepared { arch, prepared }) + } +} + +// ───────────────────────────────────────────────────────────────────────── +// MaxSimElement — sealed trait gating accepted element types. +// ───────────────────────────────────────────────────────────────────────── + +mod sealed { + pub trait Sealed {} +} + +/// Scalar element types accepted by [`build_max_sim`]. +/// +/// Sealed: external crates cannot add impls. 
Quantized representations +/// (PQ, SQ, packed sub-byte) are intentionally excluded — they need +/// codebook/scale state that [`MatRef<'_, Standard>`] can't carry. +pub trait MaxSimElement: sealed::Sealed + Sized + Copy + Send + Sync + 'static { + /// Build the concrete kernel for this element type and hand it to + /// `erase.erase(...)`. + /// + /// # Errors + /// + /// Returns [`NotSupported`] when the requested ISA cannot run on this + /// build (e.g. AVX-512 unavailable; aarch64 on x86_64). + fn build>( + isa: MaxSimIsa, + query: MatRef<'_, Standard>, + erase: E, + ) -> Result; +} + +impl sealed::Sealed for f32 {} +impl sealed::Sealed for half::f16 {} + +impl MaxSimElement for f32 { + fn build>( + isa: MaxSimIsa, + query: MatRef<'_, Standard>, + erase: E, + ) -> Result { + match isa { + MaxSimIsa::Auto => Ok(diskann_wide::arch::dispatch1_no_features( + BuildAndErase(erase), + query, + )), + MaxSimIsa::Scalar => Ok(Scalar::new().run1(BuildAndErase(erase), query)), + #[cfg(target_arch = "x86_64")] + MaxSimIsa::X86_64_V3 => { + let arch = V3::new_checked().ok_or(NotSupported { + isa, + reason: "AVX2/FMA unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(target_arch = "x86_64")] + MaxSimIsa::X86_64_V4 => { + let arch = V4::new_checked().ok_or(NotSupported { + isa, + reason: "AVX-512 unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(not(target_arch = "x86_64"))] + MaxSimIsa::X86_64_V3 | MaxSimIsa::X86_64_V4 => Err(NotSupported { + isa, + reason: "x86_64 target only", + }), + #[cfg(target_arch = "aarch64")] + MaxSimIsa::Neon => { + let arch = Neon::new_checked().ok_or(NotSupported { + isa, + reason: "Neon unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(not(target_arch = "aarch64"))] + MaxSimIsa::Neon => Err(NotSupported { + isa, + reason: "aarch64 target only", + }), + MaxSimIsa::Reference => Ok(erase.erase(ReferenceKernel::::new(query))), + } + } 
+} + +impl MaxSimElement for half::f16 { + fn build>( + isa: MaxSimIsa, + query: MatRef<'_, Standard>, + erase: E, + ) -> Result { + match isa { + MaxSimIsa::Auto => Ok(diskann_wide::arch::dispatch1_no_features( + BuildAndErase(erase), + query, + )), + MaxSimIsa::Scalar => Ok(Scalar::new().run1(BuildAndErase(erase), query)), + #[cfg(target_arch = "x86_64")] + MaxSimIsa::X86_64_V3 => { + let arch = V3::new_checked().ok_or(NotSupported { + isa, + reason: "AVX2/FMA unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(target_arch = "x86_64")] + MaxSimIsa::X86_64_V4 => { + let arch = V4::new_checked().ok_or(NotSupported { + isa, + reason: "AVX-512 unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(not(target_arch = "x86_64"))] + MaxSimIsa::X86_64_V3 | MaxSimIsa::X86_64_V4 => Err(NotSupported { + isa, + reason: "x86_64 target only", + }), + #[cfg(target_arch = "aarch64")] + MaxSimIsa::Neon => { + let arch = Neon::new_checked().ok_or(NotSupported { + isa, + reason: "Neon unavailable on this CPU", + })?; + Ok(arch.run1(BuildAndErase(erase), query)) + } + #[cfg(not(target_arch = "aarch64"))] + MaxSimIsa::Neon => Err(NotSupported { + isa, + reason: "aarch64 target only", + }), + MaxSimIsa::Reference => Ok(erase.erase(ReferenceKernel::::new(query))), + } + } +} + +// ───────────────────────────────────────────────────────────────────────── +// Factory entry point. +// ───────────────────────────────────────────────────────────────────────── + +/// Build a multi-vector MaxSim kernel for any [`MaxSimElement`] type. +/// +/// Thin wrapper over [`MaxSimElement::build`] so callers don't have to name +/// the trait at the call site. +/// +/// # Errors +/// +/// Returns [`NotSupported`] when the requested ISA cannot run on this build. 
+pub fn build_max_sim>( + isa: MaxSimIsa, + query: MatRef<'_, Standard>, + erase: E, +) -> Result { + T::build(isa, query, erase) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::multi_vector::{BoxErase, Chamfer, MaxSim, QueryMatRef}; + + /// Local helper trait — picks a sane test value of `T` from an `f32` + /// so both `f32` and `half::f16` parameterizations share the same data + /// generator. + trait FromF32 { + fn from_f32(v: f32) -> Self; + } + + impl FromF32 for f32 { + fn from_f32(v: f32) -> Self { + v + } + } + + impl FromF32 for half::f16 { + fn from_f32(v: f32) -> Self { + diskann_wide::cast_f32_to_f16(v) + } + } + + fn make_mat(data: &[T], nrows: usize, ncols: usize) -> MatRef<'_, Standard> { + MatRef::new(Standard::new(nrows, ncols).unwrap(), data).unwrap() + } + + fn make_test_data(len: usize, ceil: usize, shift: usize) -> Vec { + (0..len) + .map(|v| T::from_f32(((v + shift) % ceil) as f32)) + .collect() + } + + /// Shapes for the `chamfer_matches_fallback` / `max_sim_matches_fallback` + /// agreement checks: `(num_queries, num_docs, dim)`. + /// + /// Targets the factory wiring (query setup, score writeback) above the + /// kernel layer; exhaustive panel/remainder coverage is pinned in + /// `kernels::tiled_reduce::tests`. 
+ const TEST_CASES: &[(usize, usize, usize)] = &[ + (1, 1, 4), // Degenerate + (5, 3, 5), // Prime k; nq > 1 and nd > 1 exercise per-row writeback + (17, 4, 64), // A-panel remainder crossing both Scalar and V3 panel widths + (16, 6, 32), // B-remainder ≠ 1 (V3 b_remainder = 2) + ]; + + fn check_chamfer_matches(tol: f32, label: &str) + where + T: MaxSimElement + FromF32, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, + { + for &(nq, nd, dim) in TEST_CASES { + let query_data = make_test_data::(nq * dim, dim, dim / 2); + let doc_data = make_test_data::(nd * dim, dim, dim); + + let query = make_mat(&query_data, nq, dim); + let doc = make_mat(&doc_data, nd, dim); + + let expected = Chamfer::evaluate(QueryMatRef::from(query), doc); + + let kernel = build_max_sim::(MaxSimIsa::Auto, query, BoxErase).unwrap(); + let mut scores = vec![0.0f32; nq]; + kernel.compute_max_sim(doc, &mut scores).unwrap(); + let actual: f32 = scores.iter().sum(); + + assert!( + (actual - expected).abs() < tol, + "{label}Chamfer mismatch for ({nq},{nd},{dim}): actual={actual}, expected={expected}", + ); + } + } + + fn check_max_sim_matches(tol: f32, label: &str) + where + T: MaxSimElement + FromF32, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, + { + for &(nq, nd, dim) in TEST_CASES { + let query_data = make_test_data::(nq * dim, dim, dim / 2); + let doc_data = make_test_data::(nd * dim, dim, dim); + + let query = make_mat(&query_data, nq, dim); + let doc = make_mat(&doc_data, nd, dim); + + let mut expected_scores = vec![0.0f32; nq]; + let _ = MaxSim::new(&mut expected_scores).evaluate(QueryMatRef::from(query), doc); + + let kernel = build_max_sim::(MaxSimIsa::Auto, query, BoxErase).unwrap(); + let mut actual_scores = vec![0.0f32; nq]; + kernel.compute_max_sim(doc, &mut actual_scores).unwrap(); + + for i in 0..nq { + assert!( + (actual_scores[i] - expected_scores[i]).abs() < tol, + "{label}MaxSim[{i}] mismatch for ({nq},{nd},{dim}): actual={}, 
expected={}", + actual_scores[i], + expected_scores[i], + ); + } + } + } + + #[test] + fn dimensions_f32() { + let data = vec![1.0f32; 5 * 8]; + let query = make_mat(&data, 5, 8); + let kernel = build_max_sim::(MaxSimIsa::Auto, query, BoxErase).unwrap(); + assert_eq!(kernel.nrows(), 5); + } + + #[test] + fn dimensions_f16() { + let data = vec![diskann_wide::cast_f32_to_f16(1.0); 5 * 8]; + let query = make_mat(data.as_slice(), 5, 8); + let kernel = build_max_sim::(MaxSimIsa::Auto, query, BoxErase).unwrap(); + assert_eq!(kernel.nrows(), 5); + } + + fn check_size_mismatch(label: &str) + where + T: MaxSimElement + FromF32, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, + { + let query_data = make_test_data::(3 * 4, 4, 0); + let doc_data = make_test_data::(2 * 4, 4, 1); + let query = make_mat(&query_data, 3, 4); + let doc = make_mat(&doc_data, 2, 4); + + for isa in [MaxSimIsa::Auto, MaxSimIsa::Reference] { + let kernel = build_max_sim::(isa, query, BoxErase).unwrap(); + + let mut too_short = vec![0.0f32; 2]; + match kernel.compute_max_sim(doc, &mut too_short) { + Err(MaxSimError::InvalidBufferLength(2, 3)) => {} + other => { + panic!("{label}({isa:?}) expected InvalidBufferLength(2, 3), got {other:?}",) + } + } + + let mut too_long = vec![0.0f32; 4]; + match kernel.compute_max_sim(doc, &mut too_long) { + Err(MaxSimError::InvalidBufferLength(4, 3)) => {} + other => { + panic!("{label}({isa:?}) expected InvalidBufferLength(4, 3), got {other:?}",) + } + } + } + } + + fn check_zero_docs_fills_sentinel(label: &str) + where + T: MaxSimElement + FromF32, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, + { + let query_data = make_test_data::(3 * 4, 4, 0); + let doc_data: Vec = Vec::new(); + let query = make_mat(&query_data, 3, 4); + let doc = make_mat(doc_data.as_slice(), 0, 4); + + for isa in [MaxSimIsa::Auto, MaxSimIsa::Reference] { + let kernel = build_max_sim::(isa, query, BoxErase).unwrap(); + let mut scores = vec![0.0f32; 
3]; + kernel.compute_max_sim(doc, &mut scores).unwrap(); + for (i, &s) in scores.iter().enumerate() { + assert_eq!( + s, + f32::MAX, + "{label}({isa:?}) zero-doc slot {i} should be f32::MAX sentinel", + ); + } + } + } + + fn check_zero_query(label: &str) + where + T: MaxSimElement + FromF32, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, + { + let query_data: Vec = Vec::new(); + let doc_data = make_test_data::(2 * 4, 4, 0); + let query = make_mat(query_data.as_slice(), 0, 4); + let doc = make_mat(&doc_data, 2, 4); + + for isa in [MaxSimIsa::Auto, MaxSimIsa::Reference] { + let kernel = build_max_sim::(isa, query, BoxErase).unwrap(); + assert_eq!( + kernel.nrows(), + 0, + "{label}({isa:?}) empty query should yield nrows=0", + ); + let mut scores: Vec = Vec::new(); + kernel + .compute_max_sim(doc, &mut scores) + .unwrap_or_else(|e| panic!("{label}({isa:?}) expected Ok, got {e:?}")); + } + } + + macro_rules! test_matches_fallback { + ($mod_name:ident, $ty:ty, $tol:expr, $label:literal) => { + mod $mod_name { + use super::*; + + #[test] + fn chamfer_matches_fallback() { + check_chamfer_matches::<$ty>($tol, $label); + } + + #[test] + fn max_sim_matches_fallback() { + check_max_sim_matches::<$ty>($tol, $label); + } + + #[test] + fn errors_on_size_mismatch() { + check_size_mismatch::<$ty>($label); + } + + #[test] + fn zero_docs_fills_sentinel() { + check_zero_docs_fills_sentinel::<$ty>($label); + } + + #[test] + fn zero_query_returns_ok() { + check_zero_query::<$ty>($label); + } + } + }; + } + + test_matches_fallback!(f32, f32, 1e-10, "f32 "); + test_matches_fallback!(f16, half::f16, 1e-10, "f16 "); +} diff --git a/diskann-quantization/src/multi_vector/distance/fallback.rs b/diskann-quantization/src/multi_vector/distance/fallback.rs index 5d677cde0..d54956a10 100644 --- a/diskann-quantization/src/multi_vector/distance/fallback.rs +++ b/diskann-quantization/src/multi_vector/distance/fallback.rs @@ -225,7 +225,7 @@ mod tests { let doc = 
make_doc(&[1.0, 1.0], 1, 2); let mut scores = vec![0.0f32; 3]; // Wrong size - let r = MaxSim::new(&mut scores).unwrap().evaluate(query, doc); + let r = MaxSim::new(&mut scores).evaluate(query, doc); assert!(r.is_err()); } @@ -251,7 +251,7 @@ mod tests { // Test MaxSim let mut scores = vec![0.0f32; *nq]; - let r = MaxSim::new(&mut scores).unwrap().evaluate(query, doc); + let r = MaxSim::new(&mut scores).evaluate(query, doc); assert!(r.is_ok()); let expected_scores: Vec = query diff --git a/diskann-quantization/src/multi_vector/distance/isa.rs b/diskann-quantization/src/multi_vector/distance/isa.rs new file mode 100644 index 000000000..d295438bc --- /dev/null +++ b/diskann-quantization/src/multi_vector/distance/isa.rs @@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

//! Instruction Set Architecture (ISA) selector for the multi-vector MaxSim
//! factory.

/// Chooses which multi-vector MaxSim kernel the factory should build.
///
/// The enum is `#[non_exhaustive]`, so a new variant (for example a new
/// in-tree kernel) can be added without breaking downstream matches.
///
/// It intentionally implements neither `Serialize` nor `Deserialize`:
/// callers that need a JSON form keep their own shadow enum and convert with
/// `From` / `TryFrom`, which keeps the library independent of any particular
/// serialization format.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum MaxSimIsa {
    /// Use the highest ISA supported by the host CPU.
    Auto,
    /// Pure-scalar (emulated SIMD) kernel; always available.
    Scalar,
    /// x86_64 with AVX2 and FMA.
    X86_64_V3,
    /// x86_64 with AVX-512.
    X86_64_V4,
    /// AArch64 Neon.
    Neon,
    /// Non-SIMD reference fallback; slow, intended as a correctness baseline.
    Reference,
}

impl MaxSimIsa {
    /// Reports whether a kernel for this ISA can be built on the current host.
    ///
    /// `Auto`, `Scalar` and `Reference` are always available. An
    /// architecture-specific variant is available only when the crate was
    /// compiled for that architecture *and* the runtime `new_checked()` probe
    /// succeeds, so it can be `false` even on a matching target.
    pub fn is_available(self) -> bool {
        match self {
            Self::Auto | Self::Scalar | Self::Reference => true,
            Self::X86_64_V3 => host_supports_v3(),
            Self::X86_64_V4 => host_supports_v4(),
            Self::Neon => host_supports_neon(),
        }
    }
}

// Runtime probe for the x86_64 V3 level; constant `false` on other targets.
#[cfg(target_arch = "x86_64")]
fn host_supports_v3() -> bool {
    diskann_wide::arch::x86_64::V3::new_checked().is_some()
}

#[cfg(not(target_arch = "x86_64"))]
fn host_supports_v3() -> bool {
    false
}

// Runtime probe for the x86_64 V4 level; constant `false` on other targets.
#[cfg(target_arch = "x86_64")]
fn host_supports_v4() -> bool {
    diskann_wide::arch::x86_64::V4::new_checked().is_some()
}

#[cfg(not(target_arch = "x86_64"))]
fn host_supports_v4() -> bool {
    false
}

// Runtime probe for AArch64 Neon; constant `false` on other targets.
#[cfg(target_arch = "aarch64")]
fn host_supports_neon() -> bool {
    diskann_wide::arch::aarch64::Neon::new_checked().is_some()
}

#[cfg(not(target_arch = "aarch64"))]
fn host_supports_neon() -> bool {
    false
}

/// Prints the lowercase, hyphenated name of the ISA (e.g. `x86-64-v3`).
impl std::fmt::Display for MaxSimIsa {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Auto => "auto",
            Self::Scalar => "scalar",
            Self::X86_64_V3 => "x86-64-v3",
            Self::X86_64_V4 => "x86-64-v4",
            Self::Neon => "neon",
            Self::Reference => "reference",
        })
    }
}

/// Error returned by [`build_max_sim`](super::build_max_sim) when the
/// requested ISA cannot be produced on the current host, for example x86_64
/// V4 on a CPU without AVX-512, or Neon on an x86_64 build.
#[derive(Debug, Clone, Copy)]
pub struct NotSupported {
    /// The ISA that was requested.
    pub isa: MaxSimIsa,
    /// Static explanation of why it is unavailable.
    pub reason: &'static str,
}

/// Formats as `<isa> not supported: <reason>`.
impl std::fmt::Display for NotSupported {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { isa, reason } = self;
        write!(f, "{isa} not supported: {reason}")
    }
}

impl std::error::Error for NotSupported {}
diff --git a/diskann-quantization/src/multi_vector/distance/kernel.rs b/diskann-quantization/src/multi_vector/distance/kernel.rs new file mode 100644 index 000000000..b292def54 --- /dev/null +++ b/diskann-quantization/src/multi_vector/distance/kernel.rs @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +//!
Object-safe kernel boundary trait plus BYOTE visitor trait. + +use crate::multi_vector::{MatRef, MaxSimError, Standard}; + +/// Object-safe interface for computing per-query MaxSim scores. +pub trait MaxSimKernel: Send + Sync + std::fmt::Debug { + /// Number of query rows whose scores this kernel produces. + fn nrows(&self) -> usize; + + /// Compute per-query MaxSim scores into `scores`. On zero docs, fills + /// every slot with `f32::MAX`. + /// + /// # Errors + /// + /// [`MaxSimError::InvalidBufferLength`] if `scores.len() != self.nrows()`. + fn compute_max_sim( + &self, + doc: MatRef<'_, Standard>, + scores: &mut [f32], + ) -> Result<(), MaxSimError>; +} + +/// "Bring your own type erasure" visitor: the factory hands a concrete +/// kernel to [`Erase::erase`], which decides how to package it (e.g. as +/// `Box>` via [`BoxErase`], a chamfer-only closure, a +/// batched evaluator, …). +pub trait Erase { + type Output; + /// `K` is generic so the body sees its concrete type and the compiler + /// can inline it. + fn erase + 'static>(self, kernel: K) -> Self::Output; +} + +/// Default boxing [`Erase`] impl. +#[derive(Debug, Clone, Copy)] +pub struct BoxErase; + +impl Erase for BoxErase { + type Output = Box>; + + fn erase + 'static>(self, kernel: K) -> Self::Output { + Box::new(kernel) + } +} diff --git a/diskann-quantization/src/multi_vector/distance/kernels/mod.rs b/diskann-quantization/src/multi_vector/distance/kernels/mod.rs index bd9121a24..55108698d 100644 --- a/diskann-quantization/src/multi_vector/distance/kernels/mod.rs +++ b/diskann-quantization/src/multi_vector/distance/kernels/mod.rs @@ -3,9 +3,8 @@ //! Block-transposed SIMD kernels for multi-vector distance computation. //! -//! This module provides a SIMD-accelerated implementation that uses block-transposed -//! memory layout for **query** vectors (instead of documents), with documents remaining -//! in row-major format. +//! 
SIMD-accelerated implementation that uses block-transposed memory layout +//! for **query** vectors, with documents remaining in row-major format. //! //! # Memory Layout //! diff --git a/diskann-quantization/src/multi_vector/distance/max_sim.rs b/diskann-quantization/src/multi_vector/distance/max_sim.rs index 9ac2b0ed1..d9a4fb541 100644 --- a/diskann-quantization/src/multi_vector/distance/max_sim.rs +++ b/diskann-quantization/src/multi_vector/distance/max_sim.rs @@ -10,8 +10,6 @@ use thiserror::Error; pub enum MaxSimError { #[error("Trying to access score in index {0} for output of size {1}")] IndexOutOfBounds(usize, usize), - #[error("Scores buffer length cannot be 0")] - BufferLengthIsZero, #[error("Invalid buffer length {0} for query size {1}")] InvalidBufferLength(usize, usize), } @@ -42,15 +40,9 @@ pub struct MaxSim<'a> { } impl<'a> MaxSim<'a> { - /// Creates a new [`MaxSim`] with the provided scores buffer. - /// - /// # Errors - /// Returns an error if `scores` is empty. - pub fn new(scores: &'a mut [f32]) -> Result { - if scores.is_empty() { - return Err(MaxSimError::BufferLengthIsZero); - } - Ok(Self { scores }) + /// Creates a new [`MaxSim`] wrapping the provided scores buffer. + pub fn new(scores: &'a mut [f32]) -> Self { + Self { scores } } /// Returns the number of score slots in the buffer. 
@@ -130,7 +122,7 @@ mod tests { } } - fn max_sim(&mut self) -> Result, MaxSimError> { + fn max_sim(&mut self) -> MaxSim<'_> { MaxSim::new(&mut self.buffer) } } @@ -138,19 +130,12 @@ mod tests { mod max_sim_new { use super::*; - #[test] - fn fails_with_empty_buffer() { - let mut buffer: Vec = vec![]; - let result = MaxSim::new(&mut buffer); - assert!(matches!(result, Err(MaxSimError::BufferLengthIsZero))); - } - #[test] fn returns_correct_size() { let sizes = [1, 2, 5, 100, 1000]; for size in sizes { let mut fixture = TestFixture::new(size); - let mut max_sim = fixture.max_sim().unwrap(); + let mut max_sim = fixture.max_sim(); assert_eq!(max_sim.size(), size, "size mismatch for buffer of {}", size); let scores = max_sim.scores_mut(); @@ -165,7 +150,7 @@ mod tests { #[test] fn returns_value_at_valid_index() { let mut fixture = TestFixture::with_values(&[1.0, 2.0, 3.0]); - let max_sim = fixture.max_sim().unwrap(); + let max_sim = fixture.max_sim(); assert_eq!(max_sim.get(0).unwrap(), 1.0); assert_eq!(max_sim.get(1).unwrap(), 2.0); @@ -175,7 +160,7 @@ mod tests { #[test] fn fails_at_out_of_bounds_index() { let mut fixture = TestFixture::new(3); - let max_sim = fixture.max_sim().unwrap(); + let max_sim = fixture.max_sim(); let result = max_sim.get(3); assert!(matches!(result, Err(MaxSimError::IndexOutOfBounds(3, 3)))); @@ -191,7 +176,7 @@ mod tests { #[test] fn sets_value_at_valid_index() { let mut fixture = TestFixture::new(3); - let mut max_sim = fixture.max_sim().unwrap(); + let mut max_sim = fixture.max_sim(); max_sim.set(0, 10.0).unwrap(); max_sim.set(1, 20.0).unwrap(); @@ -205,7 +190,7 @@ mod tests { #[test] fn fails_at_out_of_bounds_index() { let mut fixture = TestFixture::new(3); - let mut max_sim = fixture.max_sim().unwrap(); + let mut max_sim = fixture.max_sim(); let result = max_sim.set(3, 999.0); assert!(matches!(result, Err(MaxSimError::IndexOutOfBounds(3, 3)))); @@ -214,7 +199,7 @@ mod tests { #[test] fn overwrites_existing_value() { let mut fixture = 
TestFixture::with_values(&[1.0, 2.0, 3.0]); - let mut max_sim = fixture.max_sim().unwrap(); + let mut max_sim = fixture.max_sim(); max_sim.set(1, 99.0).unwrap(); @@ -226,7 +211,7 @@ mod tests { #[test] fn handles_special_float_values() { let mut fixture = TestFixture::new(4); - let mut max_sim = fixture.max_sim().unwrap(); + let mut max_sim = fixture.max_sim(); max_sim.set(0, f32::INFINITY).unwrap(); max_sim.set(1, f32::NEG_INFINITY).unwrap(); @@ -243,7 +228,7 @@ mod tests { fn writes_through_to_underlying_buffer() { let mut buffer = vec![0.0f32; 3]; { - let mut max_sim = MaxSim::new(&mut buffer).unwrap(); + let mut max_sim = MaxSim::new(&mut buffer); max_sim.set(0, 1.0).unwrap(); max_sim.set(1, 2.0).unwrap(); } diff --git a/diskann-quantization/src/multi_vector/distance/mod.rs b/diskann-quantization/src/multi_vector/distance/mod.rs index 853f60753..ef336161c 100644 --- a/diskann-quantization/src/multi_vector/distance/mod.rs +++ b/diskann-quantization/src/multi_vector/distance/mod.rs @@ -3,17 +3,9 @@ //! Distance computation for multi-vector representations. //! -//! Provides asymmetric distance primitives for multi-vector search: -//! -//! - [`MaxSim`]: Per-query-vector maximum similarities. -//! - [`Chamfer`]: Sum of MaxSim scores (asymmetric Chamfer distance). -//! - [`QueryComputer`]: Architecture-dispatched query computer backed by -//! SIMD-accelerated block-transposed kernels. -//! //! The fallback path uses a double-loop kernel over -//! [`InnerProduct`](diskann_vector::distance::InnerProduct). The optimised -//! path (via [`QueryComputer`]) uses block-transposed layout with -//! cache-tiled SIMD micro-kernels. +//! [`InnerProduct`](diskann_vector::distance::InnerProduct); the factory +//! returns cache-tiled SIMD kernels selected by [`MaxSimIsa`]. //! //! # Example //! @@ -43,17 +35,21 @@ //! //! // MaxSim (per-query-vector scores) //! let mut scores = vec![0.0f32; 2]; -//! let mut max_sim = MaxSim::new(&mut scores).unwrap(); +//! 
let mut max_sim = MaxSim::new(&mut scores); //! max_sim.evaluate(query, doc); //! // scores[0] = -1.0 (query[0] matches doc[0]: negated max inner product) //! // scores[1] = 0.0 (query[1] has no good match: max IP was 0) //! ``` +mod factory; mod fallback; +mod isa; +mod kernel; mod kernels; mod max_sim; -mod query_computer; +pub use factory::{MaxSimElement, build_max_sim}; pub use fallback::QueryMatRef; +pub use isa::{MaxSimIsa, NotSupported}; +pub use kernel::{BoxErase, Erase, MaxSimKernel}; pub use max_sim::{Chamfer, MaxSim, MaxSimError}; -pub use query_computer::QueryComputer; diff --git a/diskann-quantization/src/multi_vector/distance/query_computer/f16.rs b/diskann-quantization/src/multi_vector/distance/query_computer/f16.rs deleted file mode 100644 index 9bb348a6a..000000000 --- a/diskann-quantization/src/multi_vector/distance/query_computer/f16.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -use diskann_wide::Architecture; -use diskann_wide::arch::Scalar; -#[cfg(target_arch = "aarch64")] -use diskann_wide::arch::aarch64::Neon; -#[cfg(target_arch = "x86_64")] -use diskann_wide::arch::x86_64::{V3, V4}; - -use super::{DynQueryComputer, Prepared, QueryComputer, build_prepared}; -use crate::multi_vector::distance::kernels::f16::F16Entry; -use crate::multi_vector::{BlockTransposed, BlockTransposedRef, MatRef, Standard}; -use diskann_utils::Reborrow; - -impl QueryComputer { - /// Build an f16 query computer, selecting the optimal architecture and - /// GROUP for the current CPU at runtime. 
- pub fn new(query: MatRef<'_, Standard>) -> Self { - diskann_wide::arch::dispatch1_no_features(BuildComputer, query) - } -} - -impl DynQueryComputer - for Prepared> -where - A: Architecture, - F16Entry: for<'a> diskann_wide::arch::Target3< - A, - (), - BlockTransposedRef<'a, half::f16, GROUP>, - MatRef<'a, Standard>, - &'a mut [f32], - >, -{ - fn compute_max_sim(&self, doc: MatRef<'_, Standard>, scores: &mut [f32]) { - let mut scratch = vec![f32::MIN; self.prepared.padded_nrows()]; - self.arch.run3( - F16Entry::, - self.prepared.reborrow(), - doc, - &mut scratch, - ); - for (dst, &src) in scores.iter_mut().zip(&scratch[..self.prepared.nrows()]) { - *dst = -src; - } - } - - fn nrows(&self) -> usize { - self.prepared.nrows() - } -} - -#[derive(Debug, Clone, Copy)] -pub(super) struct BuildComputer; - -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: Scalar, query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = "x86_64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: V3, query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = "x86_64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: V4, query: MatRef<'_, Standard>) -> QueryComputer { - let arch = arch.retarget(); - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = "aarch64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: Neon, query: MatRef<'_, Standard>) -> QueryComputer { - let arch = arch.retarget(); - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} diff --git a/diskann-quantization/src/multi_vector/distance/query_computer/f32.rs 
b/diskann-quantization/src/multi_vector/distance/query_computer/f32.rs deleted file mode 100644 index 9ff16b8b4..000000000 --- a/diskann-quantization/src/multi_vector/distance/query_computer/f32.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -use diskann_wide::Architecture; -use diskann_wide::arch::Scalar; -#[cfg(target_arch = "aarch64")] -use diskann_wide::arch::aarch64::Neon; -#[cfg(target_arch = "x86_64")] -use diskann_wide::arch::x86_64::{V3, V4}; - -use super::{DynQueryComputer, Prepared, QueryComputer, build_prepared}; -use crate::multi_vector::distance::kernels::f32::F32Kernel; -use crate::multi_vector::{BlockTransposed, BlockTransposedRef, MatRef, Standard}; -use diskann_utils::Reborrow; - -impl QueryComputer { - /// Build an f32 query computer, selecting the optimal architecture and - /// GROUP for the current CPU at runtime. - pub fn new(query: MatRef<'_, Standard>) -> Self { - diskann_wide::arch::dispatch1_no_features(BuildComputer, query) - } -} - -impl DynQueryComputer for Prepared> -where - A: Architecture, - F32Kernel: for<'a> diskann_wide::arch::Target3< - A, - (), - BlockTransposedRef<'a, f32, GROUP>, - MatRef<'a, Standard>, - &'a mut [f32], - >, -{ - fn compute_max_sim(&self, doc: MatRef<'_, Standard>, scores: &mut [f32]) { - let mut scratch = vec![f32::MIN; self.prepared.padded_nrows()]; - self.arch.run3( - F32Kernel::, - self.prepared.reborrow(), - doc, - &mut scratch, - ); - for (dst, &src) in scores.iter_mut().zip(&scratch[..self.prepared.nrows()]) { - *dst = -src; - } - } - - fn nrows(&self) -> usize { - self.prepared.nrows() - } -} - -#[derive(Debug, Clone, Copy)] -pub(super) struct BuildComputer; - -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: Scalar, query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = 
"x86_64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: V3, query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = "x86_64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: V4, query: MatRef<'_, Standard>) -> QueryComputer { - // V4 delegates to V3 — the V3 micro-kernel is valid on V4 hardware. - let arch = arch.retarget(); - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} - -#[cfg(target_arch = "aarch64")] -impl diskann_wide::arch::Target1, MatRef<'_, Standard>> - for BuildComputer -{ - fn run(self, arch: Neon, query: MatRef<'_, Standard>) -> QueryComputer { - // Neon delegates to Scalar. - let arch = arch.retarget(); - QueryComputer { - inner: Box::new(build_prepared::(arch, query)), - } - } -} diff --git a/diskann-quantization/src/multi_vector/distance/query_computer/mod.rs b/diskann-quantization/src/multi_vector/distance/query_computer/mod.rs deleted file mode 100644 index fbe84fcd3..000000000 --- a/diskann-quantization/src/multi_vector/distance/query_computer/mod.rs +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -//! Architecture-opaque query computer with runtime dispatch. -//! -//! [`QueryComputer`] wraps a block-transposed query and a captured -//! architecture token behind a trait-object vtable. CPU detection happens -//! once at construction; every subsequent distance call goes through -//! [`Architecture::run3`](diskann_wide::Architecture::run3) with full -//! `#[target_feature]` propagation — no re-dispatch and no enum matching -//! on the hot path. -//! -//! # Usage -//! -//! ``` -//! use diskann_quantization::multi_vector::{ -//! QueryComputer, MatRef, Standard, -//! }; -//! -//! let query_data = [1.0f32, 0.0, 0.0, 1.0]; -//! 
let doc_data = [1.0f32, 0.0, 0.0, 1.0]; -//! -//! let query = MatRef::new(Standard::new(2, 2).unwrap(), &query_data).unwrap(); -//! let doc = MatRef::new(Standard::new(2, 2).unwrap(), &doc_data).unwrap(); -//! -//! // Build — runtime detects arch, picks optimal GROUP, captures both -//! let computer = QueryComputer::::new(query); -//! -//! // Distance — vtable → arch.run3 with target_feature propagation -//! let dist = computer.chamfer(doc); -//! assert_eq!(dist, -2.0); -//! ``` - -mod f16; -mod f32; - -use crate::multi_vector::{BlockTransposed, MatRef, Standard}; - -/// Architecture-dispatched query computer for multi-vector distance. -#[derive(Debug)] -pub struct QueryComputer { - inner: Box>, -} - -impl QueryComputer { - /// Number of logical (non-padded) query vectors. - #[inline] - pub fn nrows(&self) -> usize { - self.inner.nrows() - } - - /// Compute Chamfer distance (sum of per-query max similarities, negated). - /// - /// Returns `0.0` if the document has zero vectors. - pub fn chamfer(&self, doc: MatRef<'_, Standard>) -> f32 { - let nq = self.nrows(); - if doc.num_vectors() == 0 { - return 0.0; - } - let mut scores = vec![0.0f32; nq]; - self.max_sim(doc, &mut scores); - scores.iter().sum() - } - - /// Compute per-query-vector max similarities into `scores`. - /// - /// `scores` must have length equal to [`nrows()`](Self::nrows). - /// Each entry is the negated max inner product for that query vector. - /// - /// # Panics - /// - /// Panics if `scores.len() != self.nrows()`. 
- pub fn max_sim(&self, doc: MatRef<'_, Standard>, scores: &mut [f32]) { - let nq = self.nrows(); - assert_eq!( - scores.len(), - nq, - "scores buffer not right size: {} != {}", - scores.len(), - nq - ); - - if doc.num_vectors() == 0 { - return; - } - - self.inner.compute_max_sim(doc, scores); - } -} - -trait DynQueryComputer: std::fmt::Debug + Send + Sync { - fn compute_max_sim(&self, doc: MatRef<'_, Standard>, scores: &mut [f32]); - fn nrows(&self) -> usize; -} - -#[derive(Debug)] -struct Prepared { - arch: A, - prepared: Q, -} - -fn build_prepared( - arch: A, - query: MatRef<'_, Standard>, -) -> Prepared> { - let prepared = BlockTransposed::::from_matrix_view(query.as_matrix_view()); - Prepared { arch, prepared } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::multi_vector::{Chamfer, MaxSim, QueryMatRef}; - use diskann_vector::distance::InnerProduct; - use diskann_vector::{DistanceFunctionMut, PureDistanceFunction}; - - trait FromF32 { - fn from_f32(v: f32) -> Self; - } - - impl FromF32 for f32 { - fn from_f32(v: f32) -> Self { - v - } - } - - impl FromF32 for half::f16 { - fn from_f32(v: f32) -> Self { - diskann_wide::cast_f32_to_f16(v) - } - } - - fn make_mat(data: &[T], nrows: usize, ncols: usize) -> MatRef<'_, Standard> { - MatRef::new(Standard::new(nrows, ncols).unwrap(), data).unwrap() - } - - fn make_test_data(len: usize, ceil: usize, shift: usize) -> Vec { - (0..len) - .map(|v| T::from_f32(((v + shift) % ceil) as f32)) - .collect() - } - - /// Shapes for the `chamfer_matches_fallback` / `max_sim_matches_fallback` - /// agreement checks: (num_queries, num_docs, dim). - /// - /// This matrix targets the API-layer wiring that lives above the - /// kernel — `QueryComputer::new` query setup, `chamfer` row - /// summation, `max_sim` per-row writeback, and the f16 query - /// conversion path — not kernel correctness. 
A small - /// representative set is sufficient because exhaustive shape - /// coverage (panel boundaries, B-remainder classes, prime `k`, - /// degenerate dims) is pinned one layer below in - /// `kernels::tiled_reduce::tests::NAIVE_CASES`, and structural - /// loop-path coverage in `tiled_reduce_all_loop_paths_match_naive`. - const TEST_CASES: &[(usize, usize, usize)] = &[ - (1, 1, 4), // Degenerate - (5, 3, 5), // Prime k; nq > 1 and nd > 1 exercise chamfer summation - // and per-row max_sim writeback on a non-trivial shape - (17, 4, 64), // A-panel remainder crossing both Scalar and V3 panel widths - (16, 6, 32), // B-remainder ≠ 1 (V3 b_remainder = 2) - ]; - - fn check_chamfer_matches( - build: fn(MatRef<'_, Standard>) -> QueryComputer, - tol: f32, - label: &str, - ) where - InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, - { - for &(nq, nd, dim) in TEST_CASES { - let query_data = make_test_data::(nq * dim, dim, dim / 2); - let doc_data = make_test_data::(nd * dim, dim, dim); - - let query = make_mat(&query_data, nq, dim); - let doc = make_mat(&doc_data, nd, dim); - - let expected = Chamfer::evaluate(QueryMatRef::from(query), doc); - let actual = build(query).chamfer(doc); - - assert!( - (actual - expected).abs() < tol, - "{label}Chamfer mismatch for ({nq},{nd},{dim}): actual={actual}, expected={expected}", - ); - } - } - - fn check_max_sim_matches( - build: fn(MatRef<'_, Standard>) -> QueryComputer, - tol: f32, - label: &str, - ) where - InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, - { - for &(nq, nd, dim) in TEST_CASES { - let query_data = make_test_data::(nq * dim, dim, dim / 2); - let doc_data = make_test_data::(nd * dim, dim, dim); - - let query = make_mat(&query_data, nq, dim); - let doc = make_mat(&doc_data, nd, dim); - - let mut expected_scores = vec![0.0f32; nq]; - let _ = MaxSim::new(&mut expected_scores) - .unwrap() - .evaluate(QueryMatRef::from(query), doc); - - let computer = build(query); - let mut 
actual_scores = vec![0.0f32; nq]; - computer.max_sim(doc, &mut actual_scores); - - for i in 0..nq { - assert!( - (actual_scores[i] - expected_scores[i]).abs() < tol, - "{label}MaxSim[{i}] mismatch for ({nq},{nd},{dim}): actual={}, expected={}", - actual_scores[i], - expected_scores[i], - ); - } - } - } - - #[test] - fn query_computer_dimensions() { - let data = vec![1.0f32; 5 * 8]; - let query = make_mat(&data, 5, 8); - let computer = QueryComputer::::new(query); - - assert_eq!(computer.nrows(), 5); - } - - #[test] - fn query_computer_f16_dimensions() { - let data = vec![diskann_wide::cast_f32_to_f16(1.0); 5 * 8]; - let query = make_mat(data.as_slice(), 5, 8); - let computer = QueryComputer::::new(query); - - assert_eq!(computer.nrows(), 5); - } - - #[test] - fn chamfer_with_zero_docs() { - let query = make_mat(&[1.0f32, 0.0, 0.0, 1.0], 2, 2); - let computer = QueryComputer::::new(query); - let doc = make_mat(&[], 0, 2); - assert_eq!(computer.chamfer(doc), 0.0); - } - - #[test] - fn max_sim_with_zero_docs() { - let query = make_mat(&[1.0f32, 0.0, 0.0, 1.0], 2, 2); - let computer = QueryComputer::::new(query); - let doc = make_mat::(&[], 0, 2); - let mut scores = vec![0.0f32; 2]; - computer.max_sim(doc, &mut scores); - // With zero docs the scores buffer is left untouched. - for &s in &scores { - assert_eq!(s, 0.0, "zero-doc MaxSim should leave scores untouched"); - } - } - - #[test] - #[should_panic(expected = "scores buffer not right size")] - fn max_sim_panics_on_size_mismatch() { - let query = make_mat(&[1.0f32, 2.0, 3.0, 4.0], 2, 2); - let computer = QueryComputer::::new(query); - let doc = make_mat(&[1.0, 1.0], 1, 2); - let mut scores = vec![0.0f32; 3]; // Wrong size - computer.max_sim(doc, &mut scores); - } - - macro_rules! 
test_matches_fallback { - ($mod_name:ident, $ty:ty, $tol:expr, $label:literal) => { - mod $mod_name { - use super::*; - - #[test] - fn chamfer_matches_fallback() { - check_chamfer_matches(QueryComputer::<$ty>::new, $tol, $label); - } - - #[test] - fn max_sim_matches_fallback() { - check_max_sim_matches(QueryComputer::<$ty>::new, $tol, $label); - } - } - }; - } - - test_matches_fallback!(f32, f32, 1e-10, "f32 "); - test_matches_fallback!(f16, half::f16, 1e-10, "f16 "); -} diff --git a/diskann-quantization/src/multi_vector/matrix.rs b/diskann-quantization/src/multi_vector/matrix.rs index 70629d44c..9a3c02ea1 100644 --- a/diskann-quantization/src/multi_vector/matrix.rs +++ b/diskann-quantization/src/multi_vector/matrix.rs @@ -712,6 +712,13 @@ impl Clone for Mat { } impl Mat> { + /// Construct a [`Mat`] by calling `f` once per element in row-major order. + pub fn from_fn T>(repr: Standard, mut f: F) -> Self { + let b: Box<[T]> = (0..repr.num_elements()).map(|_| f()).collect(); + // SAFETY: `b` has length `repr.num_elements()` by construction. + unsafe { repr.box_to_mat(b) } + } + /// Returns the raw dimension (columns) of the vectors in the matrix. 
#[inline] pub fn vector_dim(&self) -> usize { @@ -1767,6 +1774,31 @@ mod tests { } } + #[test] + fn test_mat_from_fn() { + let rows = [0, 1, 2, 5]; + let cols = [0, 1, 3, 7]; + + for nrows in rows { + for ncols in cols { + let mut counter = 0u32; + let m = Mat::from_fn(Standard::new(nrows, ncols).unwrap(), || { + let v = counter; + counter += 1; + v + }); + + assert_eq!(counter as usize, nrows * ncols); + for (i, row) in m.rows().enumerate() { + assert_eq!(row.len(), ncols); + for (j, &v) in row.iter().enumerate() { + assert_eq!(v, (i * ncols + j) as u32); + } + } + } + } + } + #[test] fn matref_new_slice_length_error() { let repr = Standard::::new(3, 4).unwrap(); diff --git a/diskann-quantization/src/multi_vector/mod.rs b/diskann-quantization/src/multi_vector/mod.rs index 3670b1aaf..23a3a9266 100644 --- a/diskann-quantization/src/multi_vector/mod.rs +++ b/diskann-quantization/src/multi_vector/mod.rs @@ -8,22 +8,6 @@ //! Row-major matrix abstractions for multi-vector representations, where each //! entity is encoded as multiple embedding vectors (e.g., per-token embeddings). //! -//! # Core Types -//! -//! | Type | Description | -//! |------|-------------| -//! | [`Mat`] | Owning matrix that manages its own memory | -//! | [`MatRef`] | Immutable borrowed view | -//! | [`MatMut`] | Mutable borrowed view | -//! | [`Repr`] | Trait defining row layout (e.g., [`Standard`]) | -//! | [`BlockTransposed`] | Owning block-transposed matrix | -//! | [`BlockTransposedRef`] | Immutable view of a block-transposed matrix | -//! | [`BlockTransposedMut`] | Mutable view of a block-transposed matrix | -//! | [`QueryMatRef`] | Query wrapper for asymmetric distances | -//! | [`QueryComputer`] | Architecture-dispatched SIMD query computer | -//! | [`MaxSim`] | Per-query-vector max similarity computation | -//! | [`Chamfer`] | Asymmetric Chamfer distance (sum of MaxSim) | -//! //! # Example //! //! ``` @@ -61,7 +45,7 @@ //! //! // MaxSim (per-query-vector scores) //! 
let mut scores = vec![0.0f32; 2]; -//! let mut max_sim = MaxSim::new(&mut scores).unwrap(); +//! let mut max_sim = MaxSim::new(&mut scores); //! max_sim.evaluate(query, doc); //! assert_eq!(scores[0], -1.0); //! assert_eq!(scores[1], -1.0); @@ -72,7 +56,10 @@ pub mod distance; pub(crate) mod matrix; pub use block_transposed::{BlockTransposed, BlockTransposedMut, BlockTransposedRef}; -pub use distance::{Chamfer, MaxSim, MaxSimError, QueryComputer, QueryMatRef}; +pub use distance::{ + BoxErase, Chamfer, Erase, MaxSim, MaxSimElement, MaxSimError, MaxSimIsa, MaxSimKernel, + NotSupported, QueryMatRef, build_max_sim, +}; pub use matrix::{ Defaulted, LayoutError, Mat, MatMut, MatRef, NewCloned, NewMut, NewOwned, NewRef, Overflow, Repr, ReprMut, ReprOwned, SliceError, Standard,