Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions benchmarks/vector-search-bench/src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
//!
//! The benchmark writes one `.vortex` file per flavor per data file, then scans them all with the
//! same query so the comparison is apples-to-apples with the Parquet files.
//!
//! Note that the handrolled `&[f32]` parquet baseline is **not** a flavor here.

use clap::ValueEnum;
use vortex::array::ArrayId;
Expand Down
133 changes: 133 additions & 0 deletions benchmarks/vector-search-bench/src/display.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Local table renderer for the vector-search benchmark.
//!
//! Groups columns by **flavor** (`vortex-uncompressed`, `vortex-turboquant`) rather than by
//! [`vortex_bench::Format`], because the two Vortex flavors share a single
//! `Format::OnDiskVortex`/`Format::VortexLossy` pair and the generic
//! [`vortex_bench::display::render_table`] groups by Format. The local renderer keeps the
//! column-per-flavor invariant intact without introducing a new global Format value.
//!
//! Output rows:
//!
//! ```text
//! Metric | vortex-uncompressed | vortex-turboquant
//! ------------------ + ------------------- + -----------------
//! scan wall (mean) | 485 ms | 212 ms
//! scan wall (median) | 490 ms | 215 ms
//! matches | 42 | 39
//! rows scanned | 10,000,000 | 10,000,000
//! bytes scanned | 30.5 GB | 7.62 GB
//! rows / sec | 5.2e6 | 1.2e7
//! ```

use std::io::Write;

use anyhow::Result;
use tabled::settings::Style;

use crate::compression::VectorFlavor;
use crate::prepare::CompressedVortexDataset;
use crate::scan::ScanTiming;

/// Final column-per-flavor row set for one dataset.
///
/// Borrowed view over already-computed results; constructing a report performs
/// no work — all measurement happens upstream in the scan phase.
pub struct DatasetReport<'a> {
    // Human-readable dataset name, printed as the `##` section heading by `render`.
    pub dataset_name: &'a str,
    // One entry per Vortex flavor column, in display order: the flavor, the
    // prepared (compressed) dataset it produced, and the scan timing measured
    // against it. Each entry becomes one column in the rendered table.
    pub vortex_results: &'a [(VectorFlavor, &'a CompressedVortexDataset, &'a ScanTiming)],
}

/// Render the full report into the given writer as a tabled table.
///
/// Emits a `## <dataset_name>` heading followed by one modern-style table whose
/// first column is the metric name and whose remaining columns are the Vortex
/// flavors, in the order they appear in `report.vortex_results`.
pub fn render(report: &DatasetReport<'_>, writer: &mut dyn Write) -> Result<()> {
    // Header row: "metric" followed by one label per flavor column.
    let headers: Vec<String> = std::iter::once("metric".to_owned())
        .chain(
            report
                .vortex_results
                .iter()
                .map(|&(flavor, ..)| flavor.label().to_owned()),
        )
        .collect();

    // One row per metric; each cell is produced from that column's scan data.
    let rows: Vec<Vec<String>> = vec![
        make_row("scan wall (mean)", report, |_, _, scan| {
            format_duration(scan.mean)
        }),
        make_row("scan wall (median)", report, |_, _, scan| {
            format_duration(scan.median)
        }),
        make_row("matches", report, |_, _, scan| scan.matches.to_string()),
        make_row("rows scanned", report, |_, _, scan| {
            scan.rows_scanned.to_string()
        }),
        make_row("bytes scanned", report, |_, _, scan| {
            format_bytes(scan.bytes_scanned)
        }),
        make_row("rows / sec", report, |_, _, scan| {
            format_throughput_rows(scan.rows_scanned, scan.mean)
        }),
    ];

    writeln!(writer, "## {}", report.dataset_name)?;

    let mut builder = tabled::builder::Builder::new();
    builder.push_record(headers);
    rows.into_iter().for_each(|row| builder.push_record(row));

    let mut table = builder.build();
    table.with(Style::modern());
    writeln!(writer, "{table}")?;
    Ok(())
}

/// Build one table row: the metric label followed by one cell per flavor,
/// where each cell is produced by `vortex_cell` from that column's data.
fn make_row<F>(metric: &str, report: &DatasetReport<'_>, vortex_cell: F) -> Vec<String>
where
    F: Fn(VectorFlavor, &CompressedVortexDataset, &ScanTiming) -> String,
{
    std::iter::once(metric.to_owned())
        .chain(
            report
                .vortex_results
                .iter()
                .map(|&(flavor, prep, scan)| vortex_cell(flavor, prep, scan)),
        )
        .collect()
}

/// Format a wall-clock duration with a human-friendly unit:
/// seconds (2 decimals) at >= 1 s, milliseconds at >= 1 ms, else microseconds.
fn format_duration(d: std::time::Duration) -> String {
    let secs = d.as_secs_f64();
    match secs {
        s if s >= 1.0 => format!("{s:.2} s"),
        s if s >= 1e-3 => format!("{:.1} ms", s * 1e3),
        s => format!("{:.1} µs", s * 1e6),
    }
}

/// Format a byte count using binary (1024-based) units up to TiB.
/// Exact integer bytes below 1 KiB; two decimals otherwise.
fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KiB", "MiB", "GiB", "TiB"];
    let mut idx = 0;
    let mut value = bytes as f64;
    // Scale down by 1024 until the value fits the unit or we run out of units.
    while idx + 1 < UNITS.len() && value >= 1024.0 {
        value /= 1024.0;
        idx += 1;
    }
    if idx == 0 {
        // Plain bytes: print the exact integer rather than "512.00 B".
        format!("{bytes} B")
    } else {
        format!("{value:.2} {}", UNITS[idx])
    }
}

/// Format a rows-per-second throughput with a metric suffix (K/M/G).
/// Returns an em dash when the wall time is zero or negative, since the
/// rate is undefined in that case.
fn format_throughput_rows(rows: u64, wall: std::time::Duration) -> String {
    let secs = wall.as_secs_f64();
    if secs <= 0.0 {
        return "—".to_owned();
    }
    let rps = rows as f64 / secs;
    // Pick the largest suffix whose threshold the rate meets.
    [(1e9, "G"), (1e6, "M"), (1e3, "K")]
        .iter()
        .find(|&&(threshold, _)| rps >= threshold)
        .map(|&(threshold, suffix)| format!("{:.2}{suffix}", rps / threshold))
        .unwrap_or_else(|| format!("{rps:.0}"))
}
96 changes: 47 additions & 49 deletions benchmarks/vector-search-bench/src/ingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
//! 1. Project the `emb` column out of each struct chunk.
//! 2. Rewrap the `emb` column as `Extension<Vector<f32, dim>>` via
//! [`vortex_bench::vector_dataset::list_to_vector_ext`].
//! 3. Cast the FSL element buffer from `f64` -> `f32` if the source is `f64`. After this point all
//! 3. Detect the FSL element ptype at runtime and cast `f64` -> `f32` when needed. Detection is
//! from the arrow schema rather than a catalog declaration so upstream parquets whose actual
//! precision disagrees with the catalog still ingest correctly. After this point all
//! downstream code (compression, scan, recall) is f32-only.
//! 4. Optionally project the `scalar_labels` column through unchanged so future filtered-search
//! benchmarks have it without re-ingest.
Expand Down Expand Up @@ -39,48 +41,50 @@ use vortex_bench::vector_dataset::list_to_vector_ext;
use vortex_tensor::vector::AnyVector;
use vortex_tensor::vector::Vector;

/// Configuration passed alongside each chunk so the transform can stay stateless.
#[derive(Debug, Clone, Copy)]
pub struct ChunkTransform {
/// Source element ptype as declared by the dataset catalog. Used purely to decide whether the
/// f64 -> f32 cast is needed.
pub src_ptype: PType,
// /// Whether to project the `scalar_labels` column through the output struct.
// pub include_scalar_labels: bool,
}
/// Apply the transform to a single struct chunk and return the rebuilt chunk.
///
/// `chunk` must be a non-chunked `Struct { id: i64, emb: List<f32> }`, where all of the list
/// elements have the same fixed length (presumably the vector dimension `dim` — confirm against `list_to_vector_ext`).
///
/// The returned array is always a `Struct { id: i64, emb: Vector<f32, dim> }`.
pub fn transform_chunk(chunk: ArrayRef, ctx: &mut ExecutionCtx) -> Result<ArrayRef> {
let struct_view = chunk
.as_opt::<Struct>()
.with_context(|| format!("ingest: expected struct chunk, got dtype {}", chunk.dtype()))?;

let id = struct_view
.unmasked_field_by_name("id")
.context("ingest: chunk missing `id` column")?
.clone();
let emb = struct_view
.unmasked_field_by_name("emb")
.context("ingest: chunk missing `emb` column")?
.clone();

let emb_ext: ExtensionArray = list_to_vector_ext(emb)?.execute(ctx)?;

// Detect the actual FSL element ptype from the extension storage dtype. The dataset catalog
// cannot be trusted here: at least one upstream parquet (`sift-medium-5m`) ships f64
// embeddings despite the catalog advertising f32.
let element_ptype = {
let storage_dtype = emb_ext.storage_array().dtype();
match storage_dtype {
DType::FixedSizeList(elem, ..) => match elem.as_ref() {
DType::Primitive(ptype, _) => *ptype,
other => bail!("ingest: expected primitive FSL element dtype, got {other}"),
},
other => bail!("ingest: expected FSL storage dtype, got {other}"),
}
};

impl ChunkTransform {
/// Apply the transform to a single struct chunk and return the rebuilt chunk.
///
/// `chunk` must be a non-chunked `Struct { id: i64, emb: List<f32> }`, where all of the list
/// elements are
///
/// The returned array is always a `Struct { id: i64, emb: Vector<f32, dim> }`.
pub fn apply(&self, chunk: ArrayRef, ctx: &mut ExecutionCtx) -> Result<ArrayRef> {
let struct_view = chunk.as_opt::<Struct>().with_context(|| {
format!("ingest: expected struct chunk, got dtype {}", chunk.dtype())
})?;

let id = struct_view
.unmasked_field_by_name("id")
.context("ingest: chunk missing `id` column")?
.clone();
let emb = struct_view
.unmasked_field_by_name("emb")
.context("ingest: chunk missing `emb` column")?
.clone();

let emb_ext: ExtensionArray = list_to_vector_ext(emb)?.execute(ctx)?;

let f32_vector_array = if self.src_ptype == PType::F64 {
convert_f64_to_f32_vectors(&emb_ext, ctx)?
} else {
emb_ext.into_array()
};
let f32_vector_array = match element_ptype {
PType::F32 => emb_ext.into_array(),
PType::F64 => convert_f64_to_f32_vectors(&emb_ext, ctx)?,
other => bail!("ingest: unsupported emb element ptype {other}, expected f32 or f64"),
};

let fields = [("id", id), ("emb", f32_vector_array)];
Ok(StructArray::from_fields(&fields)?.into_array())
}
let fields = [("id", id), ("emb", f32_vector_array)];
Ok(StructArray::from_fields(&fields)?.into_array())
}

/// Convert a `Vector<f64, dim>` extension array down to `Vector<f32, dim>`.
Expand Down Expand Up @@ -164,10 +168,7 @@ mod tests {
let emb = list_chunk_f64(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]);
let chunk =
StructArray::from_fields(&[("id", id_array(&[0, 1])), ("emb", emb)])?.into_array();
let transform = ChunkTransform {
src_ptype: PType::F64,
};
let out = transform.apply(chunk, &mut ctx)?;
let out = transform_chunk(chunk, &mut ctx)?;
let out_struct = out.as_opt::<Struct>().expect("returns Struct");
let out_emb = out_struct.unmasked_field_by_name("emb").unwrap().clone();
let DType::Extension(ext) = out_emb.dtype() else {
Expand Down Expand Up @@ -207,10 +208,7 @@ mod tests {
let chunk =
StructArray::from_fields(&[("id", id_array(&[0, 1])), ("emb", emb)])?.into_array();

let transform = ChunkTransform {
src_ptype: PType::F32,
};
let out = transform.apply(chunk, &mut ctx)?;
let out = transform_chunk(chunk, &mut ctx)?;
let out_struct = out.as_opt::<Struct>().expect("returns Struct");
assert_eq!(out_struct.len(), 2);
Ok(())
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/vector-search-bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
//! `vector-search-bench` vector similarity-search benchmark over several datasets.

pub mod compression;
pub mod display;
pub mod expression;
pub mod ingest;
pub mod prepare;
pub mod query;
pub mod scan;

use std::sync::LazyLock;

Expand Down
Loading
Loading