From eb965700d0fec2ebee8c89ae5a77b9ad6d85ef44 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Fri, 29 May 2026 08:57:11 -0400 Subject: [PATCH] Revert "Add benchmark_runner for sql_benchmarks with help and list commands (#22001)" This reverts commit 32f51ec6 --- Cargo.lock | 32 -- benchmarks/Cargo.toml | 4 +- benchmarks/sql_benchmarks/tpch/tpch.suite | 16 - benchmarks/src/benchmark_runner/cli.rs | 81 ---- benchmarks/src/benchmark_runner/mod.rs | 104 ----- benchmarks/src/benchmark_runner/output.rs | 173 -------- benchmarks/src/benchmark_runner/suite.rs | 500 ---------------------- benchmarks/src/bin/benchmark_runner.rs | 36 -- benchmarks/src/lib.rs | 1 - 9 files changed, 1 insertion(+), 946 deletions(-) delete mode 100644 benchmarks/sql_benchmarks/tpch/tpch.suite delete mode 100644 benchmarks/src/benchmark_runner/cli.rs delete mode 100644 benchmarks/src/benchmark_runner/mod.rs delete mode 100644 benchmarks/src/benchmark_runner/output.rs delete mode 100644 benchmarks/src/benchmark_runner/suite.rs delete mode 100644 benchmarks/src/bin/benchmark_runner.rs diff --git a/Cargo.lock b/Cargo.lock index b0370a3e1bf27..0ca7ef44d049c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1746,7 +1746,6 @@ dependencies = [ name = "datafusion-benchmarks" version = "53.1.0" dependencies = [ - "anstream", "arrow", "async-trait", "bytes", @@ -1770,7 +1769,6 @@ dependencies = [ "tempfile", "tokio", "tokio-util", - "toml", ] [[package]] @@ -5580,15 +5578,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "serde_spanned" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" -dependencies = [ - "serde_core", -] - [[package]] name = "serde_tokenstream" version = "0.2.3" @@ -6318,21 +6307,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "1.1.2+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81f3d15e84cbcd896376e6730314d59fb5a87f31e4b038454184435cd57defee" -dependencies = [ - "indexmap 2.14.0", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -6363,12 +6337,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "toml_writer" -version = "1.1.1+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" - [[package]] name = "tonic" version = "0.14.6" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 97e0d901b95f9..1815f8bc42ca3 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -40,11 +40,10 @@ snmalloc = ["snmalloc-rs"] mimalloc_extended = ["libmimalloc-sys/extended"] [dependencies] -anstream = "1.0" arrow = { workspace = true } async-trait = "0.1" bytes = { workspace = true } -clap = { version = "4.6.1", features = ["derive", "env", "color"] } +clap = { version = "4.6.0", features = ["derive", "env"] } criterion = { workspace = true, features = ["html_reports"] } datafusion = { workspace = true, default-features = true } datafusion-common = { workspace = true, default-features = true } @@ -62,7 +61,6 @@ serde_json = { workspace = true } snmalloc-rs = { version = "0.7", optional = true } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } tokio-util = { version = "0.7.17" } -toml = "1.1" [dev-dependencies] datafusion-proto = { workspace = true } diff --git a/benchmarks/sql_benchmarks/tpch/tpch.suite b/benchmarks/sql_benchmarks/tpch/tpch.suite deleted file mode 100644 index 317b32e57f45f..0000000000000 --- a/benchmarks/sql_benchmarks/tpch/tpch.suite +++ /dev/null @@ -1,16 +0,0 @@ -name = "tpch" -description = "TPC-H SQL benchmarks" - -[[options]] -name = "format" -short = "f" -default = "parquet" -values = ["parquet", "csv", "mem"] -help = "Selects the TPC-H data format." - -[[options]] -name = "scale-factor" -short = "sf" -default = "1" -values = ["1", "10"] -help = "Selects the TPC-H scale factor." diff --git a/benchmarks/src/benchmark_runner/cli.rs b/benchmarks/src/benchmark_runner/cli.rs deleted file mode 100644 index 30fb71b5a30cc..0000000000000 --- a/benchmarks/src/benchmark_runner/cli.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! CLI construction and argument conversion for `benchmark_runner`. -//! -//! This module owns the clap command tree for the initial runner surface: -//! top-level help and suite listing. - -use clap::builder::styling::{AnsiColor, Styles}; -use clap::{ArgMatches, Command}; -use datafusion_common::{Result, exec_datafusion_err}; - -const HELP_STYLES: Styles = Styles::styled() - .header(AnsiColor::Green.on_default().bold()) - .usage(AnsiColor::Green.on_default().bold()) - .literal(AnsiColor::Cyan.on_default().bold()) - .placeholder(AnsiColor::Cyan.on_default()); - -#[derive(Debug)] -pub enum RunnerCommand { - Help, - List, -} - -/// Builds the command tree for help and suite listing. -pub fn build_cli() -> Command { - Command::new("benchmark_runner") - .about("Inspect DataFusion SQL benchmark suites.") - .styles(HELP_STYLES) - .subcommand_required(false) - .arg_required_else_help(false) - .disable_help_subcommand(true) - .subcommand(Command::new("help").about("Print help")) - .subcommand(Command::new("list").about("List SQL benchmark suites")) -} - -/// Converts clap matches into a typed command. -pub(crate) fn command_from_matches(matches: &ArgMatches) -> Result { - match matches.subcommand() { - None | Some(("help", _)) => Ok(RunnerCommand::Help), - Some(("list", _)) => Ok(RunnerCommand::List), - Some((name, _)) => Err(exec_datafusion_err!("Unknown command '{name}'")), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn list_rejects_unrecognized_options() { - let matches = - build_cli().try_get_matches_from(["benchmark_runner", "list", "--format"]); - - assert!(matches.is_err(), "{matches:?}"); - } - - #[test] - fn help_mentions_list_command() { - let err = build_cli() - .try_get_matches_from(["benchmark_runner", "--help"]) - .unwrap_err(); - let help = err.to_string(); - - assert!(help.contains("list")); - } -} diff --git a/benchmarks/src/benchmark_runner/mod.rs b/benchmarks/src/benchmark_runner/mod.rs deleted file mode 100644 index 458e5f974c152..0000000000000 --- a/benchmarks/src/benchmark_runner/mod.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Command-line inspection for SQL benchmark suites. -//! -//! This module backs the `benchmark_runner` binary. The initial command -//! surface lists discovered SQL benchmark suites from `.suite` files and -//! prints the top-level help. -//! -//! Common invocations: -//! -//! ```text -//! cargo run --bin benchmark_runner -- --help -//! cargo run --release --bin benchmark_runner -- list -//! ``` -//! -//! The public entry point is [`run_cli`]. The submodules are kept private so -//! the command-line flow remains the single supported API: -//! -//! - `cli` builds the clap command tree and parses the selected command. -//! - `suite` loads `.suite` metadata and discovers benchmark query files. -//! - `output` formats colored `list` command output. - -mod cli; -mod output; -mod suite; - -use crate::benchmark_runner::cli::{RunnerCommand, build_cli, command_from_matches}; -use crate::benchmark_runner::output::format_suite_list_styled; -use crate::benchmark_runner::suite::SuiteRegistry; -use datafusion::error::Result; -use datafusion_common::DataFusionError; -use std::io::Write; -use std::path::PathBuf; - -/// Runs the benchmark runner command-line flow for the provided argument list. -/// -/// This discovers suite metadata, parses the help/list command, and dispatches -/// to the selected implementation. -pub fn run_cli(args: I) -> Result<()> -where - I: IntoIterator, - T: Clone + Into, -{ - let benchmark_dir = default_benchmark_dir(); - let registry = SuiteRegistry::discover(&benchmark_dir)?; - let mut cli = build_cli(); - let matches = match cli.try_get_matches_from_mut(args) { - Ok(matches) => matches, - Err(e) if e.kind() == clap::error::ErrorKind::DisplayHelp => { - e.print()?; - return Ok(()); - } - Err(e) => return Err(DataFusionError::External(Box::new(e))), - }; - let command = command_from_matches(&matches)?; - - match command { - RunnerCommand::Help => { - cli.print_long_help()?; - println!(); - } - RunnerCommand::List => { - print_styled(&format_suite_list_styled(®istry)?)?; - } - } - - Ok(()) -} - -/// Writes already styled output through `anstream` so ANSI color handling -/// matches clap help output on supported terminals. -fn print_styled(output: &str) -> Result<()> { - let mut stdout = anstream::stdout(); - - write!(&mut stdout, "{output}") - .map_err(|e| DataFusionError::External(Box::new(e)))?; - Ok(()) -} - -/// Resolves the SQL benchmark root from either the repository root or the -/// benchmarks crate manifest directory. -fn default_benchmark_dir() -> PathBuf { - let repo_root_path = PathBuf::from("benchmarks/sql_benchmarks"); - if repo_root_path.exists() { - repo_root_path - } else { - PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("sql_benchmarks") - } -} diff --git a/benchmarks/src/benchmark_runner/output.rs b/benchmarks/src/benchmark_runner/output.rs deleted file mode 100644 index 9eb975311e29f..0000000000000 --- a/benchmarks/src/benchmark_runner/output.rs +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Formatting helpers for human-readable benchmark runner output. -//! -//! The runner intentionally uses the same colored style for `list` output as -//! clap uses for help text. - -use crate::benchmark_runner::suite::{SuiteConfig, SuiteOption, SuiteRegistry}; -use clap::builder::styling::{AnsiColor, Style}; -use datafusion_common::Result; -use std::fmt::{Display, Write as _}; - -/// Formats the `list` command output with suite summaries, query-id hints, and -/// configurable suite options. -pub fn format_suite_list_styled(registry: &SuiteRegistry) -> Result { - let mut output = String::new(); - - for suite in registry.suites() { - write_suite_list_entry(&mut output, suite)?; - } - - Ok(output) -} - -/// Writes one suite entry for the `list` command. -fn write_suite_list_entry(output: &mut String, suite: &SuiteConfig) -> Result<()> { - writeln!(output, "{}", header(&suite.name))?; - writeln!(output, " {}: {}", label("description"), suite.description)?; - - let queries = suite.discover_queries()?; - - if let (Some(first), Some(last)) = (queries.first(), queries.last()) { - writeln!( - output, - " {}: {}-{} discovered under {} as {}", - label("query ids"), - value(first.id), - value(last.id), - suite.query_search_root().display(), - literal("qNN.benchmark") - )?; - } - writeln!(output, " {}:", label("options"))?; - - for option in &suite.options { - write_suite_list_option(output, option)?; - } - - Ok(()) -} - -/// Writes one suite option summary for the `list` command. -fn write_suite_list_option(output: &mut String, option: &SuiteOption) -> Result<()> { - let values = option - .values - .iter() - .map(|v| { - if v == &option.default { - format!("{} ({})", value(v), label("default")) - } else { - value(v) - } - }) - .collect::>() - .join(", "); - - writeln!( - output, - " {} {} {}", - literal(option_display(option)), - placeholder(""), - values - )?; - - Ok(()) -} - -fn option_display(option: &SuiteOption) -> String { - match &option.short { - Some(short) => format!("-{short}, --{}", option.name), - None => format!("--{}", option.name), - } -} - -fn header(text: impl Display) -> String { - styled(AnsiColor::Green.on_default().bold(), text) -} - -fn literal(text: impl Display) -> String { - styled(AnsiColor::Cyan.on_default().bold(), text) -} - -fn placeholder(text: impl Display) -> String { - styled(AnsiColor::Cyan.on_default(), text) -} - -fn value(text: impl Display) -> String { - styled(AnsiColor::Green.on_default(), text) -} - -fn label(text: impl Display) -> String { - styled(Style::new().bold(), text) -} - -fn styled(style: Style, text: impl Display) -> String { - format!("{style}{text}{style:#}") -} - -#[cfg(test)] -mod tests { - use super::*; - use std::path::PathBuf; - - fn manifest_path(path: &str) -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(path) - } - - fn strip_ansi(input: &str) -> String { - let mut output = String::new(); - let mut chars = input.chars(); - while let Some(c) = chars.next() { - if c == '\x1b' { - for c in chars.by_ref() { - if c == 'm' { - break; - } - } - } else { - output.push(c); - } - } - output - } - - #[test] - fn list_output_mentions_tpch_options() { - let registry = SuiteRegistry::discover(manifest_path("sql_benchmarks")).unwrap(); - let output = strip_ansi(&format_suite_list_styled(®istry).unwrap()); - - assert!(output.contains("tpch\n description: TPC-H SQL benchmarks")); - assert!(output.contains("-f, --format parquet (default), csv, mem")); - assert!(output.contains("-sf, --scale-factor 1 (default), 10")); - } - - #[test] - fn styled_list_output_includes_ansi_sequences() { - let registry = SuiteRegistry::discover(manifest_path("sql_benchmarks")).unwrap(); - let output = format_suite_list_styled(®istry).unwrap(); - - assert!(output.contains("\u{1b}[")); - assert!(output.contains("tpch")); - assert!(output.contains("-f")); - assert!(output.contains("--format")); - assert!(output.contains("-sf")); - assert!(output.contains("--scale-factor")); - assert!(output.contains("")); - } -} diff --git a/benchmarks/src/benchmark_runner/suite.rs b/benchmarks/src/benchmark_runner/suite.rs deleted file mode 100644 index fe5b3339b1643..0000000000000 --- a/benchmarks/src/benchmark_runner/suite.rs +++ /dev/null @@ -1,500 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Suite-file loading and validation. -//! -//! A suite is described by a text `.suite` file that declares which options -//! the runner should display. Query discovery recursively scans from the suite -//! file's directory for `qNN.benchmark` files. Discovered queries are cached -//! lazily because they are reused by listing during a single CLI run. - -use datafusion_common::{DataFusionError, Result}; -use serde::{Deserialize, Serialize}; -use std::cell::OnceCell; -use std::collections::HashSet; -use std::fs::{self, DirEntry}; -use std::path::{Path, PathBuf}; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SuiteQuery { - /// Numeric query id parsed from a `qNN.benchmark` file. - pub id: usize, - /// File name as it appears on disk, for example `q01.benchmark`. - pub file_name: String, - /// Full path to the benchmark file. - pub path: PathBuf, -} - -/// Parsed `.suite` file plus runtime metadata derived from its location. -/// -/// The serialized fields define the text configuration format. The skipped -/// fields are populated from the suite file path and used for discovery and -/// caching during a single runner invocation. -#[derive(Debug, Deserialize, Serialize)] -pub struct SuiteConfig { - /// Suite selector used on the command line, such as `tpch`. - pub name: String, - /// Human-readable suite description shown by `list`. - pub description: String, - /// Suite-specific options shown by `list`. - #[serde(default)] - pub options: Vec, - /// Path to the `.suite` file that produced this config. - #[serde(skip)] - pub suite_path: PathBuf, - /// Directory containing the `.suite` file. - #[serde(skip)] - pub suite_dir: PathBuf, - /// Lazily discovered benchmark query files for this suite. - #[serde(skip)] - pub(crate) query_cache: OnceCell>, -} - -impl Clone for SuiteConfig { - fn clone(&self) -> Self { - Self { - name: self.name.clone(), - description: self.description.clone(), - options: self.options.clone(), - suite_path: self.suite_path.clone(), - suite_dir: self.suite_dir.clone(), - query_cache: OnceCell::new(), - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct SuiteOption { - /// Long option name, without the leading `--`. - pub name: String, - /// Optional short alias, without the leading `-`. - #[serde(default)] - pub short: Option, - /// Default option value. - pub default: String, - /// Allowed option values. - pub values: Vec, - /// Help text shown in command output. - pub help: String, -} - -/// Discovered suite metadata, sorted by suite name. -#[derive(Debug, Clone)] -pub struct SuiteRegistry { - suites: Vec, -} - -impl SuiteConfig { - /// Loads, parses, and validates one `.suite` file. - /// - /// The suite file path and containing directory are stored on the returned - /// config so later discovery can be resolved relative to the suite file. - pub fn from_file(path: impl AsRef) -> Result { - let suite_path = path.as_ref().to_path_buf(); - let contents = fs::read_to_string(&suite_path).map_err(|e| { - DataFusionError::External( - format!("failed to read suite file {}: {e}", suite_path.display()).into(), - ) - })?; - let mut suite: Self = toml::from_str(&contents).map_err(|e| { - DataFusionError::External( - format!("failed to parse suite file {}: {e}", suite_path.display()) - .into(), - ) - })?; - - suite.suite_dir = suite_path - .parent() - .map(Path::to_path_buf) - .unwrap_or_else(|| PathBuf::from(".")); - suite.suite_path = suite_path; - suite.validate()?; - - Ok(suite) - } - - /// Returns the directory recursively searched for query benchmark files. - pub fn query_search_root(&self) -> &Path { - &self.suite_dir - } - - /// Discovers and caches the suite's benchmark query files. - /// - /// Query files are found by recursively scanning from the suite directory, - /// accepting only `qNN.benchmark` files, sorting by numeric query id, and - /// rejecting duplicate ids. - pub fn discover_queries(&self) -> Result> { - if let Some(queries) = self.query_cache.get() { - return Ok(queries.clone()); - } - - let queries = self.scan_queries()?; - let _ = self.query_cache.set(queries.clone()); - Ok(queries) - } - - /// Performs uncached query discovery and duplicate-id validation. - fn scan_queries(&self) -> Result> { - let mut queries = Vec::new(); - - self.scan_query_dir(self.query_search_root(), &mut queries)?; - queries.sort_by(|left, right| { - left.id - .cmp(&right.id) - .then_with(|| left.path.cmp(&right.path)) - }); - - for pair in queries.windows(2) { - let [left, right] = pair else { - continue; - }; - if left.id == right.id { - return Err(DataFusionError::Configuration(format!( - "duplicate QUERY_ID {} in suite '{}': {} and {}", - left.id, - self.name, - left.path.display(), - right.path.display() - ))); - } - } - - Ok(queries) - } - - /// Recursively scans a directory and appends valid query benchmark files to - /// the provided collection. - fn scan_query_dir(&self, dir: &Path, queries: &mut Vec) -> Result<()> { - let mut entries = read_dir_entries(dir, "benchmark query directory")?; - entries.sort_by_key(|entry| entry.file_name()); - - for entry in entries { - let path = entry.path(); - let file_type = entry.file_type().map_err(|e| { - DataFusionError::External( - format!( - "failed to read benchmark query entry type {}: {e}", - path.display() - ) - .into(), - ) - })?; - - if file_type.is_dir() { - self.scan_query_dir(&path, queries)?; - continue; - } - - if path - .extension() - .is_none_or(|extension| extension != "benchmark") - { - continue; - } - - let file_name = entry.file_name().to_string_lossy().into_owned(); - if let Some(id) = parse_query_file_name(&file_name) { - queries.push(SuiteQuery { - id, - file_name, - path, - }); - } - } - - Ok(()) - } - - /// Validates suite metadata that cannot be enforced by TOML deserialization. - fn validate(&self) -> Result<()> { - self.validate_suite_fields()?; - self.validate_options() - } - - /// Validates required suite-level fields. - fn validate_suite_fields(&self) -> Result<()> { - if self.name.trim().is_empty() { - return Err(DataFusionError::Configuration( - "suite name cannot be empty".to_string(), - )); - } - - Ok(()) - } - - /// Validates suite-defined option declarations. - fn validate_options(&self) -> Result<()> { - let mut option_names = HashSet::new(); - for option in &self.options { - if !option_names.insert(option.name.as_str()) { - return Err(DataFusionError::Configuration(format!( - "duplicate option name '{}'", - option.name - ))); - } - - option.validate()?; - } - - Ok(()) - } -} - -impl SuiteOption { - /// Validates one suite-defined option. - fn validate(&self) -> Result<()> { - if self.name.trim().is_empty() { - return Err(DataFusionError::Configuration( - "option name cannot be empty".to_string(), - )); - } - - if !is_valid_cli_option_name(&self.name) { - return Err(DataFusionError::Configuration(format!( - "invalid option name '{}'; expected lowercase ASCII letters, digits, and hyphens", - self.name - ))); - } - - self.validate_short_alias()?; - - if self.help.trim().is_empty() { - return Err(DataFusionError::Configuration(format!( - "help for option '{}' cannot be empty", - self.name - ))); - } - - if self.values.is_empty() { - return Err(DataFusionError::Configuration(format!( - "values for option '{}' cannot be empty", - self.name - ))); - } - - let mut values = HashSet::new(); - for value in &self.values { - if !values.insert(value.as_str()) { - return Err(DataFusionError::Configuration(format!( - "duplicate value '{}' for option '{}'", - value, self.name - ))); - } - } - - if !self.values.contains(&self.default) { - return Err(DataFusionError::Configuration(format!( - "default value '{}' for option '{}' must be present in values", - self.default, self.name - ))); - } - - Ok(()) - } - - /// Validates the optional short alias for one suite-defined option. - fn validate_short_alias(&self) -> Result<()> { - let Some(short) = &self.short else { - return Ok(()); - }; - - if short.trim().is_empty() { - return Err(DataFusionError::Configuration(format!( - "short alias for option '{}' cannot be empty", - self.name - ))); - } - - if !is_valid_cli_option_name(short) { - return Err(DataFusionError::Configuration(format!( - "invalid short alias '{short}' for option '{}'; expected lowercase ASCII letters, digits, and hyphens", - self.name - ))); - } - - Ok(()) - } -} - -impl SuiteRegistry { - /// Discovers all suite files below the SQL benchmark root. - /// - /// Each direct child directory is searched for `.suite` files. Suite names - /// must be unique across the registry so command selectors are - /// unambiguous. - pub fn discover(root: impl AsRef) -> Result { - let root = root.as_ref(); - let mut suites = Vec::new(); - let mut suite_names = HashSet::new(); - - for entry in read_dir_entries(root, "benchmark suite root")? { - if !entry - .file_type() - .map_err(|e| { - DataFusionError::External( - format!( - "failed to read benchmark suite entry type {}: {e}", - entry.path().display() - ) - .into(), - ) - })? - .is_dir() - { - continue; - } - - let mut suite_files = Vec::new(); - let suite_dir = entry.path(); - for suite_entry in read_dir_entries(&suite_dir, "benchmark suite directory")? - { - let path = suite_entry.path(); - if path - .extension() - .is_some_and(|extension| extension == "suite") - { - suite_files.push(path); - } - } - suite_files.sort(); - - for suite_file in suite_files { - let suite = SuiteConfig::from_file(suite_file)?; - if !suite_names.insert(suite.name.clone()) { - return Err(DataFusionError::Configuration(format!( - "duplicate suite name '{}'", - suite.name - ))); - } - suites.push(suite); - } - } - - suites.sort_by(|left, right| left.name.cmp(&right.name)); - - Ok(Self { suites }) - } - - /// Returns discovered suites sorted by selector name. - pub fn suites(&self) -> &[SuiteConfig] { - &self.suites - } -} - -fn parse_query_file_name(file_name: &str) -> Option { - let query_id = file_name.strip_prefix('q')?.strip_suffix(".benchmark")?; - - if query_id.len() < 2 || !query_id.chars().all(|c| c.is_ascii_digit()) { - return None; - } - - query_id.parse().ok() -} - -fn is_valid_cli_option_name(name: &str) -> bool { - name.chars() - .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') -} - -fn read_dir_entries(dir: &Path, label: &str) -> Result> { - let entries = fs::read_dir(dir).map_err(|e| { - DataFusionError::External( - format!("failed to read {label} {}: {e}", dir.display()).into(), - ) - })?; - - entries - .collect::>>() - .map_err(|e| DataFusionError::External(Box::new(e))) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs; - - fn manifest_path(path: &str) -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(path) - } - - #[test] - fn discovers_tpch_suite_file() { - let registry = SuiteRegistry::discover(manifest_path("sql_benchmarks")).unwrap(); - - assert_eq!(registry.suites().len(), 1); - assert_eq!(registry.suites()[0].name, "tpch"); - assert_eq!(registry.suites()[0].options.len(), 2); - } - - #[test] - fn discovers_query_ids_in_numeric_order() { - let registry = SuiteRegistry::discover(manifest_path("sql_benchmarks")).unwrap(); - let suite = ®istry.suites()[0]; - let queries = suite.discover_queries().unwrap(); - let ids = queries.iter().map(|query| query.id).collect::>(); - - assert_eq!(ids.first(), Some(&1)); - assert_eq!(ids.last(), Some(&22)); - assert!(!ids.contains(&0)); - } - - #[test] - fn rejects_duplicate_suite_names() { - let dir = tempfile::tempdir().unwrap(); - let one = dir.path().join("one"); - let two = dir.path().join("two"); - fs::create_dir_all(&one).unwrap(); - fs::create_dir_all(&two).unwrap(); - fs::write( - one.join("suite.suite"), - "name = \"dup\"\ndescription = \"one\"\n", - ) - .unwrap(); - fs::write( - two.join("suite.suite"), - "name = \"dup\"\ndescription = \"two\"\n", - ) - .unwrap(); - - let err = SuiteRegistry::discover(dir.path()).unwrap_err(); - assert!(err.to_string().contains("duplicate suite name")); - } - - #[test] - fn rejects_invalid_option_metadata() { - let dir = tempfile::tempdir().unwrap(); - let suite_dir = dir.path().join("suite"); - fs::create_dir_all(&suite_dir).unwrap(); - fs::write( - suite_dir.join("bad.suite"), - r#" -name = "bad" -description = "bad suite" - -[[options]] -name = "BAD" -default = "one" -values = ["one"] -help = "bad" -"#, - ) - .unwrap(); - - let err = SuiteRegistry::discover(dir.path()).unwrap_err(); - assert!(err.to_string().contains("invalid option name")); - } -} diff --git a/benchmarks/src/bin/benchmark_runner.rs b/benchmarks/src/bin/benchmark_runner.rs deleted file mode 100644 index 0efd169947f3c..0000000000000 --- a/benchmarks/src/bin/benchmark_runner.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion_benchmarks::benchmark_runner::run_cli; - -#[cfg(feature = "snmalloc")] -#[global_allocator] -static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; - -// `cargo clippy --all-features` enables both allocator features, so prefer -// `snmalloc` in that case and fall back to `mimalloc` otherwise. -#[cfg(all(not(feature = "snmalloc"), feature = "mimalloc"))] -#[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; - -fn main() { - env_logger::init(); - if let Err(e) = run_cli(std::env::args()) { - eprintln!("{e}"); - std::process::exit(1); - } -} diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs index f41fd5ebed205..eae72c2a72d9e 100644 --- a/benchmarks/src/lib.rs +++ b/benchmarks/src/lib.rs @@ -16,7 +16,6 @@ // under the License. //! DataFusion benchmark runner -pub mod benchmark_runner; pub mod cancellation; pub mod clickbench; pub mod dict;