diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e85c69e7..775f0971 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: # 1 check: name: Rust project check - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 - name: Install latest nightly @@ -41,7 +41,7 @@ jobs: # 2 fmt: name: Rust fmt - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 - name: Install latest nightly @@ -59,7 +59,7 @@ jobs: # 3 e2e: name: Rust e2e sqllogictest - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 - name: Install latest nightly @@ -80,7 +80,7 @@ jobs: # 4 wasm-tests: name: Wasm cargo tests - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 @@ -106,7 +106,7 @@ jobs: # 5 wasm-examples: name: Wasm examples (nodejs) - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 @@ -135,7 +135,7 @@ jobs: # 6 native-examples: name: Native examples - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 @@ -150,7 +150,7 @@ jobs: # 7 python-tests: name: Python bindings tests - runs-on: ubuntu-latest + runs-on: [self-hosted, Linux, X64] steps: - uses: actions/checkout@v2 diff --git a/Cargo.lock b/Cargo.lock index 73c13269..8a9bb0e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,12 +168,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "bindgen" version = "0.69.5" @@ -284,12 +278,6 @@ version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.10.1" @@ -744,12 +732,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fnv" version = "1.0.7" @@ -877,10 +859,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", - "wasm-bindgen", ] [[package]] @@ -890,11 +870,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", - "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", - "wasm-bindgen", ] [[package]] @@ -1097,41 +1075,29 @@ dependencies = [ name = "kite_sql" version = "0.3.0" dependencies = [ - "ahash 0.8.12", - "base64", "bumpalo", - "byteorder", "chrono", "comfy-table", "criterion", "csv", - "fixedbitset", - "getrandom 0.2.16", - "getrandom 0.3.3", "indicatif", - "itertools 0.12.1", "js-sys", "kite_sql_serde_macros", "librocksdb-sys", "lmdb", "lmdb-sys", - "once_cell", "ordered-float", "paste", "pprof", "pyo3", - "recursive", "rocksdb", "rust_decimal", "rustyline", - "siphasher", "sqlite", "sqlparser", "tempfile", - "ulid", "wasm-bindgen", "wasm-bindgen-test", - "web-sys", ] [[package]] @@ -1574,15 +1540,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - [[package]] name = "ptr_meta" version = "0.1.4" @@ -1703,18 +1660,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_chacha", + "rand_core", ] [[package]] @@ -1724,17 +1671,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", + "rand_core", ] [[package]] @@ -1746,15 +1683,6 @@ dependencies = [ "getrandom 0.2.16", ] -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom 0.3.3", -] - [[package]] name = "rayon" version = "1.10.0" @@ -1775,26 +1703,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.104", -] - [[package]] name = "regex" version = "1.11.1" @@ -1891,7 +1799,7 @@ dependencies = [ "borsh", "bytes", "num-traits", - "rand 0.8.5", + "rand", "rkyv", "serde", "serde_json", @@ -2024,12 +1932,6 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.10" @@ -2130,19 +2032,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "str_stack" version = "0.1.0" @@ -2335,7 +2224,7 @@ dependencies = [ "kite_sql", "ordered-float", "pprof", - "rand 0.8.5", + "rand", "rust_decimal", "sqlite", ] @@ -2377,16 +2266,6 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" -[[package]] -name = "ulid" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "470dbf6591da1b39d43c14523b2b469c86879a53e8b758c8e090a470fe7b1fbe" -dependencies = [ - "rand 0.9.1", - "web-time", -] - [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index b1094ef7..b854a42f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,11 @@ lmdb = ["dep:lmdb", "dep:lmdb-sys"] pprof = ["pprof/criterion", "pprof/flamegraph"] python = ["parser", "dep:pyo3"] shell = ["parser", "dep:comfy-table", "dep:rustyline"] +wasm = [ + "dep:js-sys", + "dep:wasm-bindgen", + "parser", +] [[bench]] name = "query_bench" @@ -45,17 +50,10 @@ harness = false required-features = ["pprof"] [dependencies] -ahash = { version = "0.8" } bumpalo = { version = "3", default-features = false, features = ["collections"] } -byteorder = { version = "1" } -fixedbitset = { version = "0.4" } -itertools = { version = "0.12" } ordered-float = { version = "4" } paste = { version = "1" } -recursive = { version = "0.1" } kite_sql_serde_macros = { version = "0.2.1", path = "kite_sql_serde_macros" } -siphasher = { version = "1" } -ulid = { version = "1" } # Optional dependencies for features comfy-table = { version = "7", default-features = false, optional = true } @@ -84,16 +82,8 @@ rocksdb = { version = "0.23", optional = true, default-features = false, feature rustyline = { version = "14", default-features = false, optional = true } [target.'cfg(target_arch = "wasm32")'.dependencies] -wasm-bindgen = { version = "0.2.106" } -web-sys = { version = "0.3.83", features = [ - "Storage", - "Window", -] } -base64 = { version = "0.21" } -getrandom = { version = "0.2", features = ["js"] } -getrandom_03 = { package = "getrandom", version = "0.3", features = ["wasm_js"] } -js-sys = { version = "0.3.83" } -once_cell = { version = "1" } +wasm-bindgen = { version = "0.2.106", optional = true } +js-sys = { version = "0.3.83", optional = true } [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen-test = "0.3.56" diff --git a/Makefile b/Makefile index 5b0de543..48fe92d1 100644 --- a/Makefile +++ b/Makefile @@ -30,11 +30,11 @@ build: ## Build the WebAssembly package (artifact goes to ./pkg). wasm-build: - $(WASM_PACK) build --release --target nodejs + $(WASM_PACK) build --release --target nodejs -- --features wasm ## Execute wasm-bindgen tests under Node.js (wasm32 target). test-wasm: - $(WASM_PACK) test --node -- --package kite_sql --lib + $(WASM_PACK) test --node -- --features wasm --package kite_sql --lib ## Run the sqllogictest harness against the configured .slt suite. test-slt: diff --git a/src/binder/aggregate.rs b/src/binder/aggregate.rs index d88d4dc3..bbe3eec7 100644 --- a/src/binder/aggregate.rs +++ b/src/binder/aggregate.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use ahash::RandomState; -use itertools::Itertools; use std::collections::HashSet; use super::{Binder, QueryBindStep}; @@ -266,7 +264,7 @@ impl> Binder<'_, '_, T, A> group_raw_exprs.push(expr); } } - let mut group_raw_set: HashSet<&ScalarExpression, RandomState> = + let mut group_raw_set: HashSet<&ScalarExpression> = HashSet::from_iter(group_raw_exprs.iter().copied()); for expr in select_items { @@ -275,7 +273,7 @@ impl> Binder<'_, '_, T, A> } group_raw_set.remove(expr); - if !group_raw_exprs.iter().contains(&expr) { + if !group_raw_exprs.contains(&expr) { return Err(DatabaseError::AggMiss(format!( "`{expr}` must appear in the GROUP BY clause or be used in an aggregate function" ))); diff --git a/src/binder/create_view.rs b/src/binder/create_view.rs index df48f104..de0b97b7 100644 --- a/src/binder/create_view.rs +++ b/src/binder/create_view.rs @@ -22,7 +22,6 @@ use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; use crate::types::value::DataValue; -use ulid::Ulid; impl> Binder<'_, '_, T, A> { pub(crate) fn bind_create_view( @@ -52,7 +51,7 @@ impl> Binder<'_, '_, T, A> ) }; let mut column = ColumnCatalog::new(output_name, nullable, desc); - column.set_ref_table(view_name.clone(), Ulid::new(), true); + column.set_ref_table(view_name.clone(), 0, true); let output_column = arena.alloc_column(column); exprs.push(ScalarExpression::Alias { diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 9d055022..1b215240 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -16,7 +16,7 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression; use crate::expression::agg::AggKind; -use itertools::Itertools; +use crate::iter_ext::Itertools; use super::{Binder, BinderContext, QueryBindStep, SubQueryType}; use crate::expression::function::scala::{ArcScalarFunctionImpl, ScalarFunction}; @@ -28,7 +28,7 @@ use crate::planner::operator::scalar_subquery::ScalarSubqueryOperator; use crate::planner::{LogicalPlan, PlanArena}; use crate::storage::Transaction; use crate::types::value::{DataValue, Utf8Type}; -use crate::types::{CharLengthUnits, ColumnId, LogicalType}; +use crate::types::{CharLengthUnits, LogicalType}; macro_rules! try_default { ($table_name:expr, $column_name:expr) => { @@ -110,7 +110,7 @@ impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T ) -> (ScalarExpression, ScalarExpression) { let output_column = expr.output_column_ref(arena); let mut alias_column = arena.clone_column(output_column); - alias_column.set_ref_table(arena.temp_table(), ColumnId::new(), true); + alias_column.set_ref_table(arena.temp_table(), 0, true); let alias_column = arena.alloc_column(alias_column); let alias_ref = ScalarExpression::column_expr(alias_column, position); diff --git a/src/binder/parser.rs b/src/binder/parser.rs index 0e158886..b504f936 100644 --- a/src/binder/parser.rs +++ b/src/binder/parser.rs @@ -27,6 +27,7 @@ use crate::expression; use crate::expression::simplify::ConstantCalculator; use crate::expression::visitor_mut::VisitorMut; use crate::expression::{AliasType, ScalarExpression}; +use crate::iter_ext::Itertools; use crate::parser::parse_sql; use crate::planner::operator::alter_table::change_column::{DefaultChange, NotNullChange}; use crate::planner::operator::join::{JoinCondition, JoinOperator as LJoinOperator, JoinType}; @@ -38,7 +39,6 @@ use crate::planner::{Childrens, LogicalPlan, PlanArena}; use crate::storage::{Storage, Transaction}; use crate::types::value::{DataValue, Utf8Type}; use crate::types::{CharLengthUnits, ColumnId, LogicalType}; -use itertools::Itertools; pub(super) use sqlparser::ast::{ AlterColumnOperation, AlterTableOperation, Assignment, AssignmentTarget, BinaryOperator, ColumnDef, ColumnOption, CreateView, DataType, DescribeAlias, Distinct, DuplicateTreatment, @@ -2669,7 +2669,6 @@ impl<'a, 'parent, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder< ) }) .map(|bound_source| bound_source.visible_name()) - .unique() .cloned() .collect_vec(); for visible_name in visible_names { diff --git a/src/binder/select.rs b/src/binder/select.rs index 424e9088..ed43b3a5 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -33,6 +33,7 @@ use crate::errors::DatabaseError; use crate::execution::dql::join::joins_nullable; use crate::expression::visitor_mut::{walk_mut_expr, PositionShift, VisitorMut}; use crate::expression::{AliasType, BinaryOperator}; +use crate::iter_ext::Itertools; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::planner::operator::insert::InsertOperator; use crate::planner::operator::join::JoinCondition; @@ -43,7 +44,6 @@ use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; use crate::types::tuple::Schema; use crate::types::{ColumnId, LogicalType}; -use itertools::Itertools; struct RightSidePositionGlobalizer<'a, 'p> { right_schema: &'a Schema, diff --git a/src/catalog/table.rs b/src/catalog/table.rs index ab94712b..27ce3cbe 100644 --- a/src/catalog/table.rs +++ b/src/catalog/table.rs @@ -15,16 +15,15 @@ use crate::catalog::{ColumnCatalog, ColumnRef, ColumnRelation}; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::{MetaArena, PlanArena}; use crate::types::index::{IndexMeta, IndexMetaRef, IndexType}; use crate::types::tuple::Schema; use crate::types::{ColumnId, LogicalType}; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::collections::BTreeMap; use std::sync::Arc; use std::{slice, vec}; -use ulid::Generator; pub type TableName = Arc; @@ -146,17 +145,16 @@ impl TableCatalog { pub(crate) fn add_column( &mut self, mut col: ColumnCatalog, - generator: &mut Generator, arena: &mut impl MetaArena, ) -> Result { if self.column_idxs.contains_key(col.name()) { return Err(DatabaseError::DuplicateColumn(col.name().to_string())); } let max_existing_id = self.columns.keys().max().copied(); - let mut col_id = generator.generate().unwrap(); - while max_existing_id.is_some_and(|max_id| col_id <= max_id) { - col_id = generator.generate().unwrap(); - } + let col_id = max_existing_id + .unwrap_or(0) + .checked_add(1) + .ok_or(DatabaseError::OverFlow)?; col.summary_mut().relation = ColumnRelation::Table { column_id: col_id, @@ -241,11 +239,8 @@ impl TableCatalog { primary_key_indices: Default::default(), primary_key_type: LogicalType::SqlNull, }; - let mut generator = Generator::new(); for col_catalog in columns.into_iter() { - let _ = table_catalog - .add_column(col_catalog, &mut generator, arena) - .unwrap(); + let _ = table_catalog.add_column(col_catalog, arena).unwrap(); } let (primary_keys, primary_key_indices) = Self::build_primary_keys(&table_catalog.column_refs, arena); @@ -374,7 +369,6 @@ mod tests { use crate::catalog::ColumnDesc; use crate::planner::TableArenaCell; use crate::types::LogicalType; - use ulid::Generator; fn build_table_catalog( name: &str, @@ -455,7 +449,6 @@ mod tests { .filter_map(|column| table_arena.borrow().column(*column).id()) .max() .unwrap(); - let mut generator = Generator::new(); let new_id = table_catalog .add_column( ColumnCatalog::new( @@ -463,7 +456,6 @@ mod tests { true, ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), ), - &mut generator, table_arena.borrow_mut(), ) .unwrap(); diff --git a/src/db.rs b/src/db.rs index b0ff24d4..13382e3a 100644 --- a/src/db.rs +++ b/src/db.rs @@ -51,8 +51,7 @@ use crate::storage::{ }; use crate::types::tuple::{Schema, SchemaView, Tuple}; use crate::types::value::DataValue; -use ahash::HashMap; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; use std::mem; use std::path::Path; @@ -213,7 +212,7 @@ impl DataBaseBuilder { } /// Builds a database for the current target platform. - #[cfg(target_arch = "wasm32")] + #[cfg(all(target_arch = "wasm32", feature = "wasm"))] pub fn build(self) -> Result, DatabaseError> { let storage = MemoryStorage::new(); @@ -448,7 +447,7 @@ impl State { ) -> Result<(), DatabaseError> { let summary = function.summary().clone(); let mut schema = Schema::new(); - function.output_schema_into(&summary.name, self.table_arena.borrow_mut(), &mut schema); + function.output_schema_into(self.table_arena.borrow_mut(), &mut schema); self.table_functions.insert( summary, TableFunctionCatalog { diff --git a/src/execution/ddl/add_column.rs b/src/execution/ddl/add_column.rs index b2721778..a1364e91 100644 --- a/src/execution/ddl/add_column.rs +++ b/src/execution/ddl/add_column.rs @@ -17,12 +17,12 @@ use crate::errors::DatabaseError; use crate::execution::{ DDLApply, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, WriteExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::alter_table::add_column::AddColumnOperator; use crate::storage::Transaction; use crate::types::index::{Index, IndexType}; use crate::types::tuple_builder::TupleBuilder; use crate::types::value::DataValue; -use itertools::Itertools; pub struct AddColumn { op: Option, diff --git a/src/execution/ddl/change_column.rs b/src/execution/ddl/change_column.rs index b8f3d026..40265bc8 100644 --- a/src/execution/ddl/change_column.rs +++ b/src/execution/ddl/change_column.rs @@ -17,10 +17,10 @@ use crate::errors::DatabaseError; use crate::execution::{ DDLApply, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, WriteExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::alter_table::change_column::{ChangeColumnOperator, NotNullChange}; use crate::storage::Transaction; use crate::types::tuple_builder::TupleBuilder; -use itertools::Itertools; pub struct ChangeColumn { op: Option, diff --git a/src/execution/ddl/drop_column.rs b/src/execution/ddl/drop_column.rs index a94669a2..d3b35473 100644 --- a/src/execution/ddl/drop_column.rs +++ b/src/execution/ddl/drop_column.rs @@ -17,10 +17,10 @@ use crate::errors::DatabaseError; use crate::execution::{ DDLApply, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, WriteExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::alter_table::drop_column::DropColumnOperator; use crate::storage::Transaction; use crate::types::tuple_builder::TupleBuilder; -use itertools::Itertools; pub struct DropColumn { op: Option, diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs index b30f2921..7dcb4320 100644 --- a/src/execution/dml/analyze.rs +++ b/src/execution/dml/analyze.rs @@ -19,6 +19,7 @@ use crate::execution::{ ExecutorNode, WriteExecutor, }; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::optimizer::core::histogram::HistogramBuilder; use crate::optimizer::core::statistics_meta::StatisticsMeta; use crate::planner::operator::analyze::AnalyzeOperator; @@ -27,7 +28,6 @@ use crate::storage::{table_codec::TableCodec, Transaction}; use crate::types::index::IndexId; use crate::types::value::{DataValue, Utf8Type}; use crate::types::CharLengthUnits; -use itertools::Itertools; use std::fmt::{self, Formatter}; const DEFAULT_NUM_OF_BUCKETS: usize = 100; diff --git a/src/execution/dml/copy_from_file.rs b/src/execution/dml/copy_from_file.rs index 76c4cde4..6216772e 100644 --- a/src/execution/dml/copy_from_file.rs +++ b/src/execution/dml/copy_from_file.rs @@ -17,10 +17,10 @@ use crate::errors::DatabaseError; use crate::execution::{ ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, WriteExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::copy_from_file::CopyFromFileOperator; use crate::storage::Transaction; use crate::types::tuple_builder::TupleBuilder; -use itertools::Itertools; use std::fs::File; use std::io::BufReader; @@ -129,7 +129,6 @@ mod tests { use crate::types::LogicalType; use std::io::Write; use tempfile::TempDir; - use ulid::Ulid; #[test] fn read_csv() -> Result<(), DatabaseError> { @@ -153,7 +152,7 @@ mod tests { false, ColumnDesc::new(ty, primary_key, false, None)?, ); - column.set_ref_table("t1".to_string().into(), Ulid::new(), false); + column.set_ref_table("t1".to_string().into(), 1, false); Ok(column) } diff --git a/src/execution/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs index 19b18fc1..d7a64be7 100644 --- a/src/execution/dml/copy_to_file.rs +++ b/src/execution/dml/copy_to_file.rs @@ -17,11 +17,11 @@ use crate::errors::DatabaseError; use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, ReadExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::copy_to_file::CopyToFileOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple_builder::TupleBuilder; -use itertools::Itertools; pub struct CopyToFile { op: CopyToFileOperator, @@ -119,7 +119,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for CopyToFile { let message = if self.column_names.is_empty() { format!("{}", self.op) } else { - format!("{} [{}]", self.op, self.column_names.iter().format(", ")) + format!("{} [{}]", self.op, self.column_names.iter().join(", ")) }; TupleBuilder::build_result_into(arena.result_tuple_mut(), message); arena.resume(); diff --git a/src/execution/dml/insert.rs b/src/execution/dml/insert.rs index 015d4823..b294cf35 100644 --- a/src/execution/dml/insert.rs +++ b/src/execution/dml/insert.rs @@ -18,6 +18,7 @@ use crate::execution::{ build_read, with_projection_tmp_value, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, WriteExecutor, }; +use crate::iter_ext::Itertools; use crate::planner::operator::insert::InsertOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; @@ -26,7 +27,6 @@ use crate::types::tuple::{Schema, Tuple}; use crate::types::tuple_builder::TupleBuilder; use crate::types::value::DataValue; use crate::types::ColumnId; -use itertools::Itertools; use std::collections::HashMap; pub struct Insert { diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index 3dfedbf5..783f31a6 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -19,13 +19,13 @@ use crate::execution::{ ExecutorNode, WriteExecutor, }; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::operator::update::UpdateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::index::Index; use crate::types::tuple::{Schema, Tuple}; use crate::types::tuple_builder::TupleBuilder; -use itertools::Itertools; use std::collections::HashMap; pub struct Update { diff --git a/src/execution/dql/aggregate/count.rs b/src/execution/dql/aggregate/count.rs index cdc14a8c..e9dcc0ef 100644 --- a/src/execution/dql/aggregate/count.rs +++ b/src/execution/dql/aggregate/count.rs @@ -15,7 +15,6 @@ use crate::errors::DatabaseError; use crate::execution::dql::aggregate::Accumulator; use crate::types::value::DataValue; -use ahash::RandomState; use std::collections::HashSet; pub struct CountAccumulator { @@ -43,7 +42,7 @@ impl Accumulator for CountAccumulator { } pub struct DistinctCountAccumulator { - distinct_values: HashSet, + distinct_values: HashSet, } impl DistinctCountAccumulator { diff --git a/src/execution/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs index dc4ce73c..2b948bcf 100644 --- a/src/execution/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -18,13 +18,13 @@ use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, ReadExecutor, }; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::value::DataValue; -use ahash::{HashMap, HashMapExt}; -use itertools::Itertools; use std::collections::hash_map::{Entry, IntoIter as HashMapIntoIter}; +use std::collections::HashMap; type HashAggOutput = HashMapIntoIter, Vec>>; @@ -85,7 +85,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for HashAggExecutor { Entry::Vacant(entry) => entry.insert(create_accumulators(&self.agg_calls)?), }; - for (acc, expr) in entry.iter_mut().zip_eq(self.agg_calls.iter()) { + for (acc, expr) in entry.iter_mut().zip(self.agg_calls.iter()) { let ScalarExpression::AggCall { args, .. } = expr else { unreachable!() }; @@ -132,6 +132,7 @@ mod test { use crate::execution::{execute_input, try_collect}; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; + use crate::iter_ext::Itertools; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; @@ -143,7 +144,6 @@ mod test { use crate::storage::Storage; use crate::types::value::DataValue; use crate::types::LogicalType; - use itertools::Itertools; use tempfile::TempDir; #[test] diff --git a/src/execution/dql/aggregate/mod.rs b/src/execution/dql/aggregate/mod.rs index 3153d361..917654b3 100644 --- a/src/execution/dql/aggregate/mod.rs +++ b/src/execution/dql/aggregate/mod.rs @@ -27,8 +27,8 @@ use crate::execution::dql::aggregate::min_max::MinMaxAccumulator; use crate::execution::dql::aggregate::sum::{DistinctSumAccumulator, SumAccumulator}; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::types::value::DataValue; -use itertools::Itertools; use std::borrow::Cow; /// Tips: Idea for sqlrs diff --git a/src/execution/dql/aggregate/stream_distinct.rs b/src/execution/dql/aggregate/stream_distinct.rs index 6aa57549..8433c356 100644 --- a/src/execution/dql/aggregate/stream_distinct.rs +++ b/src/execution/dql/aggregate/stream_distinct.rs @@ -17,12 +17,12 @@ use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, ReadExecutor, }; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::DataValue; -use itertools::Itertools; pub struct StreamDistinctExecutor { groupby_exprs: Vec, @@ -89,6 +89,7 @@ mod tests { use crate::execution::dql::aggregate::stream_distinct::StreamDistinctExecutor; use crate::execution::{execute_input, try_collect}; use crate::expression::ScalarExpression; + use crate::iter_ext::Itertools; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; @@ -100,7 +101,6 @@ mod tests { use crate::storage::{StatisticsMetaCache, Storage, TableCache, ViewCache}; use crate::types::value::DataValue; use crate::types::LogicalType; - use itertools::Itertools; use tempfile::TempDir; #[allow(clippy::type_complexity)] diff --git a/src/execution/dql/aggregate/sum.rs b/src/execution/dql/aggregate/sum.rs index a86f6dd0..dd721a5c 100644 --- a/src/execution/dql/aggregate/sum.rs +++ b/src/execution/dql/aggregate/sum.rs @@ -18,7 +18,6 @@ use crate::expression::BinaryOperator; use crate::types::evaluator::{binary_create, BinaryEvaluatorRef}; use crate::types::value::DataValue; use crate::types::LogicalType; -use ahash::RandomState; use std::borrow::Cow; use std::collections::HashSet; @@ -61,7 +60,7 @@ impl Accumulator for SumAccumulator { } pub struct DistinctSumAccumulator { - distinct_values: HashSet, + distinct_values: HashSet, inner: SumAccumulator, } diff --git a/src/execution/dql/join/hash/full_join.rs b/src/execution/dql/join/hash/full_join.rs index 172d6b9a..de95a531 100644 --- a/src/execution/dql/join/hash/full_join.rs +++ b/src/execution/dql/join/hash/full_join.rs @@ -17,15 +17,15 @@ use crate::execution::dql::join::hash::{ filter, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; +use crate::execution::dql::join::RowBitmap; use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; -use fixedbitset::FixedBitSet; pub(crate) struct FullJoinState { pub(crate) left_schema_len: usize, pub(crate) right_schema_len: usize, - pub(crate) bits: FixedBitSet, + pub(crate) bits: RowBitmap, } impl JoinProbeState for FullJoinState { @@ -70,7 +70,7 @@ impl JoinProbeState for FullJoinState { SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_expr)? { probe_state.has_filtered = true; - self.bits.set(*i, true); + self.bits.insert(*i); return Ok(Some(Self::full_right_row( self.left_schema_len, &probe_state.probe_tuple, diff --git a/src/execution/dql/join/hash/left_join.rs b/src/execution/dql/join/hash/left_join.rs index 109516b8..7d3a123e 100644 --- a/src/execution/dql/join/hash/left_join.rs +++ b/src/execution/dql/join/hash/left_join.rs @@ -17,15 +17,15 @@ use crate::execution::dql::join::hash::{ filter, JoinProbeState, LeftDropState, LeftDropTuples, ProbeState, }; use crate::execution::dql::join::hash_join::BuildState; +use crate::execution::dql::join::RowBitmap; use crate::expression::ScalarExpression; use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; -use fixedbitset::FixedBitSet; pub(crate) struct LeftJoinState { pub(crate) left_schema_len: usize, pub(crate) right_schema_len: usize, - pub(crate) bits: FixedBitSet, + pub(crate) bits: RowBitmap, } impl JoinProbeState for LeftJoinState { @@ -54,7 +54,7 @@ impl JoinProbeState for LeftJoinState { SplitTupleRef::from_slices(values, &probe_state.probe_tuple.values); if !filter(&full_values, filter_expr)? { probe_state.has_filtered = true; - self.bits.set(*i, true); + self.bits.insert(*i); continue; } } diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index 88dd4444..68422c3e 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -20,6 +20,7 @@ use crate::execution::dql::join::hash::right_join::RightJoinState; use crate::execution::dql::join::hash::{ JoinProbeState, JoinProbeStateImpl, LeftDropState, ProbeState, }; +use crate::execution::dql::join::RowBitmap; use crate::execution::dql::sort::BumpVec; use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, ReadExecutor, @@ -30,9 +31,8 @@ use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::DataValue; -use ahash::{HashMap, HashMapExt}; use bumpalo::Bump; -use fixedbitset::FixedBitSet; +use std::collections::HashMap; use std::mem::transmute; pub struct HashJoin { @@ -203,13 +203,13 @@ impl HashJoin { JoinType::LeftOuter => JoinProbeStateImpl::Left(LeftJoinState { left_schema_len, right_schema_len, - bits: FixedBitSet::with_capacity(build_count), + bits: RowBitmap::with_capacity(build_count), }), JoinType::RightOuter => JoinProbeStateImpl::Right(RightJoinState { left_schema_len }), JoinType::Full => JoinProbeStateImpl::Full(FullJoinState { left_schema_len, right_schema_len, - bits: FixedBitSet::with_capacity(build_count), + bits: RowBitmap::with_capacity(build_count), }), JoinType::Cross => unreachable!(), } diff --git a/src/execution/dql/join/mod.rs b/src/execution/dql/join/mod.rs index d15176f2..b0165929 100644 --- a/src/execution/dql/join/mod.rs +++ b/src/execution/dql/join/mod.rs @@ -18,6 +18,40 @@ mod hash; pub(crate) mod hash_join; pub(crate) mod nested_loop_join; +pub(crate) struct RowBitmap { + blocks: Vec, + len: usize, +} + +impl RowBitmap { + pub(crate) fn with_capacity(len: usize) -> Self { + let bits_per_block = usize::BITS as usize; + let blocks = len.div_ceil(bits_per_block); + RowBitmap { + blocks: vec![0; blocks], + len, + } + } + + pub(crate) fn insert(&mut self, index: usize) { + assert!( + index < self.len, + "insert at index {index} exceeds bitmap size {}", + self.len + ); + let bits_per_block = usize::BITS as usize; + self.blocks[index / bits_per_block] |= 1usize << (index % bits_per_block); + } + + pub(crate) fn contains(&self, index: usize) -> bool { + if index >= self.len { + return false; + } + let bits_per_block = usize::BITS as usize; + (self.blocks[index / bits_per_block] & (1usize << (index % bits_per_block))) != 0 + } +} + pub fn joins_nullable(join_type: &JoinType) -> (bool, bool) { match join_type { JoinType::Inner => (false, false), @@ -27,3 +61,59 @@ pub fn joins_nullable(join_type: &JoinType) -> (bool, bool) { JoinType::Cross => (true, true), } } + +#[cfg(test)] +mod tests { + use super::RowBitmap; + + #[test] + fn row_bitmap_contains_false_for_empty_bitmap() { + let bitmap = RowBitmap::with_capacity(0); + + assert!(!bitmap.contains(0)); + assert!(!bitmap.contains(usize::MAX)); + } + + #[test] + fn row_bitmap_contains_false_for_unset_bits_within_capacity() { + let bitmap = RowBitmap::with_capacity(usize::BITS as usize + 1); + + assert!(!bitmap.contains(0)); + assert!(!bitmap.contains(usize::BITS as usize)); + } + + #[test] + fn row_bitmap_insert_marks_bits_across_blocks() { + let boundary = usize::BITS as usize; + let mut bitmap = RowBitmap::with_capacity(boundary + 2); + + bitmap.insert(0); + bitmap.insert(boundary - 1); + bitmap.insert(boundary); + bitmap.insert(boundary + 1); + + assert!(bitmap.contains(0)); + assert!(bitmap.contains(boundary - 1)); + assert!(bitmap.contains(boundary)); + assert!(bitmap.contains(boundary + 1)); + assert!(!bitmap.contains(1)); + } + + #[test] + fn row_bitmap_contains_returns_false_past_capacity() { + let mut bitmap = RowBitmap::with_capacity(2); + + bitmap.insert(1); + + assert!(!bitmap.contains(2)); + assert!(!bitmap.contains(usize::MAX)); + } + + #[test] + #[should_panic(expected = "exceeds bitmap size")] + fn row_bitmap_insert_panics_past_capacity() { + let mut bitmap = RowBitmap::with_capacity(1); + + bitmap.insert(1); + } +} diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index f4e348df..f47a56a3 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -16,17 +16,17 @@ //! [`JoinType::RightOuter`], [`JoinType::Cross`], [`JoinType::Full`]. use crate::errors::DatabaseError; +use crate::execution::dql::join::RowBitmap; use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionContext, ExecutorNode, ReadExecutor, }; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::{SplitTupleRef, Tuple}; use crate::types::value::DataValue; -use fixedbitset::FixedBitSet; -use itertools::Itertools; /// Equivalent condition struct EqualCondition { @@ -76,15 +76,15 @@ pub struct NestedLoopJoin { enum NestedLoopJoinState { PullLeft { - right_bitmap: Option, + right_bitmap: Option, }, ScanRight { active_left: ActiveLeftState, - right_bitmap: Option, + right_bitmap: Option, }, EmitRightUnmatched { right_input: ExecId, - right_bitmap: FixedBitSet, + right_bitmap: Option, right_emit_index: usize, }, End, @@ -178,7 +178,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for NestedLoopJoin { if matches!(self.ty, JoinType::Full) { state = NestedLoopJoinState::EmitRightUnmatched { right_input: self.build_right_input(arena, plan_arena), - right_bitmap: right_bitmap.unwrap_or_default(), + right_bitmap, right_emit_index: 0, }; continue; @@ -288,7 +288,7 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for NestedLoopJoin { bits.insert(idx); } } else { - let mut bits = FixedBitSet::with_capacity(active_left.right_index); + let mut bits = RowBitmap::with_capacity(active_left.right_index); for idx in active_left.first_matches { bits.insert(idx); } @@ -338,7 +338,12 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for NestedLoopJoin { let idx = right_emit_index; right_emit_index += 1; - if !right_bitmap.contains(idx) { + let is_unmatched = match right_bitmap.as_ref() { + Some(bits) => !bits.contains(idx), + None => true, + }; + + if is_unmatched { let mut values = vec![DataValue::Null; self.eq_cond.left_len]; values.append(&mut right_tuple.values); self.state = NestedLoopJoinState::EmitRightUnmatched { diff --git a/src/execution/dql/mod.rs b/src/execution/dql/mod.rs index e271df5e..774c44e5 100644 --- a/src/execution/dql/mod.rs +++ b/src/execution/dql/mod.rs @@ -36,8 +36,8 @@ pub(crate) mod values; #[cfg(all(test, not(target_arch = "wasm32")))] pub(crate) mod test { + use crate::iter_ext::Itertools; use crate::types::value::DataValue; - use itertools::Itertools; pub(crate) fn build_integers(ints: Vec>) -> Vec { ints.into_iter() diff --git a/src/execution/dql/set_membership.rs b/src/execution/dql/set_membership.rs index 88e4f4f5..f61d5600 100644 --- a/src/execution/dql/set_membership.rs +++ b/src/execution/dql/set_membership.rs @@ -20,7 +20,7 @@ use crate::planner::operator::set_membership::SetMembershipKind; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; -use ahash::{HashMap, HashMapExt}; +use std::collections::HashMap; pub struct SetMembership { kind: SetMembershipKind, diff --git a/src/expression/function/table.rs b/src/expression/function/table.rs index 3c77c831..b14a6b58 100644 --- a/src/expression/function/table.rs +++ b/src/expression/function/table.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; @@ -68,12 +67,7 @@ pub trait TableFunctionImpl: Debug + Send + Sync { fn summary(&self) -> &FunctionSummary; - fn output_schema_into( - &self, - table_name: &TableName, - table_arena: &mut TableArena, - schema: &mut Schema, - ); + fn output_schema_into(&self, table_arena: &mut TableArena, schema: &mut Schema); } impl TableFunction { diff --git a/src/expression/mod.rs b/src/expression/mod.rs index accd0040..17ac3f6f 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -19,6 +19,7 @@ use crate::expression::function::scala::ScalarFunction; use crate::expression::function::table::TableFunction; use crate::expression::visitor::{walk_expr, Visitor}; use crate::expression::visitor_mut::VisitorMut; +use crate::iter_ext::Itertools; use crate::planner::operator::sort::SortField; use crate::planner::{MetaArena, PlanArena}; use crate::types::evaluator::{ @@ -27,7 +28,6 @@ use crate::types::evaluator::{ }; use crate::types::value::DataValue; use crate::types::{CharLengthUnits, LogicalType}; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; #[cfg(feature = "decimal")] use rust_decimal::Decimal; @@ -1133,11 +1133,7 @@ mod test { let mut table_functions = TableFunctions::default(); let numbers = Numbers::new(); let mut schema = Vec::new(); - numbers.output_schema_into( - &numbers.summary().name, - table_arena.borrow_mut(), - &mut schema, - ); + numbers.output_schema_into(table_arena.borrow_mut(), &mut schema); table_functions.insert( numbers.summary().clone(), TableFunctionCatalog { diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index 9a60a0ad..e5212e88 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -15,10 +15,10 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; +use crate::iter_ext::Itertools; use crate::planner::PlanArena; use crate::types::value::DataValue; use crate::types::ColumnId; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::cmp::Ordering; use std::collections::Bound; diff --git a/src/expression/visitor.rs b/src/expression/visitor.rs index 3c2047c9..c897eb1b 100644 --- a/src/expression/visitor.rs +++ b/src/expression/visitor.rs @@ -265,7 +265,6 @@ pub trait Visitor<'a>: Sized { } } -#[recursive::recursive] pub fn walk_expr<'a, V: Visitor<'a>>( visitor: &mut V, expr: &'a ScalarExpression, diff --git a/src/expression/visitor_mut.rs b/src/expression/visitor_mut.rs index f00377ed..96d5f07c 100644 --- a/src/expression/visitor_mut.rs +++ b/src/expression/visitor_mut.rs @@ -288,7 +288,6 @@ pub trait VisitorMut<'a>: Sized { } } -#[recursive::recursive] pub fn walk_mut_expr<'a, V: VisitorMut<'a>>( visitor: &mut V, expr: &'a mut ScalarExpression, diff --git a/src/function/numbers.rs b/src/function/numbers.rs index 7a84abd1..25d48491 100644 --- a/src/function/numbers.rs +++ b/src/function/numbers.rs @@ -14,7 +14,6 @@ use crate::catalog::ColumnCatalog; use crate::catalog::ColumnDesc; -use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::expression::function::table::TableFunctionImpl; use crate::expression::function::FunctionSummary; @@ -67,19 +66,11 @@ impl TableFunctionImpl for Numbers { &self.summary } - fn output_schema_into( - &self, - table_name: &TableName, - table_arena: &mut TableArena, - schema: &mut Schema, - ) { - schema.push(table_arena.alloc_table_column( - table_name.clone(), - ColumnCatalog::new( - "number".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), - ), - )); + fn output_schema_into(&self, table_arena: &mut TableArena, schema: &mut Schema) { + schema.push(table_arena.alloc_column(ColumnCatalog::new( + "number".to_string(), + true, + ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), + ))); } } diff --git a/src/iter_ext.rs b/src/iter_ext.rs new file mode 100644 index 00000000..23cb7720 --- /dev/null +++ b/src/iter_ext.rs @@ -0,0 +1,56 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +pub(crate) trait Itertools: Iterator + Sized { + fn collect_vec(self) -> Vec { + self.collect() + } + + fn join(self, separator: &str) -> String + where + Self::Item: Display, + { + let mut output = String::new(); + + for (index, item) in self.enumerate() { + if index > 0 { + output.push_str(separator); + } + output.push_str(&item.to_string()); + } + + output + } + + fn sorted_by_key(self, f: F) -> std::vec::IntoIter + where + K: Ord, + F: FnMut(&Self::Item) -> K, + { + let mut values = self.collect_vec(); + values.sort_by_key(f); + values.into_iter() + } + + fn try_collect(self) -> Result, E> + where + Self: Iterator>, + { + self.collect() + } +} + +impl Itertools for I where I: Iterator {} diff --git a/src/lib.rs b/src/lib.rs index 9d5e4235..715f3044 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,7 @@ pub mod errors; pub mod execution; pub mod expression; mod function; +pub(crate) mod iter_ext; #[cfg(feature = "macros")] pub mod macros; mod optimizer; @@ -123,7 +124,7 @@ pub mod python; pub mod serdes; pub mod storage; pub mod types; -#[cfg(target_arch = "wasm32")] +#[cfg(all(target_arch = "wasm32", feature = "wasm"))] pub mod wasm; #[cfg(feature = "orm")] diff --git a/src/macros/mod.rs b/src/macros/mod.rs index 7e8f5607..fb67e4ab 100644 --- a/src/macros/mod.rs +++ b/src/macros/mod.rs @@ -188,15 +188,9 @@ macro_rules! table_function { &self.summary } - fn output_schema_into( - &self, - table_name: &::kite_sql::catalog::table::TableName, - table_arena: &mut ::kite_sql::planner::TableArena, - schema: &mut ::kite_sql::types::tuple::Schema, - ) { + fn output_schema_into(&self, table_arena: &mut ::kite_sql::planner::TableArena, schema: &mut ::kite_sql::types::tuple::Schema) { $({ - schema.push(table_arena.alloc_table_column( - table_name.clone(), + schema.push(table_arena.alloc_column( ::kite_sql::catalog::column::ColumnCatalog::new(stringify!($output_name).to_lowercase(), true, ::kite_sql::catalog::column::ColumnDesc::new($output_ty, None, false, None).unwrap()), )); })* diff --git a/src/optimizer/core/cm_sketch.rs b/src/optimizer/core/cm_sketch.rs index 68ede2d7..122daaab 100644 --- a/src/optimizer/core/cm_sketch.rs +++ b/src/optimizer/core/cm_sketch.rs @@ -14,18 +14,17 @@ use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; +use crate::serdes::stable_hash::{StableHasher, CM_SKETCH_HASH_KEYS}; use crate::serdes::{ReferenceSerialization, ReferenceTables}; use crate::storage::Transaction; use crate::types::value::DataValue; use kite_sql_serde_macros::ReferenceSerialization; -use siphasher::sip::SipHasher13; use std::borrow::Borrow; use std::hash::{Hash, Hasher}; use std::io::{Read, Write}; use std::marker::PhantomData; use std::{cmp, mem}; -pub(crate) type FastHasher = SipHasher13; pub(crate) const COUNT_MIN_SKETCH_STORAGE_PAGE_LEN: usize = 16 * 1024; #[derive(Debug, Clone, ReferenceSerialization)] @@ -33,8 +32,8 @@ pub struct CountMinSketchMeta { width: usize, k_num: usize, page_len: usize, - hasher_0: FastHasher, - hasher_1: FastHasher, + hasher_0: StableHasher, + hasher_1: StableHasher, } impl CountMinSketchMeta { @@ -77,7 +76,7 @@ pub struct CountMinSketchPage { pub struct CountMinSketch { counters: Vec>, offsets: Vec, - hashers: [FastHasher; 2], + hashers: [StableHasher; 2], mask: usize, k_num: usize, phantom_k: PhantomData, @@ -219,7 +218,7 @@ impl CountMinSketch { let k_num = Self::optimal_k_num(probability); let counters = vec![vec![0; width]; k_num]; let offsets = vec![0; k_num]; - let hashers = [Self::sip_new(), Self::sip_new()]; + let hashers = Self::new_hashers(); CountMinSketch { counters, offsets, @@ -290,7 +289,7 @@ impl CountMinSketch { *counter = 0 } } - self.hashers = [Self::sip_new(), Self::sip_new()]; + self.hashers = Self::new_hashers(); } fn optimal_width(capacity: usize, tolerance: f64) -> usize { @@ -311,8 +310,12 @@ impl CountMinSketch { cmp::max(1, ((1.0 - probability).ln() / 0.5f64.ln()) as usize) } - fn sip_new() -> FastHasher { - FastHasher::new_with_keys(0, 1) + fn new_hashers() -> [StableHasher; 2] { + let [(key0, key1), (key2, key3)] = CM_SKETCH_HASH_KEYS; + [ + StableHasher::new_with_keys(key0, key1), + StableHasher::new_with_keys(key2, key3), + ] } fn offset(&self, hashes: &mut [u64; 2], key: &Q, k_i: usize) -> usize @@ -363,8 +366,8 @@ impl ReferenceSerialization for CountMinSketch { ) -> Result { let counters = Vec::>::decode(reader, drive, reference_tables, arena)?; let offsets = Vec::::decode(reader, drive, reference_tables, arena)?; - let hasher_0 = FastHasher::decode(reader, drive, reference_tables, arena)?; - let hasher_1 = FastHasher::decode(reader, drive, reference_tables, arena)?; + let hasher_0 = StableHasher::decode(reader, drive, reference_tables, arena)?; + let hasher_1 = StableHasher::decode(reader, drive, reference_tables, arena)?; let mask = usize::decode(reader, drive, reference_tables, arena)?; let k_num = usize::decode(reader, drive, reference_tables, arena)?; diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index cfbbda7a..2d89a177 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -669,12 +669,11 @@ mod tests { use crate::types::value::DataValue; use crate::types::LogicalType; use std::ops::Bound; - use ulid::Ulid; fn index_meta() -> IndexMeta { IndexMeta { id: 0, - column_ids: vec![Ulid::new()], + column_ids: vec![1], table_name: "t1".to_string().into(), pk_ty: LogicalType::Integer, value_ty: LogicalType::Integer, @@ -969,7 +968,7 @@ mod tests { &sketch, )?; - assert_eq!(count_7, 13); + assert_eq!(count_7, 14); let count_8 = histogram.collect_count( &[Range::Scope { @@ -999,7 +998,7 @@ mod tests { &sketch, )?; - assert_eq!(count_10, 2); + assert_eq!(count_10, 3); let count_11 = histogram.collect_count( &[Range::Scope { diff --git a/src/optimizer/core/statistics_meta.rs b/src/optimizer/core/statistics_meta.rs index 468854d3..410e51c7 100644 --- a/src/optimizer/core/statistics_meta.rs +++ b/src/optimizer/core/statistics_meta.rs @@ -147,13 +147,12 @@ mod tests { use crate::types::value::DataValue; use crate::types::LogicalType; use std::sync::Arc; - use ulid::Ulid; #[test] fn test_into_parts_and_from_parts() -> Result<(), DatabaseError> { let index = IndexMeta { id: 0, - column_ids: vec![Ulid::new()], + column_ids: vec![1], table_name: "t1".to_string().into(), pk_ty: LogicalType::Integer, value_ty: LogicalType::Integer, diff --git a/src/optimizer/rule/normalization/agg_elimination.rs b/src/optimizer/rule/normalization/agg_elimination.rs index 3e839962..99c806bd 100644 --- a/src/optimizer/rule/normalization/agg_elimination.rs +++ b/src/optimizer/rule/normalization/agg_elimination.rs @@ -396,9 +396,9 @@ mod tests { use crate::planner::{Childrens, LogicalPlan}; use crate::types::index::{IndexInfo, IndexLookup, IndexMeta, IndexType}; use crate::types::value::DataValue; + use crate::types::ColumnId; use crate::types::LogicalType; use std::ops::Bound; - use ulid::Ulid; fn make_sort_field(arena: &mut crate::planner::PlanArena, name: &str) -> SortField { make_sort_field_with_position(arena, name, 0) } @@ -459,7 +459,7 @@ mod tests { let table_name: TableName = ::std::sync::Arc::from("t1"); let meta = arena.alloc_index(IndexMeta { id: 1, - column_ids: (0..len).map(|_| Ulid::new()).collect(), + column_ids: (1..=len as ColumnId).collect(), table_name, pk_ty: LogicalType::Integer, value_ty: LogicalType::Integer, @@ -485,7 +485,7 @@ mod tests { ) -> (LogicalPlan, SortOption) { let table_name: TableName = ::std::sync::Arc::from("t1"); let c1 = arena.alloc_column(ColumnCatalog::new_dummy("c1".to_string())); - let c1_id = Ulid::new(); + let c1_id = 1; let columns = vec![c1]; let sort_fields = vec![SortField::new( diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index af0e49bb..af5bc125 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -16,6 +16,7 @@ use crate::errors::DatabaseError; use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::expression::visitor_mut::{PositionShift, VisitorMut}; use crate::expression::{BinaryOperator, ScalarExpression}; +use crate::iter_ext::Itertools; use crate::optimizer::core::rule::NormalizationRule; use crate::optimizer::plan_utils::{replace_with_only_child, wrap_child_with}; use crate::planner::operator::filter::FilterOperator; @@ -25,7 +26,6 @@ use crate::planner::{Childrens, LogicalPlan}; use crate::types::index::{IndexInfo, IndexLookup, IndexMetaRef, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; -use itertools::Itertools; use std::ops::Bound; use std::{mem, slice}; @@ -517,7 +517,6 @@ mod tests { use crate::types::value::DataValue; use crate::types::LogicalType; use std::collections::Bound; - use ulid::Ulid; fn apply_pipeline( plan: LogicalPlan, @@ -574,9 +573,9 @@ mod tests { let table_name: TableName = ::std::sync::Arc::from("mock_table"); let table_arena = crate::planner::TableArenaCell::default(); let mut arena = PlanArena::new(&table_arena); - let c1_id = Ulid::new(); - let c2_id = Ulid::new(); - let c3_id = Ulid::new(); + let c1_id = 1; + let c2_id = 2; + let c3_id = 3; let mut c1 = ColumnCatalog::new( "c1".to_string(), diff --git a/src/planner/arena.rs b/src/planner/arena.rs index dfdf4d98..a623ce35 100644 --- a/src/planner/arena.rs +++ b/src/planner/arena.rs @@ -163,16 +163,7 @@ impl TableArena { .unwrap_or_else(|| panic!("unknown dummy column: {name}")) } - pub fn alloc_table_column( - &mut self, - table_name: TableName, - mut column: ColumnCatalog, - ) -> ColumnRef { - column.set_ref_table(table_name, ulid::Ulid::new(), false); - self.alloc_column(column) - } - - pub(crate) fn alloc_column(&mut self, column: ColumnCatalog) -> ColumnRef { + pub fn alloc_column(&mut self, column: ColumnCatalog) -> ColumnRef { ::alloc_column(self, column) } @@ -548,7 +539,7 @@ mod tests { fn table_column(name: &str, is_temp: bool) -> ColumnCatalog { let mut column = column(name); - column.set_ref_table("t".to_string().into(), ulid::Ulid::new(), is_temp); + column.set_ref_table("t".to_string().into(), if is_temp { 0 } else { 1 }, is_temp); column } diff --git a/src/planner/operator/aggregate.rs b/src/planner/operator/aggregate.rs index c59c9142..a8b836b6 100644 --- a/src/planner/operator/aggregate.rs +++ b/src/planner/operator/aggregate.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::iter_ext::Itertools; use crate::planner::{Childrens, LogicalPlan}; use crate::{expression::ScalarExpression, planner::operator::Operator}; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/copy_from_file.rs b/src/planner/operator/copy_from_file.rs index 75439bc9..51836e59 100644 --- a/src/planner/operator/copy_from_file.rs +++ b/src/planner/operator/copy_from_file.rs @@ -14,8 +14,8 @@ use crate::binder::copy::ExtSource; use crate::catalog::TableName; +use crate::iter_ext::Itertools; use crate::types::tuple::Schema; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/create_index.rs b/src/planner/operator/create_index.rs index 5ffa50da..92622660 100644 --- a/src/planner/operator/create_index.rs +++ b/src/planner/operator/create_index.rs @@ -13,8 +13,8 @@ // limitations under the License. use crate::catalog::{ColumnRef, TableName}; +use crate::iter_ext::Itertools; use crate::types::index::IndexType; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/create_table.rs b/src/planner/operator/create_table.rs index e6c79103..c1e93eb1 100644 --- a/src/planner/operator/create_table.rs +++ b/src/planner/operator/create_table.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::catalog::{ColumnCatalog, TableName}; -use itertools::Itertools; +use crate::iter_ext::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/join.rs b/src/planner/operator/join.rs index 93cb3346..528b88dd 100644 --- a/src/planner/operator/join.rs +++ b/src/planner/operator/join.rs @@ -14,8 +14,8 @@ use super::Operator; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::{Childrens, LogicalPlan}; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/project.rs b/src/planner/operator/project.rs index 003614b8..a1af3b05 100644 --- a/src/planner/operator/project.rs +++ b/src/planner/operator/project.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::expression::ScalarExpression; -use itertools::Itertools; +use crate::iter_ext::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/set_membership.rs b/src/planner/operator/set_membership.rs index 0799eec1..34ad6fb7 100644 --- a/src/planner/operator/set_membership.rs +++ b/src/planner/operator/set_membership.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::iter_ext::Itertools; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::types::tuple::Schema; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/sort.rs b/src/planner/operator/sort.rs index 5b842a0e..2e51e5ff 100644 --- a/src/planner/operator/sort.rs +++ b/src/planner/operator/sort.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::expression::ScalarExpression; -use itertools::Itertools; +use crate::iter_ext::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/table_scan.rs b/src/planner/operator/table_scan.rs index 84853d20..b20c19ae 100644 --- a/src/planner/operator/table_scan.rs +++ b/src/planner/operator/table_scan.rs @@ -16,11 +16,11 @@ use super::{Operator, SortOption}; use crate::catalog::{ColumnRef, TableCatalog, TableName}; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::planner::operator::sort::SortField; use crate::planner::{Childrens, LogicalPlan, PlanArena}; use crate::storage::Bounds; use crate::types::index::IndexInfo; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/top_k.rs b/src/planner/operator/top_k.rs index d1037400..47364cae 100644 --- a/src/planner/operator/top_k.rs +++ b/src/planner/operator/top_k.rs @@ -13,9 +13,9 @@ // limitations under the License. use super::Operator; +use crate::iter_ext::Itertools; use crate::planner::operator::sort::SortField; use crate::planner::{Childrens, LogicalPlan}; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/union.rs b/src/planner/operator/union.rs index e3f7838f..748fe13f 100644 --- a/src/planner/operator/union.rs +++ b/src/planner/operator/union.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::iter_ext::Itertools; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::types::tuple::Schema; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/update.rs b/src/planner/operator/update.rs index 91b735b4..4f668b73 100644 --- a/src/planner/operator/update.rs +++ b/src/planner/operator/update.rs @@ -14,7 +14,7 @@ use crate::catalog::{ColumnRef, TableName}; use crate::expression::ScalarExpression; -use itertools::Itertools; +use crate::iter_ext::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/planner/operator/values.rs b/src/planner/operator/values.rs index 9140713d..91e1b51c 100644 --- a/src/planner/operator/values.rs +++ b/src/planner/operator/values.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::iter_ext::Itertools; use crate::types::tuple::Schema; use crate::types::value::DataValue; -use itertools::Itertools; use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; diff --git a/src/serdes/column.rs b/src/serdes/column.rs index 71092fa4..682c6810 100644 --- a/src/serdes/column.rs +++ b/src/serdes/column.rs @@ -145,7 +145,6 @@ pub(crate) mod test { use crate::types::LogicalType; use std::io::{Cursor, Seek, SeekFrom}; use tempfile::TempDir; - use ulid::Ulid; #[test] fn test_column_serialization() -> Result<(), DatabaseError> { @@ -246,7 +245,7 @@ pub(crate) mod test { let summary = ColumnSummary { name: "c1".to_string(), relation: ColumnRelation::Table { - column_id: Ulid::new(), + column_id: 1, table_name: "t1".to_string().into(), is_temp: false, }, @@ -285,7 +284,7 @@ pub(crate) mod test { assert_eq!(none_relation, decode_relation); cursor.seek(SeekFrom::Start(0))?; let table_relation = ColumnRelation::Table { - column_id: Ulid::new(), + column_id: 1, table_name: "t1".to_string().into(), is_temp: false, }; diff --git a/src/serdes/hasher.rs b/src/serdes/hasher.rs index d527dec1..ae4bed8d 100644 --- a/src/serdes/hasher.rs +++ b/src/serdes/hasher.rs @@ -13,11 +13,11 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::optimizer::core::cm_sketch::FastHasher; +use crate::serdes::stable_hash::StableHasher; use crate::serdes::{ReferenceSerialization, ReferenceTables}; use crate::storage::Transaction; -impl ReferenceSerialization for FastHasher { +impl ReferenceSerialization for StableHasher { fn encode( &self, writer: &mut W, @@ -39,6 +39,6 @@ impl ReferenceSerialization for FastHasher { let key0 = u64::decode(reader, drive, reference_tables, arena)?; let key1 = u64::decode(reader, drive, reference_tables, arena)?; - Ok(FastHasher::new_with_keys(key0, key1)) + Ok(StableHasher::new_with_keys(key0, key1)) } } diff --git a/src/serdes/mod.rs b/src/serdes/mod.rs index 777f2e8c..1f5ca2fc 100644 --- a/src/serdes/mod.rs +++ b/src/serdes/mod.rs @@ -30,9 +30,9 @@ mod path_buf; mod phantom; mod ptr; mod slice; +pub(crate) mod stable_hash; mod string; mod trim; -mod ulid; mod vec; use crate::catalog::TableName; diff --git a/src/serdes/stable_hash.rs b/src/serdes/stable_hash.rs new file mode 100644 index 00000000..052b69f2 --- /dev/null +++ b/src/serdes/stable_hash.rs @@ -0,0 +1,155 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::hash::Hasher; + +const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325; +const FNV_PRIME: u64 = 0x0000_0100_0000_01b3; + +pub(crate) const TABLE_NAME_HASH_KEYS: (u64, u64) = (0x9e37_79b9_7f4a_7c15, 0xbf58_476d_1ce4_e5b9); +pub(crate) const CM_SKETCH_HASH_KEYS: [(u64, u64); 2] = [ + (0x94d0_49bb_1331_11eb, 0xd6e8_feb8_6659_fd93), + (0xa076_1d64_78bd_642f, 0xe703_7ed1_a0b4_28db), +]; + +#[derive(Debug, Clone, Copy)] +pub(crate) struct StableHasher { + key0: u64, + key1: u64, + hash: u64, +} + +impl StableHasher { + pub(crate) fn new_with_keys(key0: u64, key1: u64) -> Self { + StableHasher { + key0, + key1, + hash: mix64(FNV_OFFSET ^ key0).wrapping_add(mix64(key1)), + } + } + + pub(crate) fn keys(&self) -> (u64, u64) { + (self.key0, self.key1) + } +} + +impl Hasher for StableHasher { + fn finish(&self) -> u64 { + mix64(self.hash ^ self.key0.rotate_left(17) ^ self.key1.rotate_right(11)) + } + + fn write(&mut self, bytes: &[u8]) { + for byte in bytes { + self.hash ^= u64::from(*byte); + self.hash = self.hash.wrapping_mul(FNV_PRIME); + } + } + + fn write_u8(&mut self, i: u8) { + self.write(&[i]); + } + + fn write_u16(&mut self, i: u16) { + self.write(&i.to_le_bytes()); + } + + fn write_u32(&mut self, i: u32) { + self.write(&i.to_le_bytes()); + } + + fn write_u64(&mut self, i: u64) { + self.write(&i.to_le_bytes()); + } + + fn write_u128(&mut self, i: u128) { + self.write(&i.to_le_bytes()); + } + + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + fn write_i8(&mut self, i: i8) { + self.write_u8(i as u8); + } + + fn write_i16(&mut self, i: i16) { + self.write(&i.to_le_bytes()); + } + + fn write_i32(&mut self, i: i32) { + self.write(&i.to_le_bytes()); + } + + fn write_i64(&mut self, i: i64) { + self.write(&i.to_le_bytes()); + } + + fn write_i128(&mut self, i: i128) { + self.write(&i.to_le_bytes()); + } + + fn write_isize(&mut self, i: isize) { + self.write_i64(i as i64); + } +} + +fn mix64(mut value: u64) -> u64 { + value ^= value >> 30; + value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9); + value ^= value >> 27; + value = value.wrapping_mul(0x94d0_49bb_1331_11eb); + value ^ (value >> 31) +} + +#[cfg(test)] +mod tests { + use super::StableHasher; + use std::hash::{Hash, Hasher}; + + #[test] + fn stable_hasher_is_deterministic_for_same_keys() { + let mut left = StableHasher::new_with_keys(1, 2); + let mut right = StableHasher::new_with_keys(1, 2); + + "kite".hash(&mut left); + "kite".hash(&mut right); + + assert_eq!(left.finish(), right.finish()); + assert_eq!(left.keys(), (1, 2)); + } + + #[test] + fn stable_hasher_keys_change_hash_output() { + let mut left = StableHasher::new_with_keys(1, 2); + let mut right = StableHasher::new_with_keys(2, 1); + + "kite".hash(&mut left); + "kite".hash(&mut right); + + assert_ne!(left.finish(), right.finish()); + } + + #[test] + fn stable_hasher_write_chunks_match_single_write() { + let mut chunked = StableHasher::new_with_keys(0, 1); + chunked.write(b"ki"); + chunked.write(b"te"); + + let mut single = StableHasher::new_with_keys(0, 1); + single.write(b"kite"); + + assert_eq!(chunked.finish(), single.finish()); + } +} diff --git a/src/serdes/ulid.rs b/src/serdes/ulid.rs deleted file mode 100644 index e0cc7da9..00000000 --- a/src/serdes/ulid.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2024 KipData/KiteSQL -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::errors::DatabaseError; -use crate::serdes::{ReferenceSerialization, ReferenceTables}; -use crate::storage::Transaction; -use std::io::{Read, Write}; -use ulid::Ulid; - -impl ReferenceSerialization for Ulid { - fn encode( - &self, - writer: &mut W, - _: bool, - _: &mut ReferenceTables, - _: &A, - ) -> Result<(), DatabaseError> { - writer.write_all(&self.to_bytes()[..])?; - - Ok(()) - } - - fn decode( - reader: &mut R, - _: Option<&crate::serdes::ReferenceDecodeContext<'_, T>>, - _: &ReferenceTables, - _: &mut A, - ) -> Result { - let mut buf = [0u8; 16]; - reader.read_exact(&mut buf)?; - - Ok(Ulid::from_bytes(buf)) - } -} diff --git a/src/storage/memory.rs b/src/storage/memory.rs index 61ab6434..5e329126 100644 --- a/src/storage/memory.rs +++ b/src/storage/memory.rs @@ -150,7 +150,7 @@ impl Transaction for MemoryTransaction { } } -#[cfg(all(test, target_arch = "wasm32"))] +#[cfg(all(test, target_arch = "wasm32", feature = "wasm"))] mod wasm_tests { use super::*; use crate::catalog::{ColumnCatalog, ColumnDesc, TableName}; diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 40740d17..f45045ad 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -25,6 +25,7 @@ use crate::db::{ScalaFunctions, TableFunctions}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::expression::ScalarExpression; +use crate::iter_ext::Itertools; use crate::optimizer::core::cm_sketch::{ CountMinSketch, CountMinSketchPage, COUNT_MIN_SKETCH_STORAGE_PAGE_LEN, }; @@ -38,10 +39,8 @@ use crate::types::serialize::TupleValueSerializableImpl; use crate::types::tuple::{Tuple, TupleId}; use crate::types::value::{DataValue, TupleMappingRef}; use crate::types::{ColumnId, LogicalType}; -use ahash::HashMap; -use itertools::Itertools; use std::borrow::{Borrow, Cow}; -use std::collections::Bound; +use std::collections::{Bound, HashMap}; use std::fmt::{self, Display, Formatter}; use std::io::Cursor; use std::mem; @@ -49,7 +48,6 @@ use std::ops::SubAssign; use std::path::Path; pub type KeyValueRef<'a> = (&'a [u8], &'a [u8]); -use ulid::Generator; pub(crate) type StatisticsMetaCache = HashMap<(TableName, IndexId), StatisticsMeta>; pub(crate) type TableCache = HashMap; @@ -568,8 +566,7 @@ pub trait Transaction: Sized { }; } } - let mut generator = Generator::new(); - let col_id = table.add_column(column.clone(), &mut generator, plan_arena)?; + let col_id = table.add_column(column.clone(), plan_arena)?; if column.desc().is_unique() { let meta_ref = table.add_index_meta( diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index f31b9f68..e2fc0dd7 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -763,6 +763,7 @@ mod test { use crate::db::{CatalogKind, DataBaseBuilder}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; + use crate::iter_ext::Itertools; use crate::planner::{PlanArena, TableArenaCell}; use crate::storage::rocksdb::RocksStorage; use crate::storage::table_codec::TableCodec; @@ -774,7 +775,6 @@ mod test { use crate::types::tuple::Tuple; use crate::types::value::DataValue; use crate::types::LogicalType; - use itertools::Itertools; use std::collections::Bound; use tempfile::TempDir; diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index 91a2de7a..1b6b9a42 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -20,6 +20,7 @@ use crate::optimizer::core::cm_sketch::{CountMinSketchMeta, CountMinSketchPage}; use crate::optimizer::core::histogram::Bucket; use crate::optimizer::core::statistics_meta::StatisticsMetaRoot; use crate::planner::{MetaArena, TableArena}; +use crate::serdes::stable_hash::{StableHasher, TABLE_NAME_HASH_KEYS}; use crate::serdes::{ReferenceDecodeContext, ReferenceSerialization, ReferenceTables}; use crate::storage::{TableCache, Transaction}; use crate::types::index::{Index, IndexId, IndexMeta, IndexType, INDEX_ID_LEN}; @@ -27,7 +28,6 @@ use crate::types::serialize::TupleValueSerializableImpl; use crate::types::tuple::{Tuple, TupleId}; use crate::types::value::{DataValue, TupleMappingRef}; use crate::types::LogicalType; -use siphasher::sip::SipHasher; use std::borrow::Borrow; use std::hash::{Hash, Hasher}; use std::io::{Cursor, Read, Seek, SeekFrom}; @@ -132,7 +132,8 @@ impl TableCodec { } fn hash_bytes(table_name: &str) -> [u8; 8] { - let mut hasher = SipHasher::new(); + let (key0, key1) = TABLE_NAME_HASH_KEYS; + let mut hasher = StableHasher::new_with_keys(key0, key1); table_name.hash(&mut hasher); hasher.finish().to_le_bytes() } @@ -427,7 +428,7 @@ impl TableCodec { self.with_table_hash_buffers(table_name.as_ref(), |lower, table_hash, value, refs| { Self::write_key_prefix(lower, CodecType::Column, table_hash); lower.push(BOUND_MIN_TAG); - lower.extend_from_slice(&column_id.to_bytes()); + lower.extend_from_slice(&column_id.to_be_bytes()); if encode_value { let _ = refs.push_or_replace(table_name); @@ -1002,6 +1003,7 @@ mod tests { use crate::catalog::view::View; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRelation, TableCatalog, TableMeta}; use crate::errors::DatabaseError; + use crate::iter_ext::Itertools; use crate::optimizer::core::histogram::HistogramBuilder; use crate::optimizer::core::statistics_meta::StatisticsMeta; use crate::planner::{PlanArena, TableArenaCell}; @@ -1012,14 +1014,13 @@ mod tests { use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; + use crate::types::ColumnId; use crate::types::LogicalType; - use itertools::Itertools; use rust_decimal::Decimal; use std::collections::BTreeSet; use std::io::Cursor; use std::ops::Bound; use std::sync::Arc; - use ulid::Ulid; fn build_table_codec(table_arena: &TableArenaCell) -> TableCatalog { let columns = vec![ @@ -1094,7 +1095,7 @@ mod tests { let mut table_codec = TableCodec::default(); let index_meta = IndexMeta { id: 0, - column_ids: vec![Ulid::new()], + column_ids: vec![1], table_name: "t1".to_string().into(), pk_ty: LogicalType::Integer, value_ty: LogicalType::Integer, @@ -1171,7 +1172,7 @@ mod tests { let mut table_codec = TableCodec::default(); let index_meta = IndexMeta { id: 0, - column_ids: vec![Ulid::new()], + column_ids: vec![1], table_name: "t1".to_string().into(), pk_ty: LogicalType::Integer, value_ty: LogicalType::Integer, @@ -1207,7 +1208,7 @@ mod tests { ColumnDesc::new(LogicalType::Boolean, None, false, None).unwrap(), ); col.summary_mut().relation = ColumnRelation::Table { - column_id: Ulid::new(), + column_id: 1, table_name: "t1".to_string().into(), is_temp: false, }; @@ -1376,7 +1377,7 @@ mod tests { ); col.summary_mut().relation = ColumnRelation::Table { - column_id: Ulid::from(col_id as u128), + column_id: col_id as ColumnId, table_name: table_name.to_string().into(), is_temp: false, }; @@ -1674,8 +1675,8 @@ mod tests { .collect_vec(); assert_eq!(vec[0], &op("T0")); - assert_eq!(vec[1], &op("T1")); - assert_eq!(vec[2], &op("T2")); + assert_eq!(vec[1], &op("T2")); + assert_eq!(vec[2], &op("T1")); } #[test] @@ -1711,8 +1712,8 @@ mod tests { )) .collect_vec(); + assert_eq!(vec[0], &op("V2")); + assert_eq!(vec[1], &op("V1")); assert_eq!(vec[2], &op("V0")); - assert_eq!(vec[0], &op("V1")); - assert_eq!(vec[1], &op("V2")); } } diff --git a/src/types/mod.rs b/src/types/mod.rs index ee4785fc..43312d63 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -29,9 +29,8 @@ use std::cmp; use crate::errors::DatabaseError; use kite_sql_serde_macros::ReferenceSerialization; -use ulid::Ulid; -pub type ColumnId = Ulid; +pub type ColumnId = u64; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum CharLengthUnits { diff --git a/src/types/serialize.rs b/src/types/serialize.rs index 22bd405a..b986a83a 100644 --- a/src/types/serialize.rs +++ b/src/types/serialize.rs @@ -16,7 +16,6 @@ use crate::errors::DatabaseError; use crate::types::value::{DataValue, Utf8Type}; use crate::types::CharLengthUnits; use crate::types::LogicalType; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use kite_sql_serde_macros::ReferenceSerialization; use ordered_float::OrderedFloat; #[cfg(feature = "decimal")] @@ -303,79 +302,106 @@ fn write_spaces(writer: &mut W, mut len: usize) -> Result<(), Database Ok(()) } +macro_rules! define_le_io { + ($(($write_fn:ident, $read_fn:ident, $ty:ty)),* $(,)?) => { + $( + fn $write_fn(writer: &mut W, value: $ty) -> std::io::Result<()> { + writer.write_all(&value.to_le_bytes()) + } + + fn $read_fn(reader: &mut R) -> std::io::Result<$ty> { + let mut bytes = [0u8; std::mem::size_of::<$ty>()]; + reader.read_exact(&mut bytes)?; + Ok(<$ty>::from_le_bytes(bytes)) + } + )* + }; +} + +define_le_io!( + (write_i8, read_i8, i8), + (write_i16_le, read_i16_le, i16), + (write_i32_le, read_i32_le, i32), + (write_i64_le, read_i64_le, i64), + (write_u8, read_u8, u8), + (write_u16_le, read_u16_le, u16), + (write_u32_le, read_u32_le, u32), + (write_u64_le, read_u64_le, u64), + (write_f32_le, read_f32_le, f32), + (write_f64_le, read_f64_le, f64), +); + // Int impl_tuple_value_serializable!( Int8Serializable, Int8, - |writer: &mut dyn Write, &value| writer.write_i8(value), - |reader: &mut Cursor<&[u8]>| reader.read_i8() + |writer: &mut dyn Write, &value| write_i8(writer, value), + |reader: &mut Cursor<&[u8]>| read_i8(reader) ); impl_tuple_value_serializable!( Int16Serializable, Int16, - |writer: &mut dyn Write, &value| writer.write_i16::(value), - |reader: &mut Cursor<&[u8]>| reader.read_i16::() + |writer: &mut dyn Write, &value| write_i16_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_i16_le(reader) ); impl_tuple_value_serializable!( Int32Serializable, Int32, - |writer: &mut dyn Write, &value| writer.write_i32::(value), - |reader: &mut Cursor<&[u8]>| reader.read_i32::() + |writer: &mut dyn Write, &value| write_i32_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_i32_le(reader) ); impl_tuple_value_serializable!( Int64Serializable, Int64, - |writer: &mut dyn Write, &value| writer.write_i64::(value), - |reader: &mut Cursor<&[u8]>| reader.read_i64::() + |writer: &mut dyn Write, &value| write_i64_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_i64_le(reader) ); // Uint impl_tuple_value_serializable!( UInt8Serializable, UInt8, - |writer: &mut dyn Write, &value| writer.write_u8(value), - |reader: &mut Cursor<&[u8]>| reader.read_u8() + |writer: &mut dyn Write, &value| write_u8(writer, value), + |reader: &mut Cursor<&[u8]>| read_u8(reader) ); impl_tuple_value_serializable!( UInt16Serializable, UInt16, - |writer: &mut dyn Write, &value| writer.write_u16::(value), - |reader: &mut Cursor<&[u8]>| reader.read_u16::() + |writer: &mut dyn Write, &value| write_u16_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_u16_le(reader) ); impl_tuple_value_serializable!( UInt32Serializable, UInt32, - |writer: &mut dyn Write, &value| writer.write_u32::(value), - |reader: &mut Cursor<&[u8]>| reader.read_u32::() + |writer: &mut dyn Write, &value| write_u32_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_u32_le(reader) ); impl_tuple_value_serializable!( UInt64Serializable, UInt64, - |writer: &mut dyn Write, &value| writer.write_u64::(value), - |reader: &mut Cursor<&[u8]>| reader.read_u64::() + |writer: &mut dyn Write, &value| write_u64_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_u64_le(reader) ); // Float impl_tuple_value_serializable!( Float32Serializable, Float32, - |writer: &mut dyn Write, value: &OrderedFloat::| writer - .write_f32::(value.into_inner()), - |reader: &mut Cursor<&[u8]>| reader.read_f32::().map(OrderedFloat::) + |writer: &mut dyn Write, value: &OrderedFloat::| write_f32_le(writer, value.into_inner()), + |reader: &mut Cursor<&[u8]>| read_f32_le(reader).map(OrderedFloat::) ); impl_tuple_value_serializable!( Float64Serializable, Float64, - |writer: &mut dyn Write, value: &OrderedFloat::| writer - .write_f64::(value.into_inner()), - |reader: &mut Cursor<&[u8]>| reader.read_f64::().map(OrderedFloat::) + |writer: &mut dyn Write, value: &OrderedFloat::| write_f64_le(writer, value.into_inner()), + |reader: &mut Cursor<&[u8]>| read_f64_le(reader).map(OrderedFloat::) ); impl_tuple_value_serializable!( BooleanSerializable, Boolean, - |writer: &mut dyn Write, &value| writer.write_u8(value as u8), - |reader: &mut Cursor<&[u8]>| reader.read_u8().map(|v| v != 0) + |writer: &mut dyn Write, &value| write_u8(writer, value as u8), + |reader: &mut Cursor<&[u8]>| read_u8(reader).map(|v| v != 0) ); impl TupleValueSerializable for CharSerializable { @@ -394,7 +420,7 @@ impl TupleValueSerializable for CharSerializable { let bytes = value.as_bytes(); let spaces_len = chars_len.saturating_sub(value.chars().count()); - writer.write_u32::((bytes.len() + spaces_len) as u32)?; + write_u32_le(writer, (bytes.len() + spaces_len) as u32)?; writer.write_all(bytes)?; write_spaces(writer, spaces_len)?; } @@ -413,7 +439,7 @@ impl TupleValueSerializable for CharSerializable { fn from_raw(&self, reader: &mut Cursor<&[u8]>) -> Result { // https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A let len = match self.unit { - CharLengthUnits::Characters => reader.read_u32::()?, + CharLengthUnits::Characters => read_u32_le(reader)?, CharLengthUnits::Octets => self.len, } as usize; let mut bytes = vec![0; len]; @@ -444,13 +470,13 @@ impl TupleValueSerializable for VarcharSerializable { }; let bytes = value.as_bytes(); - writer.write_u32::(bytes.len() as u32)?; + write_u32_le(writer, bytes.len() as u32)?; writer.write_all(bytes)?; Ok(()) } fn from_raw(&self, reader: &mut Cursor<&[u8]>) -> Result { - let len = reader.read_u32::()? as usize; + let len = read_u32_le(reader)? as usize; let mut bytes = vec![0; len]; reader.read_exact(&mut bytes)?; @@ -465,14 +491,14 @@ impl TupleValueSerializable for VarcharSerializable { impl_tuple_value_serializable!( DateSerializable, Date32, - |writer: &mut dyn Write, &value| writer.write_i32::(value), - |reader: &mut Cursor<&[u8]>| reader.read_i32::() + |writer: &mut dyn Write, &value| write_i32_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_i32_le(reader) ); impl_tuple_value_serializable!( DateTimeSerializable, Date64, - |writer: &mut dyn Write, &value| writer.write_i64::(value), - |reader: &mut Cursor<&[u8]>| reader.read_i64::() + |writer: &mut dyn Write, &value| write_i64_le(writer, value), + |reader: &mut Cursor<&[u8]>| read_i64_le(reader) ); impl TupleValueSerializable for TimeSerializable { @@ -480,16 +506,13 @@ impl TupleValueSerializable for TimeSerializable { let DataValue::Time32(v, ..) = value else { unsafe { std::hint::unreachable_unchecked() } }; - writer.write_u32::(*v)?; + write_u32_le(writer, *v)?; Ok(()) } fn from_raw(&self, reader: &mut Cursor<&[u8]>) -> Result { let precision = self.precision.unwrap_or_default(); - Ok(DataValue::Time32( - reader.read_u32::()?, - precision, - )) + Ok(DataValue::Time32(read_u32_le(reader)?, precision)) } } @@ -498,14 +521,14 @@ impl TupleValueSerializable for TimeStampSerializable { let DataValue::Time64(v, ..) = value else { unsafe { std::hint::unreachable_unchecked() } }; - writer.write_i64::(*v)?; + write_i64_le(writer, *v)?; Ok(()) } fn from_raw(&self, reader: &mut Cursor<&[u8]>) -> Result { let precision = self.precision.unwrap_or_default(); Ok(DataValue::Time64( - reader.read_i64::()?, + read_i64_le(reader)?, precision, self.zone, )) @@ -566,7 +589,7 @@ impl TupleValueSerializable for SkipVariable { } fn from_raw(&self, reader: &mut Cursor<&[u8]>) -> Result { - let len = reader.read_u32::()? as usize; + let len = read_u32_le(reader)? as usize; reader.seek(SeekFrom::Current(len as i64))?; Ok(DataValue::Null) } diff --git a/src/types/tuple.rs b/src/types/tuple.rs index f27db8d5..ab3aae3c 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -14,10 +14,10 @@ use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; +use crate::iter_ext::Itertools; use crate::planner::PlanArena; use crate::types::serialize::{TupleValueSerializable, TupleValueSerializableImpl}; use crate::types::value::DataValue; -use itertools::Itertools; use std::borrow::Borrow; use std::io::Cursor; @@ -278,11 +278,11 @@ impl Tuple { #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc}; + use crate::iter_ext::Itertools; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use crate::types::CharLengthUnits; use crate::types::LogicalType; - use itertools::Itertools; use ordered_float::OrderedFloat; use rust_decimal::Decimal; use std::sync::Arc; diff --git a/src/types/value.rs b/src/types/value.rs index a231a5b4..138a1191 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -14,16 +14,15 @@ use super::LogicalType; use crate::errors::DatabaseError; +use crate::iter_ext::Itertools; use crate::storage::table_codec::{BumpBytes, BOUND_MAX_TAG, NOTNULL_TAG, NULL_TAG}; use crate::types::evaluator::cast::{cast_create, to_char, to_varchar}; use crate::types::CharLengthUnits; -use byteorder::ReadBytesExt; #[cfg(feature = "time")] use chrono::{ format::{DelayedFormat, StrftimeItems}, DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc, }; -use itertools::Itertools; use ordered_float::OrderedFloat; #[cfg(feature = "decimal")] use rust_decimal::Decimal; @@ -36,6 +35,16 @@ use std::io::Read; use std::mem; use std::{cmp, fmt}; +trait ReadByteExt: Read { + fn read_u8(&mut self) -> std::io::Result { + let mut byte = [0u8; 1]; + self.read_exact(&mut byte)?; + Ok(byte[0]) + } +} + +impl ReadByteExt for R {} + #[cfg(feature = "time")] mod chrono_value { use chrono::{DateTime, NaiveDateTime, NaiveTime}; @@ -384,26 +393,78 @@ impl Hash for DataValue { fn hash(&self, state: &mut H) { use DataValue::*; match self { - Boolean(v) => v.hash(state), - Float32(v) => v.hash(state), - Float64(v) => v.hash(state), - Int8(v) => v.hash(state), - Int16(v) => v.hash(state), - Int32(v) => v.hash(state), - Int64(v) => v.hash(state), - UInt8(v) => v.hash(state), - UInt16(v) => v.hash(state), - UInt32(v) => v.hash(state), - UInt64(v) => v.hash(state), - Utf8 { value: v, .. } => v.hash(state), - Null => 1.hash(state), - Date32(v) => v.hash(state), - Date64(v) => v.hash(state), - Time32(v, ..) => v.hash(state), - Time64(v, ..) => v.hash(state), + Null => 0u8.hash(state), + Boolean(v) => { + 1u8.hash(state); + v.hash(state); + } + Float32(v) => { + 2u8.hash(state); + v.hash(state); + } + Float64(v) => { + 3u8.hash(state); + v.hash(state); + } + Int8(v) => { + 4u8.hash(state); + v.hash(state); + } + Int16(v) => { + 5u8.hash(state); + v.hash(state); + } + Int32(v) => { + 6u8.hash(state); + v.hash(state); + } + Int64(v) => { + 7u8.hash(state); + v.hash(state); + } + UInt8(v) => { + 8u8.hash(state); + v.hash(state); + } + UInt16(v) => { + 9u8.hash(state); + v.hash(state); + } + UInt32(v) => { + 10u8.hash(state); + v.hash(state); + } + UInt64(v) => { + 11u8.hash(state); + v.hash(state); + } + Utf8 { value: v, .. } => { + 12u8.hash(state); + v.hash(state); + } + Date32(v) => { + 13u8.hash(state); + v.hash(state); + } + Date64(v) => { + 14u8.hash(state); + v.hash(state); + } + Time32(v, ..) => { + 15u8.hash(state); + v.hash(state); + } + Time64(v, ..) => { + 16u8.hash(state); + v.hash(state); + } #[cfg(feature = "decimal")] - Decimal(v) => v.hash(state), + Decimal(v) => { + 17u8.hash(state); + v.hash(state); + } Tuple(values, is_upper) => { + 18u8.hash(state); values.hash(state); is_upper.hash(state); } diff --git a/src/wasm.rs b/src/wasm.rs index c0f68776..1c5d1158 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg(target_arch = "wasm32")] +#![cfg(all(target_arch = "wasm32", feature = "wasm"))] use crate::db::{DataBaseBuilder, Database, DatabaseIter}; use crate::storage::memory::MemoryStorage; diff --git a/tests/macros-test/src/main.rs b/tests/macros-test/src/main.rs index c8600efe..a424cdeb 100644 --- a/tests/macros-test/src/main.rs +++ b/tests/macros-test/src/main.rs @@ -16,8 +16,7 @@ fn main() {} #[cfg(test)] mod test { - use kite_sql::catalog::column::{ColumnCatalog, ColumnDesc, ColumnRelation}; - use kite_sql::catalog::table::TableName; + use kite_sql::catalog::column::{ColumnCatalog, ColumnDesc}; use kite_sql::db::{DataBaseBuilder, Database, ResultIter}; use kite_sql::errors::DatabaseError; use kite_sql::expression::function::scala::ScalarFunctionImpl; @@ -2944,35 +2943,20 @@ mod test { ); assert!(numbers.next().is_none()); - let table_name: TableName = "test_numbers".to_string().into(); let mut table_arena = TableArena::default(); let mut function_schema = Schema::new(); - function.output_schema_into(&table_name, &mut table_arena, &mut function_schema); + function.output_schema_into(&mut table_arena, &mut function_schema); let c1_ref = function_schema[0]; let c2_ref = function_schema[1]; - let mut c1 = ColumnCatalog::new( - "c1".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None)?, - ); - c1.summary_mut().relation = ColumnRelation::Table { - column_id: table_arena.column(c1_ref).id().unwrap(), - table_name: table_name.clone(), - is_temp: false, - }; - let mut c2 = ColumnCatalog::new( - "c2".to_string(), - true, - ColumnDesc::new(LogicalType::Integer, None, false, None)?, - ); - c2.summary_mut().relation = ColumnRelation::Table { - column_id: table_arena.column(c2_ref).id().unwrap(), - table_name: table_name.clone(), - is_temp: false, - }; - - assert_eq!(table_arena.column(c1_ref), &c1); - assert_eq!(table_arena.column(c2_ref), &c2); + let c1 = table_arena.column(c1_ref); + let c2 = table_arena.column(c2_ref); + + assert_eq!(c1.name(), "c1"); + assert_eq!(c1.datatype(), &LogicalType::Integer); + assert!(c1.nullable()); + assert_eq!(c2.name(), "c2"); + assert_eq!(c2.datatype(), &LogicalType::Integer); + assert!(c2.nullable()); Ok(()) } diff --git a/tests/slt/show_view.slt b/tests/slt/show_view.slt index 905e644e..4e749c5d 100644 --- a/tests/slt/show_view.slt +++ b/tests/slt/show_view.slt @@ -24,8 +24,8 @@ create view v2 as select * from t1 where a != 1 query I show views ---- -v1 v2 +v1 statement ok drop view v1 @@ -41,4 +41,3 @@ drop view v2 query I show views ---- - diff --git a/tests/slt/where_by_index_explain.slt b/tests/slt/where_by_index_explain.slt index ae05516b..ff641049 100644 --- a/tests/slt/where_by_index_explain.slt +++ b/tests/slt/where_by_index_explain.slt @@ -127,7 +127,7 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#2 = 7) || query T explain select * from t1 where c1 is null and c2 is null; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#2 is null && #3 is null), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => null => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#2 is null && #3 is null), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => null => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c1 > 0 and c1 < 8; @@ -165,13 +165,13 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && ( query T explain select c1 from t1 where c1 < 10; ---- -Projection [#2] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2] [IndexScan By #3 => (-inf, (10)) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2] [IndexScan By #1 => (-inf, 10) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] # unique covered with primary key projection query T explain select c1, id from t1 where c1 < 10; ---- -Projection [#2, #1] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2] [IndexScan By #3 => (-inf, (10)) => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2, #1] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2] [IndexScan By #1 => (-inf, 10) => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] statement ok drop index t1.u_c1_index;