From 7ed2e66e7c22bec60b6ea4d6ef30a064915ec6ed Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Mon, 16 Dec 2024 23:30:27 +0800
Subject: [PATCH] Perf/serialize (#255)

* perf: use `byteorder` in `DataValue::to_raw` & `DataValue::from_raw`

* perf: use `bumpalo` to control memory allocation in `Sort` and `TableCodec`

* chore: simplify `Tuple::deserialize_from` & `ScalarExpression::eval` &
  `HepGraph::node_iter` & `HepOptimizer::apply_batch`

* perf: encode the table-name prefix in `TableCodec` as a hash so that
  RocksDB prefix ranges can be applied to it

* chore: simplify the parameters used when executing SQL
---
 Cargo.toml | 9 +-
 README.md | 12 +-
 src/binder/aggregate.rs | 6 +-
 src/binder/alter_table.rs | 3 +-
 src/binder/analyze.rs | 3 +-
 src/binder/copy.rs | 6 +-
 src/binder/create_index.rs | 3 +-
 src/binder/create_table.rs | 7 +-
 src/binder/create_view.rs | 3 +-
 src/binder/delete.rs | 3 +-
 src/binder/describe.rs | 3 +-
 src/binder/distinct.rs | 3 +-
 src/binder/drop_table.rs | 3 +-
 src/binder/drop_view.rs | 3 +-
 src/binder/explain.rs | 3 +-
 src/binder/expr.rs | 13 +-
 src/binder/insert.rs | 2 +-
 src/binder/mod.rs | 20 +-
 src/binder/select.rs | 2 +-
 src/binder/show.rs | 3 +-
 src/binder/truncate.rs | 3 +-
 src/binder/update.rs | 3 +-
 src/catalog/column.rs | 3 +-
 src/db.rs | 40 +-
 src/errors.rs | 10 +-
 src/execution/dml/analyze.rs | 2 +-
 src/execution/dml/copy_to_file.rs | 47 +-
 src/execution/dml/update.rs | 2 +-
 src/execution/dql/aggregate/hash_agg.rs | 4 +-
 src/execution/dql/aggregate/simple_agg.rs | 3 +-
 src/execution/dql/filter.rs | 2 +-
 src/execution/dql/join/hash_join.rs | 17 +-
 src/execution/dql/join/nested_loop_join.rs | 15 +-
 src/execution/dql/projection.rs | 2 +-
 src/execution/dql/sort.rs | 239 +++++++----
 src/execution/mod.rs | 6 +-
 src/expression/evaluator.rs | 108 +++--
 src/expression/function/scala.rs | 3 +-
 src/function/char_length.rs | 6 +-
 src/function/current_date.rs | 3 +-
 src/function/lower.rs | 6 +-
 src/function/numbers.rs | 4 +-
 src/function/upper.rs | 6 +-
 src/macros/mod.rs | 6 +-
 src/optimizer/core/histogram.rs | 90 ++--
 src/optimizer/core/memo.rs | 7 +-
 src/optimizer/core/statistics_meta.rs | 2 +-
 src/optimizer/heuristic/batch.rs | 32 +-
 src/optimizer/heuristic/graph.rs | 47 +-
 src/optimizer/heuristic/matcher.rs | 6 +-
 src/optimizer/heuristic/optimizer.rs | 33 +-
 .../rule/normalization/pushdown_limit.rs | 12 +-
 .../rule/normalization/pushdown_predicates.rs | 1 +
 .../rule/normalization/simplification.rs | 13 +-
 src/planner/operator/copy_to_file.rs | 9 +-
 src/planner/operator/filter.rs | 7 +-
 src/serdes/char.rs | 10 +-
 src/serdes/data_value.rs | 115 +----
 src/serdes/mod.rs | 29 +-
 src/storage/mod.rs | 241 +++++++----
 src/storage/rocksdb.rs | 89 ++--
 src/storage/table_codec.rs | 402 +++++++++++------
 src/types/tuple.rs | 133 +++---
 src/types/value.rs | 402 +++++++++++------
 src/utils/bit_vector.rs | 96 -----
 src/utils/mod.rs | 1 -
 tests/macros-test/src/main.rs | 3 +-
 tests/slt/copy.slt | 2 +-
 tpcc/Cargo.toml | 2 +-
 tpcc/README.md | 215 +++++-----
 tpcc/src/delivery.rs | 14 +-
 tpcc/src/load.rs | 1 -
 tpcc/src/main.rs | 1 -
 tpcc/src/new_ord.rs | 14 +-
 tpcc/src/order_stat.rs | 10 +-
 tpcc/src/payment.rs | 24 +-
 tpcc/src/slev.rs | 6 +-
 77 files changed, 1434 insertions(+), 1285 deletions(-)
 delete mode 100644 src/utils/bit_vector.rs

diff --git a/Cargo.toml b/Cargo.toml
index c4820a2e..064da6cd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 [package]
 name = "fnck_sql"
-version = "0.0.8"
+version = "0.0.9"
 edition = "2021"
 authors = ["Kould ", "Xwg "]
 description = "SQL as a Function 
for Rust" @@ -35,12 +35,13 @@ harness = false [dependencies] ahash = { version = "0.8" } bincode = { version = "1" } -bytes = { version = "1" } +bumpalo = { version = "3", features = ["allocator-api2", "collections", "std"] } +byteorder = { version = "1" } chrono = { version = "0.4" } comfy-table = { version = "7" } csv = { version = "1" } -encode_unicode = { version = "1" } dirs = { version = "5" } +fixedbitset = { version = "0.4" } itertools = { version = "0.12" } ordered-float = { version = "4" } paste = { version = "1" } @@ -68,10 +69,8 @@ tokio = { version = "1.36", features = ["full"], optional = true [dev-dependencies] -cargo-tarpaulin = { version = "0.27" } criterion = { version = "0.5", features = ["html_reports"] } indicatif = { version = "0.17" } -rand_distr = { version = "0.4" } tempfile = { version = "3.10" } # Benchmark sqlite = { version = "0.34" } diff --git a/README.md b/README.md index 313cd2ae..8197e9c2 100755 --- a/README.md +++ b/README.md @@ -73,13 +73,13 @@ run `cargo run -p tpcc --release` to run tpcc - Tips: TPC-C currently only supports single thread ```shell <90th Percentile RT (MaxRT)> - New-Order : 0.003 (0.012) - Payment : 0.001 (0.003) -Order-Status : 0.054 (0.188) - Delivery : 0.021 (0.049) - Stock-Level : 0.004 (0.006) + New-Order : 0.002 (0.004) + Payment : 0.001 (0.025) +Order-Status : 0.053 (0.175) + Delivery : 0.022 (0.027) + Stock-Level : 0.003 (0.019) -7345 Tpmc +7815 tpmC ``` #### 👉[check more](tpcc/README.md) diff --git a/src/binder/aggregate.rs b/src/binder/aggregate.rs index 9a5ccef8..90ad6174 100644 --- a/src/binder/aggregate.rs +++ b/src/binder/aggregate.rs @@ -3,18 +3,18 @@ use itertools::Itertools; use sqlparser::ast::{Expr, OrderByExpr}; use std::collections::HashSet; +use super::{Binder, QueryBindStep}; use crate::errors::DatabaseError; use crate::expression::function::scala::ScalarFunction; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::types::value::DataValue; use crate::{ expression::ScalarExpression, planner::operator::{aggregate::AggregateOperator, sort::SortField}, }; -use super::{Binder, QueryBindStep}; - -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub fn bind_aggregate( &mut self, children: LogicalPlan, diff --git a/src/binder/alter_table.rs b/src/binder/alter_table.rs index 8d21a3b3..1c0ff2a1 100644 --- a/src/binder/alter_table.rs +++ b/src/binder/alter_table.rs @@ -11,8 +11,9 @@ use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_alter_table( &mut self, name: &ObjectName, diff --git a/src/binder/analyze.rs b/src/binder/analyze.rs index ce9d2694..6aaafe78 100644 --- a/src/binder/analyze.rs +++ b/src/binder/analyze.rs @@ -5,10 +5,11 @@ use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::ObjectName; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_analyze(&mut self, name: &ObjectName) -> Result { let table_name = Arc::new(lower_case_name(name)?); diff --git a/src/binder/copy.rs b/src/binder/copy.rs index 902147c3..05966bc1 100644 --- a/src/binder/copy.rs +++ b/src/binder/copy.rs @@ -6,6 +6,7 @@ use super::*; use 
crate::errors::DatabaseError; use crate::planner::operator::copy_from_file::CopyFromFileOperator; use crate::planner::operator::copy_to_file::CopyToFileOperator; +use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::Operator; use crate::planner::Childrens; use fnck_sql_serde_macros::ReferenceSerialization; @@ -63,7 +64,7 @@ impl FromStr for ExtSource { } } -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(super) fn bind_copy( &mut self, source: CopySource, @@ -96,11 +97,10 @@ impl Binder<'_, '_, T> { // COPY TO Ok(LogicalPlan::new( Operator::CopyToFile(CopyToFileOperator { - table: table.name.to_string(), target: ext_source, schema_ref, }), - Childrens::None, + Childrens::Only(TableScanOperator::build(table_name, table)), )) } else { // COPY FROM diff --git a/src/binder/create_index.rs b/src/binder/create_index.rs index 06660aa1..69a0b103 100644 --- a/src/binder/create_index.rs +++ b/src/binder/create_index.rs @@ -7,10 +7,11 @@ use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; use crate::types::index::IndexType; +use crate::types::value::DataValue; use sqlparser::ast::{ObjectName, OrderByExpr}; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_create_index( &mut self, table_name: &ObjectName, diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index b8bcd9d9..54c9d840 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -12,9 +12,10 @@ use crate::planner::operator::create_table::CreateTableOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use crate::types::LogicalType; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { // TODO: TableConstraint pub(crate) fn bind_create_table( &mut self, @@ -157,7 +158,6 @@ mod tests { use crate::types::LogicalType; use crate::utils::lru::SharedLruCache; use sqlparser::ast::CharLengthUnits; - use std::cell::RefCell; use std::hash::RandomState; use std::sync::atomic::AtomicUsize; use tempfile::TempDir; @@ -173,7 +173,6 @@ mod tests { let table_functions = Default::default(); let sql = "create table t1 (id int primary key, name varchar(10) null)"; - let args = RefCell::new(Vec::new()); let mut binder = Binder::new( BinderContext::new( &table_cache, @@ -183,7 +182,7 @@ mod tests { &table_functions, Arc::new(AtomicUsize::new(0)), ), - &args, + &[], None, ); let stmt = crate::parser::parse_sql(sql).unwrap(); diff --git a/src/binder/create_view.rs b/src/binder/create_view.rs index c48d1db0..2d99b0bd 100644 --- a/src/binder/create_view.rs +++ b/src/binder/create_view.rs @@ -7,12 +7,13 @@ use crate::planner::operator::create_view::CreateViewOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use itertools::Itertools; use sqlparser::ast::{Ident, ObjectName, Query}; use std::sync::Arc; use ulid::Ulid; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_create_view( &mut self, or_replace: &bool, diff --git a/src/binder/delete.rs b/src/binder/delete.rs index 026c0844..96971e3b 100644 --- a/src/binder/delete.rs +++ b/src/binder/delete.rs @@ -5,11 +5,12 @@ use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use 
crate::storage::Transaction; +use crate::types::value::DataValue; use itertools::Itertools; use sqlparser::ast::{Expr, TableAlias, TableFactor, TableWithJoins}; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_delete( &mut self, from: &TableWithJoins, diff --git a/src/binder/describe.rs b/src/binder/describe.rs index d77ecf2b..e8677077 100644 --- a/src/binder/describe.rs +++ b/src/binder/describe.rs @@ -4,10 +4,11 @@ use crate::planner::operator::describe::DescribeOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::ObjectName; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_describe( &mut self, name: &ObjectName, diff --git a/src/binder/distinct.rs b/src/binder/distinct.rs index fa88d1eb..db431f2f 100644 --- a/src/binder/distinct.rs +++ b/src/binder/distinct.rs @@ -3,8 +3,9 @@ use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::types::value::DataValue; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub fn bind_distinct( &mut self, children: LogicalPlan, diff --git a/src/binder/drop_table.rs b/src/binder/drop_table.rs index 30bc13f0..5666ee88 100644 --- a/src/binder/drop_table.rs +++ b/src/binder/drop_table.rs @@ -4,10 +4,11 @@ use crate::planner::operator::drop_table::DropTableOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::ObjectName; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_drop_table( &mut self, name: &ObjectName, diff --git a/src/binder/drop_view.rs b/src/binder/drop_view.rs index 4e635b7c..4cb40442 100644 --- a/src/binder/drop_view.rs +++ b/src/binder/drop_view.rs @@ -4,10 +4,11 @@ use crate::planner::operator::drop_view::DropViewOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::ObjectName; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_drop_view( &mut self, name: &ObjectName, diff --git a/src/binder/explain.rs b/src/binder/explain.rs index 9119feb9..48a9067c 100644 --- a/src/binder/explain.rs +++ b/src/binder/explain.rs @@ -3,8 +3,9 @@ use crate::errors::DatabaseError; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_explain(&mut self, plan: LogicalPlan) -> Result { Ok(LogicalPlan::new(Operator::Explain, Childrens::Only(plan))) } diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 53c4958a..5fa653f2 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -40,7 +40,7 @@ macro_rules! 
try_default { }; } -impl<'a, T: Transaction> Binder<'a, '_, T> { +impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T, A> { pub(crate) fn bind_expr(&mut self, expr: &Expr) -> Result { match expr { Expr::Identifier(ident) => { @@ -50,14 +50,11 @@ impl<'a, T: Transaction> Binder<'a, '_, T> { Expr::BinaryOp { left, right, op } => self.bind_binary_op_internal(left, right, op), Expr::Value(v) => { let value = if let Value::Placeholder(name) = v { - let (i, _) = self - .args - .borrow() + self.args + .as_ref() .iter() - .enumerate() - .find(|(_, (key, _))| key == name) - .ok_or_else(|| DatabaseError::ParametersNotFound(name.to_string()))?; - self.args.borrow_mut().remove(i).1 + .find_map(|(key, value)| (key == name).then(|| value.clone())) + .ok_or_else(|| DatabaseError::ParametersNotFound(name.to_string()))? } else { v.into() }; diff --git a/src/binder/insert.rs b/src/binder/insert.rs index 56f4dce3..0d9eb2b4 100644 --- a/src/binder/insert.rs +++ b/src/binder/insert.rs @@ -12,7 +12,7 @@ use sqlparser::ast::{Expr, Ident, ObjectName}; use std::slice; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_insert( &mut self, name: &ObjectName, diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 7e9e5340..5ff832ba 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -19,20 +19,20 @@ mod truncate; mod update; use sqlparser::ast::{Ident, ObjectName, ObjectType, SetExpr, Statement}; -use std::cell::RefCell; use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use crate::catalog::view::View; use crate::catalog::{ColumnRef, TableCatalog, TableName}; -use crate::db::{Args, ScalaFunctions, TableFunctions}; +use crate::db::{ScalaFunctions, TableFunctions}; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; use crate::planner::operator::join::JoinType; use crate::planner::{LogicalPlan, SchemaOutput}; use crate::storage::{TableCache, Transaction, ViewCache}; use crate::types::tuple::SchemaRef; +use crate::types::value::DataValue; pub enum InputRefType { AggCall, @@ -313,18 +313,18 @@ impl<'a, T: Transaction> BinderContext<'a, T> { } } -pub struct Binder<'a, 'b, T: Transaction> { +pub struct Binder<'a, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> { context: BinderContext<'a, T>, table_schema_buf: HashMap>, - args: &'a RefCell, - pub(crate) parent: Option<&'b Binder<'a, 'b, T>>, + args: &'a A, + pub(crate) parent: Option<&'b Binder<'a, 'b, T, A>>, } -impl<'a, 'b, T: Transaction> Binder<'a, 'b, T> { +impl<'a, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, 'b, T, A> { pub fn new( context: BinderContext<'a, T>, - args: &'a RefCell, - parent: Option<&'b Binder<'a, 'b, T>>, + args: &'a A, + parent: Option<&'b Binder<'a, 'b, T, A>>, ) -> Self { Binder { context, @@ -488,7 +488,6 @@ pub mod test { use crate::types::ColumnId; use crate::types::LogicalType::Integer; use crate::utils::lru::SharedLruCache; - use std::cell::RefCell; use std::hash::RandomState; use std::path::PathBuf; use std::sync::atomic::AtomicUsize; @@ -507,7 +506,6 @@ pub mod test { let scala_functions = Default::default(); let table_functions = Default::default(); let transaction = self.storage.transaction()?; - let args = RefCell::new(Vec::new()); let mut binder = Binder::new( BinderContext::new( &self.table_cache, @@ -517,7 +515,7 @@ pub mod test { &table_functions, Arc::new(AtomicUsize::new(0)), ), - &args, + &[], None, ); let stmt = 
crate::parser::parse_sql(sql)?; diff --git a/src/binder/select.rs b/src/binder/select.rs index b079eb6e..ec7a4e59 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -38,7 +38,7 @@ use sqlparser::ast::{ TableWithJoins, }; -impl<'a: 'b, 'b, T: Transaction> Binder<'a, 'b, T> { +impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, 'b, T, A> { pub(crate) fn bind_query(&mut self, query: &Query) -> Result { let origin_step = self.context.step_now(); diff --git a/src/binder/show.rs b/src/binder/show.rs index 2c49f1fc..f229555d 100644 --- a/src/binder/show.rs +++ b/src/binder/show.rs @@ -3,8 +3,9 @@ use crate::errors::DatabaseError; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_show_tables(&mut self) -> Result { Ok(LogicalPlan::new(Operator::Show, Childrens::None)) } diff --git a/src/binder/truncate.rs b/src/binder/truncate.rs index 39720670..ac382d47 100644 --- a/src/binder/truncate.rs +++ b/src/binder/truncate.rs @@ -4,10 +4,11 @@ use crate::planner::operator::truncate::TruncateOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::ObjectName; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_truncate( &mut self, name: &ObjectName, diff --git a/src/binder/update.rs b/src/binder/update.rs index d33bd50b..dd134726 100644 --- a/src/binder/update.rs +++ b/src/binder/update.rs @@ -5,11 +5,12 @@ use crate::planner::operator::update::UpdateOperator; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; +use crate::types::value::DataValue; use sqlparser::ast::{Assignment, Expr, TableFactor, TableWithJoins}; use std::slice; use std::sync::Arc; -impl Binder<'_, '_, T> { +impl> Binder<'_, '_, T, A> { pub(crate) fn bind_update( &mut self, to: &TableWithJoins, diff --git a/src/catalog/column.rs b/src/catalog/column.rs index 93a9e967..db84d16a 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -1,7 +1,6 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; -use crate::types::tuple::EMPTY_TUPLE; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; use fnck_sql_serde_macros::ReferenceSerialization; @@ -170,7 +169,7 @@ impl ColumnCatalog { self.desc .default .as_ref() - .map(|expr| expr.eval(&EMPTY_TUPLE, &[])) + .map(|expr| expr.eval(None)) .transpose() } diff --git a/src/db.rs b/src/db.rs index ce16769c..a7379ad4 100644 --- a/src/db.rs +++ b/src/db.rs @@ -23,7 +23,6 @@ use crate::utils::lru::SharedLruCache; use ahash::HashMap; use parking_lot::lock_api::{ArcRwLockReadGuard, ArcRwLockWriteGuard}; use parking_lot::{RawRwLock, RwLock}; -use std::cell::RefCell; use std::hash::RandomState; use std::marker::PhantomData; use std::mem; @@ -36,7 +35,6 @@ use std::sync::Arc; pub(crate) type ScalaFunctions = HashMap>; pub(crate) type TableFunctions = HashMap>; -pub type Args = Vec<(&'static str, DataValue)>; pub type Statement = sqlparser::ast::Statement; #[allow(dead_code)] @@ -130,9 +128,9 @@ impl State { } #[allow(clippy::too_many_arguments)] - pub(crate) fn build_plan( + pub(crate) fn build_plan>( stmt: &Statement, - args: &RefCell, + params: A, table_cache: 
&TableCache, view_cache: &ViewCache, meta_cache: &StatisticsMetaCache, @@ -149,7 +147,7 @@ impl State { table_functions, Arc::new(AtomicUsize::new(0)), ), - args, + ¶ms, None, ); /// Build a logical plan. @@ -254,17 +252,15 @@ impl State { stmts.pop().ok_or(DatabaseError::EmptyStatement) } - fn execute<'a>( + fn execute<'a, A: AsRef<[(&'static str, DataValue)]>>( &'a self, transaction: &'a mut S::TransactionType<'_>, stmt: &Statement, - args: Args, + params: A, ) -> Result<(SchemaRef, Executor<'a>), DatabaseError> { - let args = RefCell::new(args); - let mut plan = Self::build_plan( stmt, - &args, + params, self.table_cache(), self.view_cache(), self.meta_cache(), @@ -294,18 +290,18 @@ impl Database { pub fn run>(&self, sql: T) -> Result, DatabaseError> { let statement = self.prepare(sql)?; - self.execute(&statement, vec![]) + self.execute(&statement, &[]) } pub fn prepare>(&self, sql: T) -> Result { self.state.prepare(sql) } - fn execute( + fn execute>( &self, statement: &Statement, - args: Args, - ) -> Result, DatabaseError> { + params: A, + ) -> Result, DatabaseError> { let _guard = if matches!(command_type(statement)?, CommandType::DDL) { MetaDataLock::Write(self.mdl.write_arc()) } else { @@ -314,7 +310,7 @@ impl Database { let transaction = Box::into_raw(Box::new(self.storage.transaction()?)); let (schema, executor) = self.state - .execute(unsafe { &mut (*transaction) }, statement, args)?; + .execute(unsafe { &mut (*transaction) }, statement, params)?; let inner = Box::into_raw(Box::new(TransactionIter::new(schema, executor))); Ok(DatabaseIter { transaction, inner }) } @@ -388,24 +384,24 @@ impl DBTransaction<'_, S> { pub fn run>(&mut self, sql: T) -> Result, DatabaseError> { let statement = self.state.prepare(sql)?; - self.execute(&statement, vec![]) + self.execute(&statement, &[]) } pub fn prepare>(&self, sql: T) -> Result { self.state.prepare(sql) } - pub fn execute( + pub fn execute>( &mut self, statement: &Statement, - args: Args, + params: A, ) -> Result { if matches!(command_type(statement)?, CommandType::DDL) { return Err(DatabaseError::UnsupportedStmt( "`DDL` is not allowed to execute within a transaction".to_string(), )); } - let (schema, executor) = self.state.execute(&mut self.inner, statement, args)?; + let (schema, executor) = self.state.execute(&mut self.inner, statement, params)?; Ok(TransactionIter::new(schema, executor)) } @@ -587,7 +583,7 @@ pub(crate) mod test { { let statement = fnck_sql.prepare("explain select * from t1 where b > ?1")?; - let mut iter = fnck_sql.execute(&statement, vec![("?1", DataValue::Int32(Some(0)))])?; + let mut iter = fnck_sql.execute(&statement, &[("?1", DataValue::Int32(Some(0)))])?; assert_eq!( iter.next().unwrap()?.values[0].utf8().unwrap(), @@ -604,7 +600,7 @@ pub(crate) mod test { let mut iter = fnck_sql.execute( &statement, - vec![ + &[ ("?1", DataValue::Int32(Some(0))), ("?2", DataValue::Int32(Some(0))), ("?3", DataValue::Int32(Some(1))), @@ -624,7 +620,7 @@ pub(crate) mod test { let mut iter = fnck_sql.execute( &statement, - vec![ + &[ ("?1", DataValue::Int32(Some(9))), ("?2", DataValue::Int32(Some(0))), ("?3", DataValue::Int32(Some(1))), diff --git a/src/errors.rs b/src/errors.rs index 1ae98579..7bd53d48 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -4,7 +4,7 @@ use crate::types::LogicalType; use chrono::ParseError; use sqlparser::parser::ParserError; use std::num::{ParseFloatError, ParseIntError, TryFromIntError}; -use std::str::ParseBoolError; +use std::str::{ParseBoolError, Utf8Error}; use std::string::FromUtf8Error; 
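// --- Editor's annotation (not part of the patch) ---------------------------
// The `DuplicateSourceHash` error added just below exists because of this
// commit's key-encoding change: `TableCodec` now hashes each table/view name
// into a fixed-width key prefix so that RocksDB prefix ranges can be applied.
// Two different names could hash to the same prefix, so creating a source
// whose hash is already taken must fail loudly. This is a minimal, std-only
// sketch of the idea with hypothetical names; it is not the codec's actual
// implementation.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn source_prefix(name: &str) -> [u8; 8] {
    let mut hasher = DefaultHasher::new();
    name.hash(&mut hasher);
    // A fixed-width prefix means every key of one table shares its first
    // 8 bytes, so a store can scan them as one contiguous range.
    hasher.finish().to_be_bytes()
}

fn tuple_key(table: &str, pk: &[u8]) -> Vec<u8> {
    let mut key = source_prefix(table).to_vec();
    key.extend_from_slice(pk);
    key
}

fn main() {
    let (a, b) = (tuple_key("t1", &[1]), tuple_key("t1", &[2]));
    assert_eq!(a[..8], b[..8]); // same table => same prefix => one range scan
}
// --- End annotation ---------------------------------------------------------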
#[derive(thiserror::Error, Debug)] @@ -41,6 +41,8 @@ pub enum DatabaseError { DefaultNotExist, #[error("column: {0} already exists")] DuplicateColumn(String), + #[error("table or view: {0} hash already exists")] + DuplicateSourceHash(String), #[error("index: {0} already exists")] DuplicateIndex(String), #[error("duplicate primary key")] @@ -161,6 +163,12 @@ pub enum DatabaseError { UnsupportedBinaryOperator(LogicalType, BinaryOperator), #[error("unsupported statement: {0}")] UnsupportedStmt(String), + #[error("utf8: {0}")] + Utf8( + #[source] + #[from] + Utf8Error, + ), #[error("values length not match, expect {0}, got {1}")] ValuesLenMismatch(usize, usize), #[error("the view already exists")] diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs index 8e4519ab..dc890967 100644 --- a/src/execution/dml/analyze.rs +++ b/src/execution/dml/analyze.rs @@ -77,7 +77,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze { builders.push(( index.id, throw!(index.column_exprs(&table)), - throw!(HistogramBuilder::new(index, None)), + HistogramBuilder::new(index, None), )); } diff --git a/src/execution/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs index 7e824894..c72d28c2 100644 --- a/src/execution/dml/copy_to_file.rs +++ b/src/execution/dml/copy_to_file.rs @@ -1,19 +1,23 @@ use crate::binder::copy::FileFormat; use crate::errors::DatabaseError; -use crate::execution::{Executor, ReadExecutor}; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::planner::operator::copy_to_file::CopyToFileOperator; -use crate::storage::{Iter, StatisticsMetaCache, TableCache, Transaction, ViewCache}; +use crate::planner::LogicalPlan; +use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::throw; use crate::types::tuple_builder::TupleBuilder; -use std::sync::Arc; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; pub struct CopyToFile { - pub op: CopyToFileOperator, + op: CopyToFileOperator, + input: LogicalPlan, } -impl From for CopyToFile { - fn from(op: CopyToFileOperator) -> Self { - CopyToFile { op } +impl From<(CopyToFileOperator, LogicalPlan)> for CopyToFile { + fn from((op, input): (CopyToFileOperator, LogicalPlan)) -> Self { + CopyToFile { op, input } } } @@ -27,20 +31,13 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for CopyToFile { #[coroutine] move || { let mut writer = throw!(self.create_writer()); + let CopyToFile { input, .. 
} = self; + + let mut coroutine = build_read(input, cache, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple = throw!(tuple); - let mut iter = throw!(unsafe { &mut (*transaction) }.read( - cache.0, - Arc::new(self.op.table.clone()), - (None, None), - self.op - .schema_ref - .iter() - .enumerate() - .map(|(index, column_ref)| (index, column_ref.clone())) - .collect() - )); - - while let Some(tuple) = throw!(iter.next_tuple()) { throw!(writer .write_record( tuple @@ -96,6 +93,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef, ColumnRelation, ColumnSummary}; use crate::db::{DataBaseBuilder, ResultIter}; use crate::errors::DatabaseError; + use crate::planner::operator::table_scan::TableScanOperator; use crate::storage::Storage; use crate::types::LogicalType; use sqlparser::ast::CharLengthUnits; @@ -158,7 +156,6 @@ mod tests { let file_path = tmp_dir.path().join("test.csv"); let op = CopyToFileOperator { - table: "t1".to_string(), target: ExtSource { path: file_path.clone(), format: FileFormat::Csv { @@ -181,8 +178,14 @@ mod tests { let storage = db.storage; let mut transaction = storage.transaction()?; + let table = transaction + .table(&db.state.table_cache(), Arc::new("t1".to_string()))? + .unwrap(); - let executor = CopyToFile { op: op.clone() }; + let executor = CopyToFile { + op: op.clone(), + input: TableScanOperator::build(Arc::new("t1".to_string()), table), + }; let mut coroutine = executor.execute( ( db.state.table_cache(), diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index ba0ae31b..c028aead 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -96,7 +96,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { } for (i, column) in input_schema.iter().enumerate() { if let Some(expr) = exprs_map.get(&column.id()) { - tuple.values[i] = throw!(expr.eval(&tuple, &input_schema)); + tuple.values[i] = throw!(expr.eval(Some((&tuple, &input_schema)))); } } tuple.clear_id(); diff --git a/src/execution/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs index 39e2abe0..9debaee2 100644 --- a/src/execution/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -69,14 +69,14 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashAggExecutor { if args.len() > 1 { throw!(Err(DatabaseError::UnsupportedStmt("currently aggregate functions only support a single Column as a parameter".to_string()))) } - values.push(throw!(args[0].eval(&tuple, &schema_ref))); + values.push(throw!(args[0].eval(Some((&tuple, &schema_ref))))); } else { unreachable!() } } let group_keys: Vec = throw!(groupby_exprs .iter() - .map(|expr| expr.eval(&tuple, &schema_ref)) + .map(|expr| expr.eval(Some((&tuple, &schema_ref)))) .try_collect()); let entry = match group_hash_accs.entry(group_keys) { diff --git a/src/execution/dql/aggregate/simple_agg.rs b/src/execution/dql/aggregate/simple_agg.rs index 2fb13dcd..d6063911 100644 --- a/src/execution/dql/aggregate/simple_agg.rs +++ b/src/execution/dql/aggregate/simple_agg.rs @@ -50,7 +50,8 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SimpleAggExecutor { let values: Vec = throw!(agg_calls .iter() .map(|expr| match expr { - ScalarExpression::AggCall { args, .. } => args[0].eval(&tuple, &schema), + ScalarExpression::AggCall { args, .. 
} => + args[0].eval(Some((&tuple, &schema))), _ => unreachable!(), }) .try_collect()); diff --git a/src/execution/dql/filter.rs b/src/execution/dql/filter.rs index 57d3cc65..21ce815a 100644 --- a/src/execution/dql/filter.rs +++ b/src/execution/dql/filter.rs @@ -40,7 +40,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Filter { while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { let tuple = throw!(tuple); - if throw!(throw!(predicate.eval(&tuple, &schema)).is_true()) { + if throw!(throw!(predicate.eval(Some((&tuple, &schema)))).is_true()) { yield Ok(tuple); } } diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index 96ae3c64..b943e7b1 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -9,8 +9,8 @@ use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::throw; use crate::types::tuple::{Schema, Tuple}; use crate::types::value::{DataValue, NULL_VALUE}; -use crate::utils::bit_vector::BitVector; use ahash::{HashMap, HashMapExt}; +use fixedbitset::FixedBitSet; use itertools::Itertools; use std::ops::Coroutine; use std::ops::CoroutineState; @@ -49,7 +49,7 @@ impl HashJoin { let mut values = Vec::with_capacity(on_keys.len()); for expr in on_keys { - values.push(expr.eval(tuple, schema)?); + values.push(expr.eval(Some((tuple, schema)))?); } Ok(values) } @@ -62,7 +62,7 @@ impl HashJoin { left_schema_len: usize, ) -> Result, DatabaseError> { if let (Some(expr), false) = (filter, matches!(join_ty, JoinType::Full | JoinType::Cross)) { - match &expr.eval(&tuple, schema)? { + match &expr.eval(Some((&tuple, schema)))? { DataValue::Boolean(Some(false) | None) => { let full_schema_len = schema.len(); @@ -193,7 +193,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { if *is_filtered { continue; } else { - bits_option = Some(BitVector::new(tuples.len())); + bits_option = Some(FixedBitSet::with_capacity(tuples.len())); } } JoinType::LeftAnti => continue, @@ -214,7 +214,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { left_schema_len )) { if let Some(bits) = bits_option.as_mut() { - bits.set_bit(i, true); + bits.insert(i); } else { yield Ok(tuple); } @@ -223,7 +223,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { if let Some(bits) = bits_option { let mut cnt = 0; tuples.retain(|_| { - let res = bits.get_bit(cnt); + let res = bits.contains(cnt); cnt += 1; res }); @@ -312,10 +312,12 @@ mod test { use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::rocksdb::RocksStorage; + use crate::storage::table_codec::BumpBytes; use crate::storage::Storage; use crate::types::value::DataValue; use crate::types::LogicalType; use crate::utils::lru::SharedLruCache; + use bumpalo::Bump; use std::hash::RandomState; use std::sync::Arc; use tempfile::TempDir; @@ -494,9 +496,10 @@ mod test { executor.execute((&table_cache, &view_cache, &meta_cache), &mut transaction), )?; + let arena = Bump::new(); assert_eq!(tuples.len(), 2); tuples.sort_by_key(|tuple| { - let mut bytes = Vec::new(); + let mut bytes = BumpBytes::new_in(&arena); tuple.values[0].memcomparable_encode(&mut bytes).unwrap(); bytes }); diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index c84b44e8..7df5df1e 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -13,7 +13,7 @@ use 
crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::throw; use crate::types::tuple::{Schema, SchemaRef, Tuple}; use crate::types::value::{DataValue, NULL_VALUE}; -use crate::utils::bit_vector::BitVector; +use fixedbitset::FixedBitSet; use itertools::Itertools; use std::ops::Coroutine; use std::ops::CoroutineState; @@ -146,7 +146,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { let right_schema_len = eq_cond.right_schema.len(); let mut left_coroutine = build_read(left_input, cache, transaction); - let mut bitmap: Option = None; + let mut bitmap: Option = None; let mut first_matches = Vec::new(); while let CoroutineState::Yielded(left_tuple) = @@ -177,7 +177,8 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { } (Some(filter), true) => { let new_tuple = Self::merge_tuple(&left_tuple, &right_tuple, &ty); - let value = throw!(filter.eval(&new_tuple, &output_schema_ref)); + let value = + throw!(filter.eval(Some((&new_tuple, &output_schema_ref)))); match &value { DataValue::Boolean(Some(true)) => { let tuple = match ty { @@ -215,7 +216,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { break; } if let Some(bits) = bitmap.as_mut() { - bits.set_bit(right_idx, true); + bits.insert(right_idx); } else if matches!(ty, JoinType::Full) { first_matches.push(right_idx); } @@ -227,7 +228,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { } if matches!(self.ty, JoinType::Full) && bitmap.is_none() { - bitmap = Some(BitVector::new(right_idx)); + bitmap = Some(FixedBitSet::with_capacity(right_idx)); } // handle no matched tuple case @@ -256,7 +257,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { if matches!(ty, JoinType::Full) { for idx in first_matches.into_iter() { - bitmap.as_mut().unwrap().set_bit(idx, true); + bitmap.as_mut().unwrap().insert(idx); } let mut right_coroutine = build_read(right_input.clone(), cache, transaction); @@ -264,7 +265,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { while let CoroutineState::Yielded(right_tuple) = Pin::new(&mut right_coroutine).resume(()) { - if !bitmap.as_ref().unwrap().get_bit(idx) { + if !bitmap.as_ref().unwrap().contains(idx) { let mut right_tuple: Tuple = throw!(right_tuple); let mut values = vec![NULL_VALUE.clone(); right_schema_len]; values.append(&mut right_tuple.values); diff --git a/src/execution/dql/projection.rs b/src/execution/dql/projection.rs index 9d3bf75e..3584912f 100644 --- a/src/execution/dql/projection.rs +++ b/src/execution/dql/projection.rs @@ -58,7 +58,7 @@ impl Projection { let mut values = Vec::with_capacity(exprs.len()); for expr in exprs.iter() { - values.push(expr.eval(tuple, schmea)?); + values.push(expr.eval(Some((tuple, schmea)))?); } Ok(values) } diff --git a/src/execution/dql/sort.rs b/src/execution/dql/sort.rs index a882be10..8bc40fd1 100644 --- a/src/execution/dql/sort.rs +++ b/src/execution/dql/sort.rs @@ -2,54 +2,60 @@ use crate::errors::DatabaseError; use crate::execution::{build_read, Executor, ReadExecutor}; use crate::planner::operator::sort::{SortField, SortOperator}; use crate::planner::LogicalPlan; +use crate::storage::table_codec::BumpBytes; use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::throw; use crate::types::tuple::{Schema, Tuple}; -use itertools::Itertools; +use bumpalo::Bump; use std::cmp::Ordering; -use std::mem; use std::ops::Coroutine; use std::ops::CoroutineState; use std::pin::Pin; 
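// --- Editor's annotation (not part of the patch) ---------------------------
// Below, `Sort` switches to bump-allocated buffers (`bumpalo`) while keeping
// its LSD (least-significant-digit) radix sort over memcomparable byte keys.
// This is a minimal, std-only sketch of that sorting technique, assuming
// 0-padding for short keys (shorter keys order first), which matches the
// `if bytes.len() > i { bytes[i] } else { 0 }` bucketing in the patch.
// Descending fields are handled, as in the patch, by XOR-ing every key byte
// with 0xFF before sorting. Illustrative only, not the crate's implementation.
fn lsd_radix_sort<T>(mut items: Vec<(T, Vec<u8>)>) -> Vec<T> {
    let max_len = items.iter().map(|(_, key)| key.len()).max().unwrap_or(0);
    // Sort one byte position per pass, last position first; stable buckets
    // preserve the order established by later (less significant) passes.
    for i in (0..max_len).rev() {
        let mut buckets: Vec<Vec<(T, Vec<u8>)>> = (0..256).map(|_| Vec::new()).collect();
        for (item, key) in items {
            let byte = key.get(i).copied().unwrap_or(0); // 0-pad short keys
            buckets[byte as usize].push((item, key));
        }
        items = buckets.into_iter().flatten().collect();
    }
    items.into_iter().map(|(item, _)| item).collect()
}

fn main() {
    // Same inputs and expected order as the patch's `test_radix_sort`.
    let keys = vec![
        (0, b"abc".to_vec()),
        (1, b"abz".to_vec()),
        (2, b"abe".to_vec()),
        (3, b"abcd".to_vec()),
    ];
    assert_eq!(lsd_radix_sort(keys), vec![0, 3, 2, 1]);
}
// --- End annotation ---------------------------------------------------------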
+pub(crate) type BumpVec<'bump, T> = bumpalo::collections::Vec<'bump, T>;
+
 #[derive(Clone)]
-pub(crate) struct NullableVec<T>(Vec<Option<T>>);
+pub(crate) struct NullableVec<'a, T>(BumpVec<'a, Option<T>>);
+
+impl<'a, T> NullableVec<'a, T> {
+    #[inline]
+    pub(crate) fn new(arena: &'a Bump) -> NullableVec<'a, T> {
+        NullableVec(BumpVec::new_in(arena))
+    }
 
-impl<T> NullableVec<T> {
-    pub(crate) fn with_capacity(capacity: usize) -> NullableVec<T> {
-        NullableVec(Vec::with_capacity(capacity))
+    #[inline]
+    pub(crate) fn with_capacity(capacity: usize, arena: &'a Bump) -> NullableVec<'a, T> {
+        NullableVec(BumpVec::with_capacity_in(capacity, arena))
     }
 
+    #[inline]
     pub(crate) fn put(&mut self, item: T) {
         self.0.push(Some(item));
     }
 
+    #[inline]
     pub(crate) fn take(&mut self, offset: usize) -> T {
         self.0[offset].take().unwrap()
     }
 
+    #[inline]
     pub(crate) fn get(&self, offset: usize) -> &T {
         self.0[offset].as_ref().unwrap()
     }
 
+    #[inline]
     pub(crate) fn len(&self) -> usize {
         self.0.len()
     }
 }
 
-impl<T> Default for NullableVec<T> {
-    fn default() -> Self {
-        NullableVec(Vec::default())
-    }
-}
-
-struct RemappingIterator {
+struct RemappingIterator<'a> {
     pos: usize,
-    tuples: NullableVec<(usize, Tuple)>,
-    indices: Vec<usize>,
+    tuples: NullableVec<'a, (usize, Tuple)>,
+    indices: BumpVec<'a, usize>,
 }
 
-impl Iterator for RemappingIterator {
+impl Iterator for RemappingIterator<'_> {
     type Item = Tuple;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -66,26 +72,34 @@ impl Iterator for RemappingIterator {
 const BUCKET_SIZE: usize = u8::MAX as usize + 1;
 
 // LSD Radix Sort
-pub(crate) fn radix_sort<T>(mut tuples: Vec<(T, Vec<u8>)>) -> Vec<T> {
-    if let Some(max_len) = tuples.iter().map(|(_, bytes)| bytes.len()).max() {
+pub(crate) fn radix_sort<'a, T, A: AsRef<[u8]>>(
+    mut tuples: BumpVec<'a, (T, A)>,
+    arena: &'a Bump,
+) -> BumpVec<'a, T> {
+    if let Some(max_len) = tuples.iter().map(|(_, bytes)| bytes.as_ref().len()).max() {
         // init buckets
-        let mut temp_buckets = Vec::with_capacity(BUCKET_SIZE);
+        let mut temp_buckets = BumpVec::with_capacity_in(BUCKET_SIZE, arena);
         for _ in 0..BUCKET_SIZE {
-            temp_buckets.push(Vec::new());
+            temp_buckets.push(BumpVec::new_in(arena));
         }
 
         for i in (0..max_len).rev() {
-            for (t, bytes) in tuples {
+            for (t, value) in tuples.drain(..) 
{ + let bytes = value.as_ref(); let index = if bytes.len() > i { bytes[i] } else { 0 }; - temp_buckets[index as usize].push((t, bytes)); + temp_buckets[index as usize].push((t, value)); + } + for bucket in temp_buckets.iter_mut() { + tuples.append(bucket); } - - tuples = temp_buckets.iter_mut().flat_map(mem::take).collect_vec(); } - return tuples.into_iter().map(|(tuple, _)| tuple).collect_vec(); } - Vec::new() + let mut result = BumpVec::with_capacity_in(tuples.len(), arena); + for (item, _) in tuples { + result.push(item); + } + result } pub enum SortBy { @@ -94,20 +108,21 @@ pub enum SortBy { } impl SortBy { - pub(crate) fn sorted_tuples( + pub(crate) fn sorted_tuples<'a>( &self, + arena: &'a Bump, schema: &Schema, sort_fields: &[SortField], - mut tuples: NullableVec<(usize, Tuple)>, - ) -> Result>, DatabaseError> { + mut tuples: NullableVec<'a, (usize, Tuple)>, + ) -> Result + 'a>, DatabaseError> { match self { SortBy::Radix => { - let mut sort_keys = Vec::with_capacity(tuples.len()); + let mut sort_keys = BumpVec::with_capacity_in(tuples.len(), arena); for (i, tuple) in tuples.0.iter().enumerate() { debug_assert!(tuple.is_some()); - let mut full_key = Vec::new(); + let mut full_key = BumpVec::new_in(arena); for SortField { expr, @@ -115,10 +130,11 @@ impl SortBy { asc, } in sort_fields { - let mut key = Vec::new(); + let mut key = BumpBytes::new_in(arena); let tuple = tuple.as_ref().map(|(_, tuple)| tuple).unwrap(); - expr.eval(tuple, schema)?.memcomparable_encode(&mut key)?; + expr.eval(Some((tuple, schema)))? + .memcomparable_encode(&mut key)?; if !asc { for byte in key.iter_mut() { *byte ^= 0xFF; @@ -129,7 +145,7 @@ impl SortBy { } sort_keys.push((i, full_key)) } - let indices = radix_sort(sort_keys); + let indices = radix_sort(sort_keys, arena); Ok(Box::new(RemappingIterator { pos: 0, @@ -154,7 +170,7 @@ impl SortBy { debug_assert!(tuple.is_some()); let (_, tuple) = tuple.as_ref().unwrap(); - eval_values[x].push(expr.eval(tuple, schema)?); + eval_values[x].push(expr.eval(Some((tuple, schema)))?); } } @@ -209,6 +225,7 @@ impl SortBy { } pub struct Sort { + arena: Bump, sort_fields: Vec, limit: Option, input: LogicalPlan, @@ -217,6 +234,7 @@ pub struct Sort { impl From<(SortOperator, LogicalPlan)> for Sort { fn from((SortOperator { sort_fields, limit }, input): (SortOperator, LogicalPlan)) -> Self { Sort { + arena: Default::default(), sort_fields, limit, input, @@ -234,13 +252,15 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Sort { #[coroutine] move || { let Sort { + arena, sort_fields, limit, mut input, } = self; + let arena: *const Bump = &arena; let schema = input.output_schema().clone(); - let mut tuples = NullableVec::default(); + let mut tuples = NullableVec::new(unsafe { &*arena }); let mut offset = 0; let mut coroutine = build_read(input, cache, transaction); @@ -257,7 +277,9 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Sort { }; let mut limit = limit.unwrap_or(tuples.len()); - for tuple in throw!(sort_by.sorted_tuples(&schema, &sort_fields, tuples)) { + for tuple in + throw!(sort_by.sorted_tuples(unsafe { &*arena }, &schema, &sort_fields, tuples)) + { if limit != 0 { yield Ok(tuple); limit -= 1; @@ -272,24 +294,29 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Sort { mod test { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; use crate::errors::DatabaseError; - use crate::execution::dql::sort::{radix_sort, NullableVec, SortBy}; + use crate::execution::dql::sort::{radix_sort, BumpVec, NullableVec, SortBy}; use 
crate::expression::ScalarExpression; use crate::planner::operator::sort::SortField; use crate::types::tuple::Tuple; use crate::types::value::DataValue; use crate::types::LogicalType; + use bumpalo::Bump; use std::sync::Arc; #[test] fn test_radix_sort() { - let indices = vec![ - (0, "abc".as_bytes().to_vec()), - (1, "abz".as_bytes().to_vec()), - (2, "abe".as_bytes().to_vec()), - (3, "abcd".as_bytes().to_vec()), - ]; - - assert_eq!(radix_sort(indices), vec![0, 3, 2, 1]) + let arena = Bump::new(); + { + let mut indices = BumpVec::new_in(&arena); + indices.push((0, "abc".as_bytes().to_vec())); + indices.push((1, "abz".as_bytes().to_vec())); + indices.push((2, "abe".as_bytes().to_vec())); + indices.push((3, "abcd".as_bytes().to_vec())); + + let indices = radix_sort(indices, &arena); + assert_eq!(indices.as_slice(), &[0, 3, 2, 1]); + drop(indices) + } } #[test] @@ -309,11 +336,22 @@ mod test { true, ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), ))]); - let tuples = NullableVec(vec![ - Some((0_usize, Tuple::new(None, vec![DataValue::Int32(None)]))), - Some((1_usize, Tuple::new(None, vec![DataValue::Int32(Some(0))]))), - Some((2_usize, Tuple::new(None, vec![DataValue::Int32(Some(1))]))), - ]); + + let arena = Bump::new(); + let mut inner = BumpVec::new_in(&arena); + inner.push(Some(( + 0_usize, + Tuple::new(None, vec![DataValue::Int32(None)]), + ))); + inner.push(Some(( + 1_usize, + Tuple::new(None, vec![DataValue::Int32(Some(0))]), + ))); + inner.push(Some(( + 2_usize, + Tuple::new(None, vec![DataValue::Int32(Some(1))]), + ))); + let tuples = NullableVec(inner); let fn_asc_and_nulls_last_eq = |mut iter: Box>| { if let Some(tuple) = iter.next() { @@ -386,21 +424,25 @@ mod test { // RadixSort fn_asc_and_nulls_first_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, true), tuples.clone(), )?); fn_asc_and_nulls_last_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, false), tuples.clone(), )?); fn_desc_and_nulls_first_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, true), tuples.clone(), )?); fn_desc_and_nulls_last_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, false), tuples.clone(), @@ -408,21 +450,25 @@ mod test { // FastSort fn_asc_and_nulls_first_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, true), tuples.clone(), )?); fn_asc_and_nulls_last_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, false), tuples.clone(), )?); fn_desc_and_nulls_first_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, true), tuples.clone(), )?); fn_desc_and_nulls_last_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &&fn_sort_fields(false, false), tuples.clone(), @@ -466,47 +512,48 @@ mod test { ColumnDesc::new(LogicalType::Integer, None, false, None).unwrap(), )), ]); - let tuples = NullableVec(vec![ - Some(( - 0_usize, - Tuple::new(None, vec![DataValue::Int32(None), DataValue::Int32(None)]), - )), - Some(( - 1_usize, - Tuple::new( - None, - vec![DataValue::Int32(Some(0)), DataValue::Int32(None)], - ), - )), - Some(( - 2_usize, - Tuple::new( - None, - vec![DataValue::Int32(Some(1)), DataValue::Int32(None)], - ), - )), - Some(( - 3_usize, - Tuple::new( - None, - vec![DataValue::Int32(None), DataValue::Int32(Some(0))], - ), - )), - Some(( - 4_usize, - Tuple::new( - None, - vec![DataValue::Int32(Some(0)), DataValue::Int32(Some(0))], - ), - )), - Some(( - 5_usize, - Tuple::new( - None, - vec![DataValue::Int32(Some(1)), 
DataValue::Int32(Some(0))], - ), - )), - ]); + let arena = Bump::new(); + let mut inner = BumpVec::new_in(&arena); + inner.push(Some(( + 0_usize, + Tuple::new(None, vec![DataValue::Int32(None), DataValue::Int32(None)]), + ))); + inner.push(Some(( + 1_usize, + Tuple::new( + None, + vec![DataValue::Int32(Some(0)), DataValue::Int32(None)], + ), + ))); + inner.push(Some(( + 2_usize, + Tuple::new( + None, + vec![DataValue::Int32(Some(1)), DataValue::Int32(None)], + ), + ))); + inner.push(Some(( + 3_usize, + Tuple::new( + None, + vec![DataValue::Int32(None), DataValue::Int32(Some(0))], + ), + ))); + inner.push(Some(( + 4_usize, + Tuple::new( + None, + vec![DataValue::Int32(Some(0)), DataValue::Int32(Some(0))], + ), + ))); + inner.push(Some(( + 5_usize, + Tuple::new( + None, + vec![DataValue::Int32(Some(1)), DataValue::Int32(Some(0))], + ), + ))); + let tuples = NullableVec(inner); let fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq = |mut iter: Box>| { if let Some(tuple) = iter.next() { @@ -714,21 +761,25 @@ mod test { // RadixSort fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, true, true, true), tuples.clone(), )?); fn_asc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, false, true, true), tuples.clone(), )?); fn_desc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, true, true, true), tuples.clone(), )?); fn_desc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Radix.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, false, true, true), tuples.clone(), @@ -736,21 +787,25 @@ mod test { // FastSort fn_asc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, true, true, true), tuples.clone(), )?); fn_asc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(true, false, true, true), tuples.clone(), )?); fn_desc_1_and_nulls_first_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, true, true, true), tuples.clone(), )?); fn_desc_1_and_nulls_last_1_and_asc_2_and_nulls_first_2_eq(SortBy::Fast.sorted_tuples( + &arena, &schema, &fn_sort_fields(false, false, true, true), tuples.clone(), diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 1f25ae89..7fab3246 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -194,7 +194,11 @@ pub fn build_write<'a, T: Transaction + 'a>( Operator::DropView(op) => DropView::from(op).execute_mut(cache, transaction), Operator::Truncate(op) => Truncate::from(op).execute_mut(cache, transaction), Operator::CopyFromFile(op) => CopyFromFile::from(op).execute_mut(cache, transaction), - Operator::CopyToFile(op) => CopyToFile::from(op).execute(cache, transaction), + Operator::CopyToFile(op) => { + let input = childrens.pop_only(); + + CopyToFile::from((op, input)).execute(cache, transaction) + } Operator::Analyze(op) => { let input = childrens.pop_only(); diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index 747a6edb..bcf77587 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -11,17 +11,10 @@ use regex::Regex; use sqlparser::ast::{CharLengthUnits, TrimWhereField}; use std::cmp; use std::cmp::Ordering; -use std::sync::LazyLock; - -static NULL_VALUE: LazyLock = LazyLock::new(|| 
DataValue::Null); macro_rules! eval_to_num { - ($num_expr:expr, $tuple:expr, $schema:expr) => { - if let Some(num_i32) = $num_expr - .eval($tuple, $schema)? - .cast(&LogicalType::Integer)? - .i32() - { + ($num_expr:expr, $tuple:expr) => { + if let Some(num_i32) = $num_expr.eval($tuple)?.cast(&LogicalType::Integer)?.i32() { num_i32 } else { return Ok(DataValue::Utf8 { @@ -34,7 +27,7 @@ macro_rules! eval_to_num { } impl ScalarExpression { - pub fn eval(&self, tuple: &Tuple, schema: &[ColumnRef]) -> Result { + pub fn eval(&self, tuple: Option<(&Tuple, &[ColumnRef])>) -> Result { let check_cast = |value: DataValue, return_type: &LogicalType| { if value.logical_type() != *return_type { return value.cast(return_type); @@ -45,16 +38,21 @@ impl ScalarExpression { match self { ScalarExpression::Constant(val) => Ok(val.clone()), ScalarExpression::ColumnRef(col) => { + let Some((tuple, schema)) = tuple else { + return Ok(DataValue::Null); + }; let value = schema .iter() .find_position(|tul_col| tul_col.summary() == col.summary()) - .map(|(i, _)| &tuple.values[i]) - .unwrap_or(&NULL_VALUE) - .clone(); + .map(|(i, _)| tuple.values[i].clone()) + .unwrap_or(DataValue::Null); Ok(value) } ScalarExpression::Alias { expr, alias } => { + let Some((tuple, schema)) = tuple else { + return Ok(DataValue::Null); + }; if let Some(value) = schema .iter() .find_position(|tul_col| match alias { @@ -65,24 +63,22 @@ impl ScalarExpression { alias_expr.output_column().summary() == tul_col.summary() } }) - .map(|(i, _)| &tuple.values[i]) + .map(|(i, _)| tuple.values[i].clone()) { return Ok(value.clone()); } - expr.eval(tuple, schema) - } - ScalarExpression::TypeCast { expr, ty, .. } => { - Ok(expr.eval(tuple, schema)?.cast(ty)?) + expr.eval(Some((tuple, schema))) } + ScalarExpression::TypeCast { expr, ty, .. } => Ok(expr.eval(tuple)?.cast(ty)?), ScalarExpression::Binary { left_expr, right_expr, evaluator, .. } => { - let left = left_expr.eval(tuple, schema)?; - let right = right_expr.eval(tuple, schema)?; + let left = left_expr.eval(tuple)?; + let right = right_expr.eval(tuple)?; Ok(evaluator .as_ref() @@ -91,7 +87,7 @@ impl ScalarExpression { .binary_eval(&left, &right)) } ScalarExpression::IsNull { expr, negated } => { - let mut is_null = expr.eval(tuple, schema)?.is_null(); + let mut is_null = expr.eval(tuple)?.is_null(); if *negated { is_null = !is_null; } @@ -102,13 +98,13 @@ impl ScalarExpression { args, negated, } => { - let value = expr.eval(tuple, schema)?; + let value = expr.eval(tuple)?; if value.is_null() { return Ok(DataValue::Boolean(None)); } let mut is_in = false; for arg in args { - let arg_value = arg.eval(tuple, schema)?; + let arg_value = arg.eval(tuple)?; if arg_value.is_null() { return Ok(DataValue::Boolean(None)); @@ -126,7 +122,7 @@ impl ScalarExpression { ScalarExpression::Unary { expr, evaluator, .. } => { - let value = expr.eval(tuple, schema)?; + let value = expr.eval(tuple)?; Ok(evaluator .as_ref() @@ -143,9 +139,9 @@ impl ScalarExpression { right_expr, negated, } => { - let value = expr.eval(tuple, schema)?; - let left = left_expr.eval(tuple, schema)?; - let right = right_expr.eval(tuple, schema)?; + let value = expr.eval(tuple)?; + let left = left_expr.eval(tuple)?; + let right = right_expr.eval(tuple)?; let mut is_between = match ( value.partial_cmp(&left).map(Ordering::is_ge), @@ -166,12 +162,12 @@ impl ScalarExpression { from_expr, } => { if let Some(mut string) = expr - .eval(tuple, schema)? + .eval(tuple)? .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? 
.utf8() { if let Some(from_expr) = from_expr { - let mut from = eval_to_num!(from_expr, tuple, schema).saturating_sub(1); + let mut from = eval_to_num!(from_expr, tuple).saturating_sub(1); let len_i = string.len() as i32; while from < 0 { @@ -187,8 +183,7 @@ impl ScalarExpression { string = string.split_off(from as usize); } if let Some(for_expr) = for_expr { - let for_i = - cmp::min(eval_to_num!(for_expr, tuple, schema) as usize, string.len()); + let for_i = cmp::min(eval_to_num!(for_expr, tuple) as usize, string.len()); let _ = string.split_off(for_i); } @@ -208,7 +203,7 @@ impl ScalarExpression { ScalarExpression::Position { expr, in_expr } => { let unpack = |expr: &ScalarExpression| -> Result { Ok(expr - .eval(tuple, schema)? + .eval(tuple)? .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() .unwrap_or("".to_owned())) @@ -226,14 +221,14 @@ impl ScalarExpression { } => { let mut value = None; if let Some(string) = expr - .eval(tuple, schema)? + .eval(tuple)? .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() { let mut trim_what = String::from(" "); if let Some(trim_what_expr) = trim_what_expr { trim_what = trim_what_expr - .eval(tuple, schema)? + .eval(tuple)? .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() .unwrap_or_default(); @@ -263,23 +258,24 @@ impl ScalarExpression { unit: CharLengthUnits::Characters, }) } - ScalarExpression::Reference { pos, .. } => Ok(tuple - .values - .get(*pos) - .unwrap_or_else(|| &NULL_VALUE) - .clone()), + ScalarExpression::Reference { pos, .. } => { + let Some((tuple, _)) = tuple else { + return Ok(DataValue::Null); + }; + Ok(tuple.values.get(*pos).cloned().unwrap_or(DataValue::Null)) + } ScalarExpression::Tuple(exprs) => { let mut values = Vec::with_capacity(exprs.len()); for expr in exprs { - values.push(expr.eval(tuple, schema)?); + values.push(expr.eval(tuple)?); } Ok(DataValue::Tuple( (!values.is_empty()).then_some((values, false)), )) } ScalarExpression::ScalaFunction(ScalarFunction { inner, args, .. }) => { - inner.eval(args, tuple, schema)?.cast(inner.return_type()) + inner.eval(args, tuple)?.cast(inner.return_type()) } ScalarExpression::Empty => unreachable!(), ScalarExpression::If { @@ -288,10 +284,10 @@ impl ScalarExpression { right_expr, ty, } => { - if condition.eval(tuple, schema)?.is_true()? { - check_cast(left_expr.eval(tuple, schema)?, ty) + if condition.eval(tuple)?.is_true()? { + check_cast(left_expr.eval(tuple)?, ty) } else { - check_cast(right_expr.eval(tuple, schema)?, ty) + check_cast(right_expr.eval(tuple)?, ty) } } ScalarExpression::IfNull { @@ -299,10 +295,10 @@ impl ScalarExpression { right_expr, ty, } => { - let mut value = left_expr.eval(tuple, schema)?; + let mut value = left_expr.eval(tuple)?; if value.is_null() { - value = right_expr.eval(tuple, schema)?; + value = right_expr.eval(tuple)?; } check_cast(value, ty) } @@ -311,10 +307,10 @@ impl ScalarExpression { right_expr, ty, } => { - let mut value = left_expr.eval(tuple, schema)?; + let mut value = left_expr.eval(tuple)?; - if right_expr.eval(tuple, schema)? == value { - value = NULL_VALUE.clone(); + if right_expr.eval(tuple)? 
== value { + value = DataValue::Null; } check_cast(value, ty) } @@ -322,14 +318,14 @@ impl ScalarExpression { let mut value = None; for expr in exprs { - let temp = expr.eval(tuple, schema)?; + let temp = expr.eval(tuple)?; if !temp.is_null() { value = Some(temp); break; } } - check_cast(value.unwrap_or_else(|| NULL_VALUE.clone()), ty) + check_cast(value.unwrap_or(DataValue::Null), ty) } ScalarExpression::CaseWhen { operand_expr, @@ -341,10 +337,10 @@ impl ScalarExpression { let mut result = None; if let Some(expr) = operand_expr { - operand_value = Some(expr.eval(tuple, schema)?); + operand_value = Some(expr.eval(tuple)?); } for (when_expr, result_expr) in expr_pairs { - let mut when_value = when_expr.eval(tuple, schema)?; + let mut when_value = when_expr.eval(tuple)?; let is_true = if let Some(operand_value) = &operand_value { let ty = operand_value.logical_type(); let evaluator = @@ -361,16 +357,16 @@ impl ScalarExpression { when_value.is_true()? }; if is_true { - result = Some(result_expr.eval(tuple, schema)?); + result = Some(result_expr.eval(tuple)?); break; } } if result.is_none() { if let Some(expr) = else_expr { - result = Some(expr.eval(tuple, schema)?); + result = Some(expr.eval(tuple)?); } } - check_cast(result.unwrap_or_else(|| NULL_VALUE.clone()), ty) + check_cast(result.unwrap_or(DataValue::Null), ty) } ScalarExpression::TableFunction(_) => unreachable!(), } diff --git a/src/expression/function/scala.rs b/src/expression/function/scala.rs index 60409b69..f351ad24 100644 --- a/src/expression/function/scala.rs +++ b/src/expression/function/scala.rs @@ -54,8 +54,7 @@ pub trait ScalarFunctionImpl: Debug + Send + Sync { fn eval( &self, args: &[ScalarExpression], - tuple: &Tuple, - schema: &[ColumnRef], + tuple: Option<(&Tuple, &[ColumnRef])>, ) -> Result; // TODO: Exploiting monotonicity when optimizing `ScalarFunctionImpl::monotonicity()` diff --git a/src/function/char_length.rs b/src/function/char_length.rs index d4eeb834..dd5b7693 100644 --- a/src/function/char_length.rs +++ b/src/function/char_length.rs @@ -35,11 +35,9 @@ impl ScalarFunctionImpl for CharLength { fn eval( &self, exprs: &[ScalarExpression], - tuples: &Tuple, - columns: &[ColumnRef], + tuples: Option<(&Tuple, &[ColumnRef])>, ) -> Result { - let value = exprs[0].eval(tuples, columns)?; - let mut value = DataValue::clone(&value); + let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { value = value.cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?; } diff --git a/src/function/current_date.rs b/src/function/current_date.rs index 7e7a4d85..f1519a93 100644 --- a/src/function/current_date.rs +++ b/src/function/current_date.rs @@ -37,8 +37,7 @@ impl ScalarFunctionImpl for CurrentDate { fn eval( &self, _: &[ScalarExpression], - _: &Tuple, - _: &[ColumnRef], + _: Option<(&Tuple, &[ColumnRef])>, ) -> Result { Ok(DataValue::Date32(Some(Local::now().num_days_from_ce()))) } diff --git a/src/function/lower.rs b/src/function/lower.rs index 655c7506..57aa83fc 100644 --- a/src/function/lower.rs +++ b/src/function/lower.rs @@ -37,11 +37,9 @@ impl ScalarFunctionImpl for Lower { fn eval( &self, exprs: &[ScalarExpression], - tuples: &Tuple, - columns: &[ColumnRef], + tuples: Option<(&Tuple, &[ColumnRef])>, ) -> Result { - let value = exprs[0].eval(tuples, columns)?; - let mut value = DataValue::clone(&value); + let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { value = value.cast(&LogicalType::Varchar(None, 
CharLengthUnits::Characters))?; } diff --git a/src/function/numbers.rs b/src/function/numbers.rs index adbda58f..d5e7cfd5 100644 --- a/src/function/numbers.rs +++ b/src/function/numbers.rs @@ -5,8 +5,8 @@ use crate::errors::DatabaseError; use crate::expression::function::table::TableFunctionImpl; use crate::expression::function::FunctionSummary; use crate::expression::ScalarExpression; +use crate::types::tuple::SchemaRef; use crate::types::tuple::Tuple; -use crate::types::tuple::{SchemaRef, EMPTY_TUPLE}; use crate::types::value::DataValue; use crate::types::LogicalType; use serde::Deserialize; @@ -52,7 +52,7 @@ impl TableFunctionImpl for Numbers { &self, args: &[ScalarExpression], ) -> Result>>, DatabaseError> { - let mut value = args[0].eval(&EMPTY_TUPLE, &[])?; + let mut value = args[0].eval(None)?; if value.logical_type() != LogicalType::Integer { value = value.cast(&LogicalType::Integer)?; diff --git a/src/function/upper.rs b/src/function/upper.rs index 531cc9b0..29a3e9b5 100644 --- a/src/function/upper.rs +++ b/src/function/upper.rs @@ -37,11 +37,9 @@ impl ScalarFunctionImpl for Upper { fn eval( &self, exprs: &[ScalarExpression], - tuples: &Tuple, - columns: &[ColumnRef], + tuples: Option<(&Tuple, &[ColumnRef])>, ) -> Result { - let value = exprs[0].eval(tuples, columns)?; - let mut value = DataValue::clone(&value); + let mut value = exprs[0].eval(tuples)?; if !matches!(value.logical_type(), LogicalType::Varchar(_, _)) { value = value.cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?; } diff --git a/src/macros/mod.rs b/src/macros/mod.rs index b3f7499e..c3abfc50 100644 --- a/src/macros/mod.rs +++ b/src/macros/mod.rs @@ -93,11 +93,11 @@ macro_rules! scala_function { #[typetag::serde] impl ::fnck_sql::expression::function::scala::ScalarFunctionImpl for $struct_name { #[allow(unused_variables, clippy::redundant_closure_call)] - fn eval(&self, args: &[::fnck_sql::expression::ScalarExpression], tuple: &::fnck_sql::types::tuple::Tuple, schema: &[::fnck_sql::catalog::column::ColumnRef]) -> Result<::fnck_sql::types::value::DataValue, ::fnck_sql::errors::DatabaseError> { + fn eval(&self, args: &[::fnck_sql::expression::ScalarExpression], tuple: Option<(&::fnck_sql::types::tuple::Tuple, &[::fnck_sql::catalog::column::ColumnRef])>) -> Result<::fnck_sql::types::value::DataValue, ::fnck_sql::errors::DatabaseError> { let mut _index = 0; $closure($({ - let mut value = args[_index].eval(tuple, schema)?; + let mut value = args[_index].eval(tuple)?; _index += 1; if value.logical_type() != $arg_ty { @@ -184,7 +184,7 @@ macro_rules! 
table_function { let mut _index = 0; $closure($({ - let mut value = args[_index].eval(&::fnck_sql::types::tuple::EMPTY_TUPLE, &[])?; + let mut value = args[_index].eval(None)?; _index += 1; if value.logical_type() != $arg_ty { diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index 75690900..fb66ae21 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -1,23 +1,28 @@ use crate::errors::DatabaseError; -use crate::execution::dql::sort::{radix_sort, NullableVec}; +use crate::execution::dql::sort::{radix_sort, BumpVec, NullableVec}; use crate::expression::range_detacher::Range; use crate::expression::BinaryOperator; use crate::optimizer::core::cm_sketch::CountMinSketch; +use crate::storage::table_codec::BumpBytes; use crate::types::evaluator::EvaluatorFactory; use crate::types::index::{IndexId, IndexMeta}; use crate::types::value::DataValue; use crate::types::LogicalType; +use bumpalo::Bump; use fnck_sql_serde_macros::ReferenceSerialization; use ordered_float::OrderedFloat; use std::collections::Bound; use std::{cmp, mem}; pub struct HistogramBuilder { + arena: Bump, index_id: IndexId, + capacity: Option, + is_init: bool, null_count: usize, - values: NullableVec<(usize, DataValue)>, - sort_keys: Vec<(usize, Vec)>, + values: Option>, + sort_keys: Option)>>, value_index: usize, } @@ -47,25 +52,56 @@ struct Bucket { } impl HistogramBuilder { - pub fn new(index_meta: &IndexMeta, capacity: Option) -> Result { - Ok(Self { + #[allow(clippy::missing_transmute_annotations)] + pub(crate) fn init(&mut self) { + if self.is_init { + return; + } + let (values, sort_keys) = self + .capacity + .map(|capacity| { + ( + NullableVec::<(usize, DataValue)>::with_capacity(capacity, &self.arena), + BumpVec::<(usize, BumpBytes<'static>)>::with_capacity_in(capacity, &self.arena), + ) + }) + .unwrap_or_else(|| (NullableVec::new(&self.arena), BumpVec::new_in(&self.arena))); + + self.values = Some(unsafe { mem::transmute::<_, _>(values) }); + self.sort_keys = Some(unsafe { mem::transmute::<_, _>(sort_keys) }); + self.is_init = true; + } + + pub fn new(index_meta: &IndexMeta, capacity: Option) -> Self { + Self { + arena: Default::default(), index_id: index_meta.id, + capacity, + is_init: false, null_count: 0, - values: capacity.map(NullableVec::with_capacity).unwrap_or_default(), - sort_keys: capacity.map(Vec::with_capacity).unwrap_or_default(), + values: None, + sort_keys: None, value_index: 0, - }) + } } + #[allow(clippy::missing_transmute_annotations)] pub fn append(&mut self, value: &DataValue) -> Result<(), DatabaseError> { + self.init(); if value.is_null() { self.null_count += 1; } else { - let mut bytes = Vec::new(); + let mut bytes = BumpBytes::new_in(&self.arena); value.memcomparable_encode(&mut bytes)?; - self.values.put((self.value_index, value.clone())); - self.sort_keys.push((self.value_index, bytes)) + self.values + .as_mut() + .unwrap() + .put((self.value_index, value.clone())); + self.sort_keys + .as_mut() + .unwrap() + .push((self.value_index, unsafe { mem::transmute::<_, _>(bytes) })) } self.value_index += 1; @@ -74,32 +110,33 @@ impl HistogramBuilder { } pub fn build( - self, + mut self, number_of_buckets: usize, ) -> Result<(Histogram, CountMinSketch), DatabaseError> { - if number_of_buckets > self.values.len() { - return Err(DatabaseError::TooManyBuckets( - number_of_buckets, - self.values.len(), - )); + self.init(); + let values_len = self.values.as_ref().unwrap().len(); + if number_of_buckets > values_len { + return 
Err(DatabaseError::TooManyBuckets(number_of_buckets, values_len)); } - let mut sketch = CountMinSketch::new(self.values.len(), 0.95, 1.0); + let mut sketch = CountMinSketch::new(values_len, 0.95, 1.0); let HistogramBuilder { + arena, index_id, null_count, - mut values, + values, sort_keys, .. } = self; + let mut values = values.unwrap(); + let sort_keys = sort_keys.unwrap(); let mut buckets = Vec::with_capacity(number_of_buckets); - let values_len = values.len(); let bucket_len = if values_len % number_of_buckets == 0 { values_len / number_of_buckets } else { (values_len + number_of_buckets) / number_of_buckets }; - let sorted_indices = radix_sort(sort_keys); + let sorted_indices = radix_sort(sort_keys, &arena); for i in 0..number_of_buckets { let mut bucket = Bucket::empty(); @@ -135,6 +172,9 @@ impl HistogramBuilder { } sketch.add(&DataValue::Null, self.null_count); + drop(values); + drop(arena); + Ok(( Histogram { index_id, @@ -485,7 +525,7 @@ mod tests { #[test] fn test_sort_tuples_on_histogram() -> Result<(), DatabaseError> { - let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15)); builder.append(&DataValue::Int32(Some(0)))?; builder.append(&DataValue::Int32(Some(1)))?; @@ -551,7 +591,7 @@ mod tests { #[test] fn test_rev_sort_tuples_on_histogram() -> Result<(), DatabaseError> { - let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15)); builder.append(&DataValue::Int32(Some(14)))?; builder.append(&DataValue::Int32(Some(13)))?; @@ -615,7 +655,7 @@ mod tests { #[test] fn test_non_average_on_histogram() -> Result<(), DatabaseError> { - let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15)); builder.append(&DataValue::Int32(Some(14)))?; builder.append(&DataValue::Int32(Some(13)))?; @@ -674,7 +714,7 @@ mod tests { #[test] fn test_collect_count() -> Result<(), DatabaseError> { - let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15)); builder.append(&DataValue::Int32(Some(14)))?; builder.append(&DataValue::Int32(Some(13)))?; diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index d06dde5c..43dbe26f 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -2,7 +2,6 @@ use crate::errors::DatabaseError; use crate::optimizer::core::pattern::PatternMatcher; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::optimizer::heuristic::batch::HepMatchOrder; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::optimizer::heuristic::matcher::HepMatcher; use crate::optimizer::rule::implementation::ImplementationRuleImpl; @@ -47,7 +46,7 @@ impl Memo { return Err(DatabaseError::EmptyPlan); } - for node_id in graph.nodes_iter(HepMatchOrder::BottomUp, None) { + for node_id in graph.nodes_iter(None) { for rule in implementations { if HepMatcher::new(rule.pattern(), node_id, graph).match_opt_expr() { let op = graph.operator(node_id); @@ -98,7 +97,6 @@ mod tests { use crate::types::value::DataValue; use crate::types::LogicalType; use petgraph::stable_graph::NodeIndex; - use std::cell::RefCell; use std::ops::Bound; use std::sync::atomic::AtomicUsize; use std::sync::Arc; @@ -132,7 +130,6 @@ mod tests { }; let scala_functions = Default::default(); let 
table_functions = Default::default(); - let args = RefCell::new(Vec::new()); let mut binder = Binder::new( BinderContext::new( database.state.table_cache(), @@ -142,7 +139,7 @@ mod tests { &table_functions, Arc::new(AtomicUsize::new(0)), ), - &args, + &[], None, ); // where: c1 => 2, (40, +inf) diff --git a/src/optimizer/core/statistics_meta.rs b/src/optimizer/core/statistics_meta.rs index a545b09d..ab2b3285 100644 --- a/src/optimizer/core/statistics_meta.rs +++ b/src/optimizer/core/statistics_meta.rs @@ -128,7 +128,7 @@ mod tests { ty: IndexType::PrimaryKey { is_multiple: false }, }; - let mut builder = HistogramBuilder::new(&index, Some(15))?; + let mut builder = HistogramBuilder::new(&index, Some(15)); builder.append(&Arc::new(DataValue::Int32(Some(14))))?; builder.append(&Arc::new(DataValue::Int32(Some(13))))?; diff --git a/src/optimizer/heuristic/batch.rs b/src/optimizer/heuristic/batch.rs index 84fbc763..85b92f17 100644 --- a/src/optimizer/heuristic/batch.rs +++ b/src/optimizer/heuristic/batch.rs @@ -23,40 +23,28 @@ impl HepBatch { } } -#[derive(Clone)] -pub struct HepBatchStrategy { +#[derive(Clone, Copy)] +pub enum HepBatchStrategy { /// An execution_ap strategy for rules that indicates the maximum number of executions. If the /// execution_ap reaches fix point (i.e. converge) before maxIterations, it will stop. /// /// Fix Point means that plan tree not changed after applying all rules. - pub max_iteration: usize, - /// An order to traverse the plan tree nodes. - pub match_order: HepMatchOrder, + MaxTimes(usize), + #[allow(dead_code)] + LoopIfApplied, } impl HepBatchStrategy { pub fn once_topdown() -> Self { - HepBatchStrategy { - max_iteration: 1, - match_order: HepMatchOrder::TopDown, - } + HepBatchStrategy::MaxTimes(1) } pub fn fix_point_topdown(max_iteration: usize) -> Self { - HepBatchStrategy { - max_iteration, - match_order: HepMatchOrder::TopDown, - } + HepBatchStrategy::MaxTimes(max_iteration) } -} -#[derive(Clone, Copy)] -pub enum HepMatchOrder { - /// Match from root down. A match attempt at an ancestor always precedes all match attempts at - /// its descendants. - TopDown, - /// Match from leaves up. A match attempt at a descendant precedes all match attempts at its - /// ancestors. #[allow(dead_code)] - BottomUp, + pub fn loop_if_applied() -> Self { + HepBatchStrategy::LoopIfApplied + } } diff --git a/src/optimizer/heuristic/graph.rs b/src/optimizer/heuristic/graph.rs index 841d6d74..f6de8c61 100644 --- a/src/optimizer/heuristic/graph.rs +++ b/src/optimizer/heuristic/graph.rs @@ -1,7 +1,7 @@ use crate::optimizer::core::memo::Memo; -use crate::optimizer::heuristic::batch::HepMatchOrder; use crate::planner::operator::Operator; use crate::planner::{Childrens, LogicalPlan}; +use fixedbitset::FixedBitSet; use itertools::Itertools; use petgraph::stable_graph::{NodeIndex, StableDiGraph}; use petgraph::visit::{Bfs, EdgeRef}; @@ -136,29 +136,6 @@ impl HepGraph { self.graph.remove_node(source_id) } - /// Traverse the graph in BFS order. 
- fn bfs(&self, start: HepNodeId) -> Vec { - let mut ids = Vec::with_capacity(self.graph.node_count()); - let mut iter = Bfs::new(&self.graph, start); - while let Some(node_id) = iter.next(&self.graph) { - ids.push(node_id); - } - ids - } - - /// Use bfs to traverse the graph and return node ids - pub fn nodes_iter( - &self, - order: HepMatchOrder, - start_option: Option, - ) -> Box> { - let ids = self.bfs(start_option.unwrap_or(self.root_index)); - match order { - HepMatchOrder::TopDown => Box::new(ids.into_iter()), - HepMatchOrder::BottomUp => Box::new(ids.into_iter().rev()), - } - } - #[allow(dead_code)] pub fn node(&self, node_id: HepNodeId) -> Option<&Operator> { self.graph.node_weight(node_id) @@ -200,6 +177,15 @@ impl HepGraph { self.build_childrens(self.root_index, memo) } + /// Use bfs to traverse the graph and return node ids + pub fn nodes_iter(&self, start_option: Option) -> HepGraphIter { + let inner = Bfs::new(&self.graph, start_option.unwrap_or(self.root_index)); + HepGraphIter { + inner, + graph: &self.graph, + } + } + fn build_childrens(&mut self, start: HepNodeId, memo: Option<&Memo>) -> Option { let physical_option = memo.and_then(|memo| memo.cheapest_physical_option(&start)); @@ -230,6 +216,19 @@ impl HepGraph { } } +pub struct HepGraphIter<'a> { + inner: Bfs, + graph: &'a StableDiGraph, +} + +impl Iterator for HepGraphIter<'_> { + type Item = HepNodeId; + + fn next(&mut self) -> Option { + self.inner.next(self.graph) + } +} + #[cfg(test)] mod tests { use crate::binder::test::build_t1_table; diff --git a/src/optimizer/heuristic/matcher.rs b/src/optimizer/heuristic/matcher.rs index 1e3d0493..c8195dd7 100644 --- a/src/optimizer/heuristic/matcher.rs +++ b/src/optimizer/heuristic/matcher.rs @@ -1,5 +1,4 @@ use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate, PatternMatcher}; -use crate::optimizer::heuristic::batch::HepMatchOrder; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; /// Use pattern to determines which rule can be applied @@ -30,10 +29,7 @@ impl PatternMatcher for HepMatcher<'_, '_> { match &self.pattern.children { PatternChildrenPredicate::Recursive => { // check - for node_id in self - .graph - .nodes_iter(HepMatchOrder::TopDown, Some(self.start_id)) - { + for node_id in self.graph.nodes_iter(Some(self.start_id)) { if !(self.pattern.predicate)(self.graph.operator(node_id)) { return false; } diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 16d5f4c5..116d310e 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -47,14 +47,16 @@ impl HepOptimizer { loader: Option<&StatisticMetaLoader<'_, T>>, ) -> Result { for ref batch in self.batches { - let mut batch_over = false; - let mut iteration = 1usize; - - while iteration <= batch.strategy.max_iteration && !batch_over { - if Self::apply_batch(&mut self.graph, batch)? { - iteration += 1; - } else { - batch_over = true + match batch.strategy { + HepBatchStrategy::MaxTimes(max_iteration) => { + for _ in 0..max_iteration { + if !Self::apply_batch(&mut self.graph, batch)? { + break; + } + } + } + HepBatchStrategy::LoopIfApplied => { + while Self::apply_batch(&mut self.graph, batch)? {} } } } @@ -73,22 +75,21 @@ impl HepOptimizer { } fn apply_batch( - graph: &mut HepGraph, - HepBatch { - rules, strategy, .. - }: &HepBatch, + graph: *mut HepGraph, + HepBatch { rules, .. 
}: &HepBatch, ) -> Result { - let before_version = graph.version; + let before_version = unsafe { &*graph }.version; for rule in rules { - for node_id in graph.nodes_iter(strategy.match_order, None) { - if Self::apply_rule(graph, rule, node_id)? { + // SAFETY: after successfully modifying the graph, the iterator is no longer used. + for node_id in unsafe { &*graph }.nodes_iter(None) { + if Self::apply_rule(unsafe { &mut *graph }, rule, node_id)? { break; } } } - Ok(before_version != graph.version) + Ok(before_version != unsafe { &*graph }.version) } fn apply_rule( diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index a32efd61..fd6497a0 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -107,13 +107,15 @@ impl NormalizationRule for PushLimitIntoScan { fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), DatabaseError> { if let Operator::Limit(limit_op) = graph.operator(node_id) { if let Some(child_index) = graph.eldest_child_at(node_id) { - if let Operator::TableScan(scan_op) = graph.operator(child_index) { - let mut new_scan_op = scan_op.clone(); - - new_scan_op.limit = (limit_op.offset, limit_op.limit); + let mut is_apply = false; + let limit = (limit_op.offset, limit_op.limit); + if let Operator::TableScan(scan_op) = graph.operator_mut(child_index) { + scan_op.limit = limit; + is_apply = true; + } + if is_apply { graph.remove_node(node_id, false); - graph.replace_node(child_index, Operator::TableScan(new_scan_op)); } } } diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index 6778d817..d2d1a51a 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -72,6 +72,7 @@ fn reduce_filters(filters: Vec, having: bool) -> Option Result<(), DatabaseError> { - if let Operator::Filter(mut filter_op) = graph.operator(node_id).clone() { + let mut is_optimized = false; + if let Operator::Filter(filter_op) = graph.operator_mut(node_id) { + if filter_op.is_optimized { + return Ok(()); + } filter_op.predicate.simplify()?; filter_op.predicate.constant_calculation()?; - - graph.replace_node(node_id, Operator::Filter(filter_op)) + filter_op.is_optimized = true; + is_optimized = true; + } + if is_optimized { + graph.version += 1; } Ok(()) diff --git a/src/planner/operator/copy_to_file.rs b/src/planner/operator/copy_to_file.rs index 268c9628..30a8c2b8 100644 --- a/src/planner/operator/copy_to_file.rs +++ b/src/planner/operator/copy_to_file.rs @@ -7,7 +7,6 @@ use std::fmt::Formatter; #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] pub struct CopyToFileOperator { - pub table: String, pub target: ExtSource, pub schema_ref: SchemaRef, } @@ -19,13 +18,7 @@ impl fmt::Display for CopyToFileOperator { .iter() .map(|column| column.name().to_string()) .join(", "); - write!( - f, - "Copy {} -> {} [{}]", - self.table, - self.target.path.display(), - columns - )?; + write!(f, "Copy To {} [{}]", self.target.path.display(), columns)?; Ok(()) } diff --git a/src/planner/operator/filter.rs b/src/planner/operator/filter.rs index 69700a6c..c8a9fd2f 100644 --- a/src/planner/operator/filter.rs +++ b/src/planner/operator/filter.rs @@ -9,13 +9,18 @@ use super::Operator; #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] pub struct FilterOperator { pub predicate: ScalarExpression, + pub 
is_optimized: bool, pub having: bool, } impl FilterOperator { pub fn build(predicate: ScalarExpression, children: LogicalPlan, having: bool) -> LogicalPlan { LogicalPlan::new( - Operator::Filter(FilterOperator { predicate, having }), + Operator::Filter(FilterOperator { + predicate, + is_optimized: false, + having, + }), Childrens::Only(children), ) } diff --git a/src/serdes/char.rs b/src/serdes/char.rs index ba6db4e2..f853d001 100644 --- a/src/serdes/char.rs +++ b/src/serdes/char.rs @@ -1,7 +1,6 @@ use crate::errors::DatabaseError; use crate::serdes::{ReferenceSerialization, ReferenceTables}; use crate::storage::{TableCache, Transaction}; -use encode_unicode::CharExt; use std::io::{Read, Write}; impl ReferenceSerialization for char { @@ -11,9 +10,10 @@ impl ReferenceSerialization for char { _: bool, _: &mut ReferenceTables, ) -> Result<(), DatabaseError> { - let (bytes, _) = self.to_utf8_array(); + let mut buf = [0u8; 2]; + self.encode_utf8(&mut buf); - Ok(writer.write_all(&bytes)?) + Ok(writer.write_all(&buf)?) } fn decode( @@ -21,10 +21,10 @@ impl ReferenceSerialization for char { _: Option<(&T, &TableCache)>, _: &ReferenceTables, ) -> Result { - let mut buf = [0u8; 4]; + let mut buf = [0u8; 2]; reader.read_exact(&mut buf)?; // SAFETY - Ok(char::from_utf8_array(buf).unwrap()) + Ok(std::str::from_utf8(&buf)?.chars().next().unwrap()) } } diff --git a/src/serdes/data_value.rs b/src/serdes/data_value.rs index 9fd7647c..2a023c2f 100644 --- a/src/serdes/data_value.rs +++ b/src/serdes/data_value.rs @@ -1,118 +1,7 @@ -use crate::errors::DatabaseError; -use crate::serdes::{ReferenceSerialization, ReferenceTables}; -use crate::storage::{TableCache, Transaction}; +use crate::implement_serialization_by_bincode; use crate::types::value::DataValue; -use crate::types::LogicalType; -use std::io::{Read, Write}; -impl DataValue { - // FIXME: redundant code - pub(crate) fn inner_encode( - &self, - writer: &mut W, - ty: &LogicalType, - ) -> Result<(), DatabaseError> { - writer.write_all(&[if self.is_null() { 0u8 } else { 1u8 }])?; - - if self.is_null() { - return Ok(()); - } - if let DataValue::Tuple(values) = self { - match values { - None => writer.write_all(&[0u8])?, - Some((values, is_upper)) => { - writer.write_all(&[1u8])?; - writer.write_all(&(values.len() as u32).to_le_bytes())?; - for value in values.iter() { - value.inner_encode(writer, &value.logical_type())? - } - writer.write_all(&[if *is_upper { 1u8 } else { 0u8 }])?; - } - } - - return Ok(()); - } - if ty.raw_len().is_none() { - let mut bytes = Vec::new(); - writer.write_all(&(self.to_raw(&mut bytes)? 
as u32).to_le_bytes())?; - writer.write_all(&bytes)?; - } else { - let _ = self.to_raw(writer)?; - } - - Ok(()) - } - - pub(crate) fn inner_decode( - reader: &mut R, - ty: &LogicalType, - ) -> Result { - let mut bytes = [0u8; 1]; - reader.read_exact(&mut bytes)?; - if bytes[0] == 0 { - return Ok(DataValue::none(ty)); - } - if let LogicalType::Tuple(types) = ty { - let mut bytes = [0u8; 1]; - reader.read_exact(&mut bytes)?; - let values = match bytes[0] { - 0 => None, - 1 => { - let mut bytes = [0u8; 4]; - reader.read_exact(&mut bytes)?; - let len = u32::from_le_bytes(bytes) as usize; - let mut vec = Vec::with_capacity(len); - - for ty in types.iter() { - vec.push(Self::inner_decode(reader, ty)?); - } - let mut bytes = [0u8]; - reader.read_exact(&mut bytes)?; - Some((vec, bytes[0] == 1)) - } - _ => unreachable!(), - }; - - return Ok(DataValue::Tuple(values)); - } - let value_len = match ty.raw_len() { - None => { - let mut bytes = [0u8; 4]; - reader.read_exact(&mut bytes)?; - u32::from_le_bytes(bytes) as usize - } - Some(len) => len, - }; - let mut buf = vec![0u8; value_len]; - reader.read_exact(&mut buf)?; - - Ok(DataValue::from_raw(&buf, ty)) - } -} - -impl ReferenceSerialization for DataValue { - fn encode( - &self, - writer: &mut W, - is_direct: bool, - reference_tables: &mut ReferenceTables, - ) -> Result<(), DatabaseError> { - let ty = self.logical_type(); - ty.encode(writer, is_direct, reference_tables)?; - - self.inner_encode(writer, &ty) - } - - fn decode( - reader: &mut R, - drive: Option<(&T, &TableCache)>, - reference_tables: &ReferenceTables, - ) -> Result { - let logical_type = LogicalType::decode(reader, drive, reference_tables)?; - - Self::inner_decode(reader, &logical_type) - } -} +implement_serialization_by_bincode!(DataValue); #[cfg(test)] pub(crate) mod test { diff --git a/src/serdes/mod.rs b/src/serdes/mod.rs index fa836161..48ec0c9c 100644 --- a/src/serdes/mod.rs +++ b/src/serdes/mod.rs @@ -33,37 +33,20 @@ macro_rules! implement_serialization_by_bincode { fn encode( &self, writer: &mut W, - is_direct: bool, - reference_tables: &mut $crate::serdes::ReferenceTables, + _: bool, + _: &mut $crate::serdes::ReferenceTables, ) -> Result<(), $crate::errors::DatabaseError> { - let bytes = bincode::serialize(self)?; - $crate::serdes::ReferenceSerialization::encode( - &bytes.len(), - writer, - is_direct, - reference_tables, - )?; - std::io::Write::write_all(writer, &bytes)?; + bincode::serialize_into(writer, self)?; Ok(()) } fn decode( reader: &mut R, - drive: Option<(&T, &$crate::storage::TableCache)>, - reference_tables: &$crate::serdes::ReferenceTables, + _: Option<(&T, &$crate::storage::TableCache)>, + _: &$crate::serdes::ReferenceTables, ) -> Result { - let mut buf = vec![ - 0u8; - ::decode( - reader, - drive, - reference_tables - )? - ]; - std::io::Read::read_exact(reader, &mut buf)?; - - Ok(bincode::deserialize::(&buf)?) + Ok(bincode::deserialize_from(reader)?) 
} } }; diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6563c22a..c3c37164 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -9,13 +9,12 @@ use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::optimizer::core::statistics_meta::{StatisticMetaLoader, StatisticsMeta}; use crate::serdes::ReferenceTables; -use crate::storage::table_codec::TableCodec; +use crate::storage::table_codec::{BumpBytes, Bytes, TableCodec}; use crate::types::index::{Index, IndexId, IndexMetaRef, IndexType}; use crate::types::tuple::{Tuple, TupleId}; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; use crate::utils::lru::SharedLruCache; -use bytes::Bytes; use itertools::Itertools; use std::collections::Bound; use std::io::Cursor; @@ -45,6 +44,8 @@ pub trait Transaction: Sized { where Self: 'a; + fn table_codec(&self) -> *const TableCodec; + /// The bounds is applied to the whole data batches, not per batch. /// /// The projections is column indices. @@ -64,8 +65,9 @@ pub trait Transaction: Sized { let pk_indices = table.primary_keys_indices(); let table_types = table.types(); if columns.is_empty() { - let (i, column) = &table.primary_keys()[0]; - columns.push((*i, column.clone())); + for (i, column) in table.primary_keys() { + columns.push((*i, column.clone())); + } } let mut tuple_columns = Vec::with_capacity(columns.len()); let mut projections = Vec::with_capacity(columns.len()); @@ -74,7 +76,7 @@ pub trait Transaction: Sized { projections.push(projection); } - let (min, max) = TableCodec::tuple_bound(&table_name); + let (min, max) = unsafe { &*self.table_codec() }.tuple_bound(&table_name); let iter = self.range(Bound::Included(min), Bound::Included(max))?; Ok(TupleIter { @@ -144,7 +146,8 @@ pub trait Transaction: Sized { ) -> Result { if let Some(mut table) = self.table(table_cache, table_name.clone())?.cloned() { let index_meta = table.add_index_meta(index_name, column_ids, ty)?; - let (key, value) = TableCodec::encode_index_meta(table_name, index_meta)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_index_meta(table_name, index_meta)?; self.set(key, value)?; table_cache.remove(table_name); @@ -163,11 +166,12 @@ pub trait Transaction: Sized { if matches!(index.ty, IndexType::PrimaryKey { .. }) { return Ok(()); } - let (key, value) = TableCodec::encode_index(table_name, &index, tuple_id)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_index(table_name, &index, tuple_id)?; if matches!(index.ty, IndexType::Unique) { if let Some(bytes) = self.get(&key)? { - return if bytes != value { + return if bytes != value.as_slice() { Err(DatabaseError::DuplicateUniqueValue) } else { Ok(()) @@ -188,7 +192,7 @@ pub trait Transaction: Sized { if matches!(index.ty, IndexType::PrimaryKey { .. 
}) { return Ok(()); } - self.remove(&TableCodec::encode_index_key( + self.remove(&unsafe { &*self.table_codec() }.encode_index_key( table_name, index, Some(tuple_id), @@ -204,7 +208,8 @@ pub trait Transaction: Sized { types: &[LogicalType], is_overwrite: bool, ) -> Result<(), DatabaseError> { - let (key, value) = TableCodec::encode_tuple(table_name, &mut tuple, types)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_tuple(table_name, &mut tuple, types)?; if !is_overwrite && self.get(&key)?.is_some() { return Err(DatabaseError::DuplicatePrimaryKey); @@ -215,7 +220,7 @@ pub trait Transaction: Sized { } fn remove_tuple(&mut self, table_name: &str, tuple_id: &TupleId) -> Result<(), DatabaseError> { - let key = TableCodec::encode_tuple_key(table_name, tuple_id)?; + let key = unsafe { &*self.table_codec() }.encode_tuple_key(table_name, tuple_id)?; self.remove(&key)?; Ok(()) @@ -251,12 +256,14 @@ pub trait Transaction: Sized { vec![col_id], IndexType::Unique, )?; - let (key, value) = TableCodec::encode_index_meta(table_name, meta_ref)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_index_meta(table_name, meta_ref)?; self.set(key, value)?; } let column = table.get_column_by_id(&col_id).unwrap(); - let (key, value) = TableCodec::encode_column(column, &mut ReferenceTables::new())?; + let (key, value) = unsafe { &*self.table_codec() } + .encode_column(column, &mut ReferenceTables::new())?; self.set(key, value)?; table_cache.remove(table_name); @@ -276,17 +283,20 @@ pub trait Transaction: Sized { if let Some(table_catalog) = self.table(table_cache, table_name.clone())?.cloned() { let column = table_catalog.get_column_by_name(column_name).unwrap(); - let (key, _) = TableCodec::encode_column(column, &mut ReferenceTables::new())?; + let (key, _) = unsafe { &*self.table_codec() } + .encode_column(column, &mut ReferenceTables::new())?; self.remove(&key)?; for index_meta in table_catalog.indexes.iter() { if !index_meta.column_ids.contains(&column.id().unwrap()) { continue; } - let (index_meta_key, _) = TableCodec::encode_index_meta(table_name, index_meta)?; + let (index_meta_key, _) = + unsafe { &*self.table_codec() }.encode_index_meta(table_name, index_meta)?; self.remove(&index_meta_key)?; - let (index_min, index_max) = TableCodec::index_bound(table_name, &index_meta.id)?; + let (index_min, index_max) = + unsafe { &*self.table_codec() }.index_bound(table_name, &index_meta.id)?; self._drop_data(index_min, index_max)?; self.remove_table_meta(meta_cache, table_name, index_meta.id)?; @@ -305,11 +315,15 @@ pub trait Transaction: Sized { view: View, or_replace: bool, ) -> Result<(), DatabaseError> { - let (view_key, value) = TableCodec::encode_view(&view)?; + let (view_key, value) = unsafe { &*self.table_codec() }.encode_view(&view)?; - if !or_replace && self.get(&view_key)?.is_some() { + let already_exists = self.get(&view_key)?.is_some(); + if !or_replace && already_exists { return Err(DatabaseError::ViewExists); } + if !already_exists { + self.check_name_hash(&view.name)?; + } self.set(view_key, value)?; let _ = view_cache.put(view.name.clone(), view); @@ -329,20 +343,22 @@ pub trait Transaction: Sized { TableCodec::check_primary_key_type(column.datatype())?; } - let (table_key, value) = - TableCodec::encode_root_table(&TableMeta::empty(table_name.clone()))?; + let (table_key, value) = unsafe { &*self.table_codec() } + .encode_root_table(&TableMeta::empty(table_name.clone()))?; if self.get(&table_key)?.is_some() { if if_not_exists { return Ok(table_name); } return 
Err(DatabaseError::TableExists); } + self.check_name_hash(&table_name)?; self.create_index_meta_from_column(&mut table_catalog)?; self.set(table_key, value)?; let mut reference_tables = ReferenceTables::new(); for column in table_catalog.columns() { - let (key, value) = TableCodec::encode_column(column, &mut reference_tables)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_column(column, &mut reference_tables)?; self.set(key, value)?; } debug_assert_eq!(reference_tables.len(), 1); @@ -351,6 +367,18 @@ pub trait Transaction: Sized { Ok(table_name) } + fn check_name_hash(&mut self, table_name: &TableName) -> Result<(), DatabaseError> { + let (hash_key, value) = unsafe { &*self.table_codec() }.encode_table_hash(table_name); + if self.get(&hash_key)?.is_some() { + return Err(DatabaseError::DuplicateSourceHash(table_name.to_string())); + } + self.set(hash_key, value) + } + + fn drop_name_hash(&mut self, table_name: &TableName) -> Result<(), DatabaseError> { + self.remove(&unsafe { &*self.table_codec() }.encode_table_hash_key(table_name)) + } + fn drop_view( &mut self, view_cache: &ViewCache, @@ -358,6 +386,7 @@ pub trait Transaction: Sized { view_name: TableName, if_exists: bool, ) -> Result<(), DatabaseError> { + self.drop_name_hash(&view_name)?; if self .view(table_cache, view_cache, view_name.clone())? .is_none() @@ -369,7 +398,7 @@ pub trait Transaction: Sized { } } - self.remove(&TableCodec::encode_view_key(view_name.as_str()))?; + self.remove(&unsafe { &*self.table_codec() }.encode_view_key(view_name.as_str()))?; view_cache.remove(&view_name); Ok(()) @@ -381,6 +410,7 @@ pub trait Transaction: Sized { table_name: TableName, if_exists: bool, ) -> Result<(), DatabaseError> { + self.drop_name_hash(&table_name)?; if self.table(table_cache, table_name.clone())?.is_none() { if if_exists { return Ok(()); @@ -390,26 +420,29 @@ pub trait Transaction: Sized { } self.drop_data(table_name.as_str())?; - let (column_min, column_max) = TableCodec::columns_bound(table_name.as_str()); + let (column_min, column_max) = + unsafe { &*self.table_codec() }.columns_bound(table_name.as_str()); self._drop_data(column_min, column_max)?; - let (index_meta_min, index_meta_max) = TableCodec::index_meta_bound(table_name.as_str()); + let (index_meta_min, index_meta_max) = + unsafe { &*self.table_codec() }.index_meta_bound(table_name.as_str()); self._drop_data(index_meta_min, index_meta_max)?; - self.remove(&TableCodec::encode_root_table_key(table_name.as_str()))?; + self.remove(&unsafe { &*self.table_codec() }.encode_root_table_key(table_name.as_str()))?; table_cache.remove(&table_name); Ok(()) } fn drop_data(&mut self, table_name: &str) -> Result<(), DatabaseError> { - let (tuple_min, tuple_max) = TableCodec::tuple_bound(table_name); + let (tuple_min, tuple_max) = unsafe { &*self.table_codec() }.tuple_bound(table_name); self._drop_data(tuple_min, tuple_max)?; - let (index_min, index_max) = TableCodec::all_index_bound(table_name); + let (index_min, index_max) = unsafe { &*self.table_codec() }.all_index_bound(table_name); self._drop_data(index_min, index_max)?; - let (statistics_min, statistics_max) = TableCodec::statistics_bound(table_name); + let (statistics_min, statistics_max) = + unsafe { &*self.table_codec() }.statistics_bound(table_name); self._drop_data(statistics_min, statistics_max)?; Ok(()) @@ -424,7 +457,8 @@ pub trait Transaction: Sized { if let Some(view) = view_cache.get(&view_name) { return Ok(Some(view)); } - let Some(bytes) = self.get(&TableCodec::encode_view_key(&view_name))? 
else { + let Some(bytes) = self.get(&unsafe { &*self.table_codec() }.encode_view_key(&view_name))? + else { return Ok(None); }; Ok(Some(view_cache.get_or_insert(view_name.clone(), |_| { @@ -453,7 +487,7 @@ pub trait Transaction: Sized { fn table_metas(&self) -> Result, DatabaseError> { let mut metas = vec![]; - let (min, max) = TableCodec::root_table_bound(); + let (min, max) = unsafe { &*self.table_codec() }.root_table_bound(); let mut iter = self.range(Bound::Included(min), Bound::Included(max))?; while let Some((_, value)) = iter.try_next().ok().flatten() { @@ -475,7 +509,11 @@ pub trait Transaction: Sized { let index_id = statistics_meta.index_id(); meta_cache.put((table_name.clone(), index_id), statistics_meta); - let (key, value) = TableCodec::encode_statistics_path(table_name.as_str(), index_id, path); + let (key, value) = unsafe { &*self.table_codec() }.encode_statistics_path( + table_name.as_str(), + index_id, + path, + ); self.set(key, value)?; Ok(()) @@ -486,7 +524,7 @@ pub trait Transaction: Sized { table_name: &str, index_id: IndexId, ) -> Result, DatabaseError> { - let key = TableCodec::encode_statistics_path_key(table_name, index_id); + let key = unsafe { &*self.table_codec() }.encode_statistics_path_key(table_name, index_id); self.get(&key)? .map(|bytes| TableCodec::decode_statistics_path(&bytes)) .transpose() @@ -498,7 +536,7 @@ pub trait Transaction: Sized { table_name: &TableName, index_id: IndexId, ) -> Result<(), DatabaseError> { - let key = TableCodec::encode_statistics_path_key(table_name, index_id); + let key = unsafe { &*self.table_codec() }.encode_statistics_path_key(table_name, index_id); self.remove(&key)?; meta_cache.remove(&(table_name.clone(), index_id)); @@ -521,7 +559,7 @@ pub trait Transaction: Sized { &self, table_name: &TableName, ) -> Result, Vec)>, DatabaseError> { - let (table_min, table_max) = TableCodec::table_bound(table_name); + let (table_min, table_max) = unsafe { &*self.table_codec() }.table_bound(table_name); let mut column_iter = self.range( Bound::Included(table_min.clone()), Bound::Included(table_max), @@ -535,7 +573,7 @@ pub trait Transaction: Sized { // Tips: only `Column`, `IndexMeta`, `TableMeta` while let Some((key, value)) = column_iter.try_next().ok().flatten() { if key.starts_with(&table_min) { - let mut cursor = Cursor::new(value.as_ref()); + let mut cursor = Cursor::new(value); columns.push(TableCodec::decode_column::( &mut cursor, &reference_tables, @@ -548,7 +586,7 @@ pub trait Transaction: Sized { Ok((!columns.is_empty()).then_some((columns, index_metas))) } - fn _drop_data(&mut self, min: Vec, max: Vec) -> Result<(), DatabaseError> { + fn _drop_data(&mut self, min: BumpBytes, max: BumpBytes) -> Result<(), DatabaseError> { let mut iter = self.range(Bound::Included(min), Bound::Included(max))?; let mut data_keys = vec![]; @@ -571,9 +609,9 @@ pub trait Transaction: Sized { let table_name = table.name.clone(); let mut primary_keys = Vec::new(); - // FIXME: no clone - for col in table.columns().cloned().collect_vec() { - let col_id = col.id().unwrap(); + let schema_ref = table.schema_ref().clone(); + for col in schema_ref.iter() { + let col_id = col.id().ok_or(DatabaseError::PrimaryKeyNotFound)?; let index_ty = if let Some(i) = col.desc().primary() { primary_keys.push((i, col_id)); continue; @@ -584,7 +622,8 @@ pub trait Transaction: Sized { }; let meta_ref = table.add_index_meta(format!("uk_{}_index", col.name()), vec![col_id], index_ty)?; - let (key, value) = TableCodec::encode_index_meta(&table_name, meta_ref)?; + let (key, 
value) = + unsafe { &*self.table_codec() }.encode_index_meta(&table_name, meta_ref)?; self.set(key, value)?; } let primary_keys = table @@ -596,7 +635,8 @@ pub trait Transaction: Sized { is_multiple: primary_keys.len() != 1, }; let meta_ref = table.add_index_meta("pk_index".to_string(), primary_keys, pk_index_ty)?; - let (key, value) = TableCodec::encode_index_meta(&table_name, meta_ref)?; + let (key, value) = + unsafe { &*self.table_codec() }.encode_index_meta(&table_name, meta_ref)?; self.set(key, value)?; Ok(()) @@ -604,20 +644,20 @@ pub trait Transaction: Sized { fn get(&self, key: &[u8]) -> Result, DatabaseError>; - fn set(&mut self, key: Bytes, value: Bytes) -> Result<(), DatabaseError>; + fn set(&mut self, key: BumpBytes, value: BumpBytes) -> Result<(), DatabaseError>; fn remove(&mut self, key: &[u8]) -> Result<(), DatabaseError>; - fn range( - &self, - min: Bound>, - max: Bound>, - ) -> Result, DatabaseError>; + fn range<'a>( + &'a self, + min: Bound>, + max: Bound>, + ) -> Result, DatabaseError>; fn commit(self) -> Result<(), DatabaseError>; } -trait IndexImpl { +trait IndexImpl<'bytes, T: Transaction + 'bytes> { fn index_lookup( &self, bytes: &Bytes, @@ -636,7 +676,7 @@ trait IndexImpl { &self, params: &IndexImplParams, value: &DataValue, - ) -> Result, DatabaseError>; + ) -> Result, DatabaseError>; } enum IndexImplEnum { @@ -673,10 +713,16 @@ struct IndexImplParams<'a, T: Transaction> { } impl IndexImplParams<'_, T> { + #[inline] pub(crate) fn value_ty(&self) -> &LogicalType { &self.index_meta.value_ty } + #[inline] + pub(crate) fn table_codec(&self) -> *const TableCodec { + self.tx.table_codec() + } + pub(crate) fn try_cast(&self, mut val: DataValue) -> Result { let value_ty = self.value_ty(); @@ -691,17 +737,20 @@ impl IndexImplParams<'_, T> { pk_indices: &PrimaryKeyIndices, tuple_id: &TupleId, ) -> Result, DatabaseError> { - let key = TableCodec::encode_tuple_key(self.table_name, tuple_id)?; + let key = unsafe { &*self.table_codec() }.encode_tuple_key(self.table_name, tuple_id)?; - Ok(self.tx.get(&key)?.map(|bytes| { - TableCodec::decode_tuple( - &self.table_types, - pk_indices, - &self.projections, - &self.tuple_schema_ref, - &bytes, - ) - })) + self.tx + .get(&key)? 
+ .map(|bytes| { + TableCodec::decode_tuple( + &self.table_types, + pk_indices, + &self.projections, + &self.tuple_schema_ref, + &bytes, + ) + }) + .transpose() } } @@ -710,7 +759,7 @@ enum IndexResult<'a, T: Transaction + 'a> { Scope(T::IterType<'a>), } -impl IndexImpl for IndexImplEnum { +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { fn index_lookup( &self, bytes: &Bytes, @@ -743,7 +792,7 @@ impl IndexImpl for IndexImplEnum { &self, params: &IndexImplParams, value: &DataValue, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { match self { IndexImplEnum::PrimaryKey(inner) => inner.bound_key(params, value), IndexImplEnum::Unique(inner) => inner.bound_key(params, value), @@ -753,20 +802,20 @@ impl IndexImpl for IndexImplEnum { } } -impl IndexImpl for PrimaryKeyIndexImpl { +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexImpl { fn index_lookup( &self, bytes: &Bytes, pk_indices: &PrimaryKeyIndices, params: &IndexImplParams, ) -> Result { - Ok(TableCodec::decode_tuple( + TableCodec::decode_tuple( ¶ms.table_types, pk_indices, ¶ms.projections, ¶ms.tuple_schema_ref, bytes, - )) + ) } fn eq_to_res<'a>( @@ -777,7 +826,7 @@ impl IndexImpl for PrimaryKeyIndexImpl { ) -> Result, DatabaseError> { let tuple = params .tx - .get(&TableCodec::encode_tuple_key(params.table_name, value)?)? + .get(&unsafe { &*params.table_codec() }.encode_tuple_key(params.table_name, value)?)? .map(|bytes| { TableCodec::decode_tuple( ¶ms.table_types, @@ -786,16 +835,17 @@ impl IndexImpl for PrimaryKeyIndexImpl { ¶ms.tuple_schema_ref, &bytes, ) - }); + }) + .transpose()?; Ok(IndexResult::Tuple(tuple)) } fn bound_key( &self, params: &IndexImplParams, - val: &DataValue, - ) -> Result, DatabaseError> { - TableCodec::encode_tuple_key(params.table_name, val) + value: &DataValue, + ) -> Result, DatabaseError> { + unsafe { &*params.table_codec() }.encode_tuple_key(params.table_name, value) } } @@ -804,13 +854,13 @@ fn secondary_index_lookup( pk_indices: &PrimaryKeyIndices, params: &IndexImplParams, ) -> Result { - let tuple_id = TableCodec::decode_index(bytes, ¶ms.index_meta.pk_ty)?; + let tuple_id = TableCodec::decode_index(bytes)?; params .get_tuple_by_id(pk_indices, &tuple_id)? .ok_or(DatabaseError::TupleIdNotFound(tuple_id)) } -impl IndexImpl for UniqueIndexImpl { +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for UniqueIndexImpl { fn index_lookup( &self, bytes: &Bytes, @@ -829,7 +879,7 @@ impl IndexImpl for UniqueIndexImpl { let Some(bytes) = params.tx.get(&self.bound_key(params, value)?)? else { return Ok(IndexResult::Tuple(None)); }; - let tuple_id = TableCodec::decode_index(&bytes, ¶ms.index_meta.pk_ty)?; + let tuple_id = TableCodec::decode_index(&bytes)?; let tuple = params .get_tuple_by_id(pk_indices, &tuple_id)? 
.ok_or(DatabaseError::TupleIdNotFound(tuple_id))?; @@ -840,14 +890,14 @@ impl IndexImpl for UniqueIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { let index = Index::new(params.index_meta.id, value, IndexType::Unique); - TableCodec::encode_index_key(params.table_name, &index, None) + unsafe { &*params.table_codec() }.encode_index_key(params.table_name, &index, None) } } -impl IndexImpl for NormalIndexImpl { +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for NormalIndexImpl { fn index_lookup( &self, bytes: &Bytes, @@ -876,14 +926,14 @@ impl IndexImpl for NormalIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { let index = Index::new(params.index_meta.id, value, IndexType::Normal); - TableCodec::encode_index_bound_key(params.table_name, &index) + unsafe { &*params.table_codec() }.encode_index_bound_key(params.table_name, &index) } } -impl IndexImpl for CompositeIndexImpl { +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CompositeIndexImpl { fn index_lookup( &self, bytes: &Bytes, @@ -912,10 +962,10 @@ impl IndexImpl for CompositeIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { let index = Index::new(params.index_meta.id, value, IndexType::Composite); - TableCodec::encode_index_bound_key(params.table_name, &index) + unsafe { &*params.table_codec() }.encode_index_bound_key(params.table_name, &index) } } @@ -950,7 +1000,7 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { &self.projections, &self.tuple_columns, &value, - ); + )?; if let Some(num) = self.limit.as_mut() { num.sub_assign(1); @@ -1002,6 +1052,11 @@ impl<'a, T: Transaction + 'a> IndexIter<'a, T> { /// expression -> index value -> tuple impl Iter for IndexIter<'_, T> { fn next_tuple(&mut self) -> Result, DatabaseError> { + fn check_bound<'a>(value: &mut Bound>, bound: BumpBytes<'a>) { + if matches!(value, Bound::Unbounded) { + let _ = mem::replace(value, Bound::Included(bound)); + } + } if matches!(self.limit, Some(0)) { self.state = IndexIterState::Over; @@ -1041,16 +1096,11 @@ impl Iter for IndexIter<'_, T> { }; let (bound_min, bound_max) = if matches!(index_meta.ty, IndexType::PrimaryKey { .. }) { - TableCodec::tuple_bound(table_name) + unsafe { &*self.params.table_codec() }.tuple_bound(table_name) } else { - TableCodec::index_bound(table_name, &index_meta.id)? + unsafe { &*self.params.table_codec() } + .index_bound(table_name, &index_meta.id)? 
}; - let check_bound = |value: &mut Bound>, bound: Vec| { - if matches!(value, Bound::Unbounded) { - let _ = mem::replace(value, Bound::Included(bound)); - } - }; - let mut encode_min = bound_encode(min)?; check_bound(&mut encode_min, bound_min); @@ -1291,6 +1341,7 @@ mod test { #[test] fn test_tuple_append_delete() -> Result<(), DatabaseError> { + let table_codec = TableCodec::default(); let temp_dir = TempDir::new().expect("unable to create temporary working directory"); let storage = RocksStorage::new(temp_dir.path())?; let mut transaction = storage.transaction()?; @@ -1323,7 +1374,7 @@ mod test { assert_eq!(tuple_iter.next_tuple()?.unwrap(), tuples[1]); assert_eq!(tuple_iter.next_tuple()?.unwrap(), tuples[2]); - let (min, max) = TableCodec::tuple_bound("t1"); + let (min, max) = table_codec.tuple_bound("t1"); let mut iter = transaction.range(Bound::Included(min), Bound::Included(max))?; let (_, value) = iter.try_next()?.unwrap(); @@ -1347,7 +1398,7 @@ mod test { assert_eq!(tuple_iter.next_tuple()?.unwrap(), tuples[0]); assert_eq!(tuple_iter.next_tuple()?.unwrap(), tuples[2]); - let (min, max) = TableCodec::tuple_bound("t1"); + let (min, max) = table_codec.tuple_bound("t1"); let mut iter = transaction.range(Bound::Included(min), Bound::Included(max))?; let (_, value) = iter.try_next()?.unwrap(); @@ -1362,6 +1413,7 @@ mod test { #[test] fn test_add_index_meta() -> Result<(), DatabaseError> { + let table_codec = TableCodec::default(); let temp_dir = TempDir::new().expect("unable to create temporary working directory"); let storage = RocksStorage::new(temp_dir.path())?; let mut transaction = storage.transaction()?; @@ -1425,7 +1477,7 @@ mod test { &Arc::new(SharedLruCache::new(4, 1, RandomState::new())?), )?; { - let (min, max) = TableCodec::index_meta_bound("t1"); + let (min, max) = table_codec.index_meta_bound("t1"); let mut iter = transaction.range(Bound::Included(min), Bound::Included(max))?; let (_, value) = iter.try_next()?.unwrap(); @@ -1468,6 +1520,7 @@ mod test { ) } + let table_codec = TableCodec::default(); let temp_dir = TempDir::new().expect("unable to create temporary working directory"); let storage = RocksStorage::new(temp_dir.path())?; let mut transaction = storage.transaction()?; @@ -1524,7 +1577,7 @@ mod test { assert_eq!(index_iter.next_tuple()?.unwrap(), tuples[2]); assert_eq!(index_iter.next_tuple()?.unwrap(), tuples[1]); - let (min, max) = TableCodec::index_bound("t1", &1)?; + let (min, max) = table_codec.index_bound("t1", &1)?; let mut iter = transaction.range(Bound::Included(min), Bound::Included(max))?; let (_, value) = iter.try_next()?.unwrap(); @@ -1542,7 +1595,7 @@ mod test { assert_eq!(index_iter.next_tuple()?.unwrap(), tuples[2]); assert_eq!(index_iter.next_tuple()?.unwrap(), tuples[1]); - let (min, max) = TableCodec::index_bound("t1", &1)?; + let (min, max) = table_codec.index_bound("t1", &1)?; let mut iter = transaction.range(Bound::Included(min), Bound::Included(max))?; let (_, value) = iter.try_next()?.unwrap(); diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index c9e0d0b2..8e16bd17 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -1,7 +1,9 @@ use crate::errors::DatabaseError; +use crate::storage::table_codec::{BumpBytes, Bytes, TableCodec}; use crate::storage::{InnerIter, Storage, Transaction}; -use bytes::Bytes; -use rocksdb::{DBIteratorWithThreadMode, Direction, IteratorMode, OptimisticTransactionDB}; +use rocksdb::{ + DBIteratorWithThreadMode, Direction, IteratorMode, OptimisticTransactionDB, SliceTransform, +}; use 
std::collections::Bound; use std::path::PathBuf; use std::sync::Arc; @@ -15,10 +17,12 @@ impl RocksStorage { pub fn new(path: impl Into + Send) -> Result { let mut bb = rocksdb::BlockBasedOptions::default(); bb.set_block_cache(&rocksdb::Cache::new_lru_cache(40 * 1_024 * 1_024)); + bb.set_whole_key_filtering(false); let mut opts = rocksdb::Options::default(); opts.set_block_based_table_factory(&bb); opts.create_if_missing(true); + opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(4)); let storage = OptimisticTransactionDB::open(&opts, path.into())?; @@ -37,12 +41,14 @@ impl Storage for RocksStorage { fn transaction(&self) -> Result, DatabaseError> { Ok(RocksTransaction { tx: self.inner.transaction(), + table_codec: Default::default(), }) } } pub struct RocksTransaction<'db> { tx: rocksdb::Transaction<'db, OptimisticTransactionDB>, + table_codec: TableCodec, } impl<'txn> Transaction for RocksTransaction<'txn> { @@ -51,16 +57,24 @@ impl<'txn> Transaction for RocksTransaction<'txn> { where Self: 'iter; + #[inline] + fn table_codec(&self) -> *const TableCodec { + &self.table_codec + } + + #[inline] fn get(&self, key: &[u8]) -> Result, DatabaseError> { - Ok(self.tx.get(key)?.map(Bytes::from)) + Ok(self.tx.get(key)?) } - fn set(&mut self, key: Bytes, value: Bytes) -> Result<(), DatabaseError> { + #[inline] + fn set(&mut self, key: BumpBytes, value: BumpBytes) -> Result<(), DatabaseError> { self.tx.put(key, value)?; Ok(()) } + #[inline] fn remove(&mut self, key: &[u8]) -> Result<(), DatabaseError> { self.tx.delete(key)?; @@ -68,28 +82,44 @@ impl<'txn> Transaction for RocksTransaction<'txn> { } // Tips: rocksdb has weak support for `Include` and `Exclude`, so precision will be lost - fn range( - &self, - min: Bound>, - max: Bound>, - ) -> Result, DatabaseError> { - fn bound_to_include(bound: Bound<&[u8]>) -> Option<&[u8]> { - match bound { - Bound::Included(bytes) | Bound::Excluded(bytes) => Some(bytes), - Bound::Unbounded => None, + #[inline] + fn range<'a>( + &'a self, + min: Bound>, + max: Bound>, + ) -> Result, DatabaseError> { + let min = match min { + Bound::Included(bytes) => Some(bytes), + Bound::Excluded(mut bytes) => { + // the prefix is the same, but the length is larger + bytes.push(0u8); + Some(bytes) } - } - - let lower = bound_to_include(min.as_ref().map(Vec::as_slice)) + Bound::Unbounded => None, + }; + let lower = min + .as_ref() .map(|bytes| IteratorMode::From(bytes, Direction::Forward)) .unwrap_or(IteratorMode::Start); + + if let (Some(min_bytes), Bound::Included(max_bytes) | Bound::Excluded(max_bytes)) = + (&min, &max) + { + let len = min_bytes + .iter() + .zip(max_bytes.iter()) + .take_while(|(x, y)| x == y) + .count(); + + debug_assert!(len > 0); + let mut iter = self.tx.prefix_iterator(&min_bytes[..len]); + iter.set_mode(lower); + + return Ok(RocksIter { upper: max, iter }); + } let iter = self.tx.iterator(lower); - Ok(RocksIter { - lower: min, - upper: max, - iter, - }) + Ok(RocksIter { upper: max, iter }) } fn commit(self) -> Result<(), DatabaseError> { @@ -99,14 +129,14 @@ impl<'txn> Transaction for RocksTransaction<'txn> { } pub struct RocksIter<'txn, 'iter> { - lower: Bound>, - upper: Bound>, + upper: Bound>, iter: DBIteratorWithThreadMode<'iter, rocksdb::Transaction<'txn, OptimisticTransactionDB>>, } impl InnerIter for RocksIter<'_, '_> { + #[inline] fn try_next(&mut self) -> Result, DatabaseError> { - for result in self.iter.by_ref() { + if let Some(result) = self.iter.by_ref().next() { let (key, value) = result?; let upper_bound_check = match &self.upper { 
Bound::Included(ref upper) => { @@ -116,16 +146,9 @@ impl InnerIter for RocksIter<'_, '_> { Bound::Unbounded => true, }; if !upper_bound_check { - break; - } - let lower_bound_check = match &self.lower { - Bound::Included(ref lower) => key.as_ref() >= lower.as_slice(), - Bound::Excluded(ref lower) => key.as_ref() > lower.as_slice(), - Bound::Unbounded => true, - }; - if lower_bound_check { - return Ok(Some((Bytes::from(key), Bytes::from(value)))); + return Ok(None); } + return Ok(Some((Vec::from(key), Vec::from(value)))); } Ok(None) } diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index 1333d5ea..aa517a40 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -7,7 +7,9 @@ use crate::types::index::{Index, IndexId, IndexMeta, IndexType}; use crate::types::tuple::{Schema, Tuple, TupleId}; use crate::types::value::DataValue; use crate::types::LogicalType; -use bytes::Bytes; +use bumpalo::Bump; +use siphasher::sip::SipHasher; +use std::hash::{Hash, Hasher}; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; use std::sync::LazyLock; @@ -16,10 +18,16 @@ pub(crate) const BOUND_MAX_TAG: u8 = u8::MAX; static ROOT_BYTES: LazyLock> = LazyLock::new(|| b"Root".to_vec()); static VIEW_BYTES: LazyLock> = LazyLock::new(|| b"View".to_vec()); +static HASH_BYTES: LazyLock> = LazyLock::new(|| b"Hash".to_vec()); static EMPTY_REFERENCE_TABLES: LazyLock = LazyLock::new(ReferenceTables::new); -#[derive(Clone)] -pub struct TableCodec {} +pub type Bytes = Vec; +pub type BumpBytes<'bump> = bumpalo::collections::Vec<'bump, u8>; + +#[derive(Default)] +pub struct TableCodec { + arena: Bump, +} #[derive(Copy, Clone)] enum CodecType { @@ -30,9 +38,16 @@ enum CodecType { View, Tuple, Root, + Hash, } impl TableCodec { + fn hash_bytes(table_name: &str) -> [u8; 8] { + let mut hasher = SipHasher::new(); + table_name.hash(&mut hasher); + hasher.finish().to_le_bytes() + } + pub fn check_primary_key(value: &DataValue, indentation: usize) -> Result<(), DatabaseError> { if indentation > 1 { return Err(DatabaseError::PrimaryKeyTooManyLayers); @@ -75,9 +90,12 @@ impl TableCodec { /// TableName + Type /// - /// Tips: Root & View full key = key_prefix - fn key_prefix(ty: CodecType, name: &str) -> Vec { - let mut table_bytes = name.to_string().into_bytes(); + /// Tips: + /// 1. Root & View & Hash full key = key_prefix + /// 2. 
hash table name makes it 4 as a fixed length, and [prefix_extractor](https://github.com/facebook/rocksdb/wiki/Prefix-Seek#defining-a-prefix) can be enabled in rocksdb + fn key_prefix(&self, ty: CodecType, name: &str) -> BumpBytes { + let mut table_bytes = BumpBytes::new_in(&self.arena); + table_bytes.extend_from_slice(Self::hash_bytes(name).as_slice()); match ty { CodecType::Column => { @@ -96,16 +114,30 @@ impl TableCodec { table_bytes.push(b'8'); } CodecType::Root => { - let mut bytes = ROOT_BYTES.clone(); + let mut bytes = BumpBytes::new_in(&self.arena); + + bytes.extend_from_slice(&ROOT_BYTES); bytes.push(BOUND_MIN_TAG); - bytes.append(&mut table_bytes); + bytes.extend_from_slice(&table_bytes); return bytes; } CodecType::View => { - let mut bytes = VIEW_BYTES.clone(); + let mut bytes = BumpBytes::new_in(&self.arena); + + bytes.extend_from_slice(&VIEW_BYTES); + bytes.push(BOUND_MIN_TAG); + bytes.extend_from_slice(&table_bytes); + + return bytes; + } + CodecType::Hash => { + let mut bytes = BumpBytes::new_in(&self.arena); + + bytes.extend_from_slice(&HASH_BYTES); bytes.push(BOUND_MIN_TAG); bytes.append(&mut table_bytes); + bytes.extend_from_slice(&table_bytes); return bytes; } @@ -114,9 +146,9 @@ impl TableCodec { table_bytes } - pub fn tuple_bound(table_name: &str) -> (Vec, Vec) { + pub fn tuple_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = Self::key_prefix(CodecType::Tuple, table_name); + let mut key_prefix = self.key_prefix(CodecType::Tuple, table_name); key_prefix.push(bound_id); key_prefix @@ -125,9 +157,9 @@ impl TableCodec { (op(BOUND_MIN_TAG), op(BOUND_MAX_TAG)) } - pub fn index_meta_bound(table_name: &str) -> (Vec, Vec) { + pub fn index_meta_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = Self::key_prefix(CodecType::IndexMeta, table_name); + let mut key_prefix = self.key_prefix(CodecType::IndexMeta, table_name); key_prefix.push(bound_id); key_prefix @@ -137,25 +169,25 @@ impl TableCodec { } pub fn index_bound( + &self, table_name: &str, index_id: &IndexId, - ) -> Result<(Vec, Vec), DatabaseError> { - let op = |bound_id| -> Result, DatabaseError> { - let mut key_prefix = Cursor::new(Self::key_prefix(CodecType::Index, table_name)); - key_prefix.seek(SeekFrom::End(0))?; + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { + let op = |bound_id| -> Result { + let mut key_prefix = self.key_prefix(CodecType::Index, table_name); key_prefix.write_all(&[BOUND_MIN_TAG])?; key_prefix.write_all(&index_id.to_be_bytes()[..])?; key_prefix.write_all(&[bound_id])?; - Ok(key_prefix.into_inner()) + Ok(key_prefix) }; Ok((op(BOUND_MIN_TAG)?, op(BOUND_MAX_TAG)?)) } - pub fn all_index_bound(table_name: &str) -> (Vec, Vec) { + pub fn all_index_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = Self::key_prefix(CodecType::Index, table_name); + let mut key_prefix = self.key_prefix(CodecType::Index, table_name); key_prefix.push(bound_id); key_prefix @@ -164,10 +196,11 @@ impl TableCodec { (op(BOUND_MIN_TAG), op(BOUND_MAX_TAG)) } - pub fn root_table_bound() -> (Vec, Vec) { + pub fn root_table_bound(&self) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = ROOT_BYTES.clone(); + let mut key_prefix = BumpBytes::new_in(&self.arena); + key_prefix.extend_from_slice(&ROOT_BYTES); key_prefix.push(bound_id); key_prefix }; @@ -175,19 +208,19 @@ impl TableCodec { (op(BOUND_MIN_TAG), op(BOUND_MAX_TAG)) } - pub fn 
table_bound(table_name: &str) -> (Vec, Vec) { - let mut column_prefix = Self::key_prefix(CodecType::Column, table_name); + pub fn table_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { + let mut column_prefix = self.key_prefix(CodecType::Column, table_name); column_prefix.push(BOUND_MIN_TAG); - let mut index_prefix = Self::key_prefix(CodecType::IndexMeta, table_name); + let mut index_prefix = self.key_prefix(CodecType::IndexMeta, table_name); index_prefix.push(BOUND_MAX_TAG); (column_prefix, index_prefix) } - pub fn columns_bound(table_name: &str) -> (Vec, Vec) { + pub fn columns_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = Self::key_prefix(CodecType::Column, table_name); + let mut key_prefix = self.key_prefix(CodecType::Column, table_name); key_prefix.push(bound_id); key_prefix @@ -196,9 +229,9 @@ impl TableCodec { (op(BOUND_MIN_TAG), op(BOUND_MAX_TAG)) } - pub fn statistics_bound(table_name: &str) -> (Vec, Vec) { + pub fn statistics_bound(&self, table_name: &str) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = Self::key_prefix(CodecType::Statistics, table_name); + let mut key_prefix = self.key_prefix(CodecType::Statistics, table_name); key_prefix.push(bound_id); key_prefix @@ -207,10 +240,11 @@ impl TableCodec { (op(BOUND_MIN_TAG), op(BOUND_MAX_TAG)) } - pub fn view_bound() -> (Vec, Vec) { + pub fn view_bound(&self) -> (BumpBytes, BumpBytes) { let op = |bound_id| { - let mut key_prefix = VIEW_BYTES.clone(); + let mut key_prefix = BumpBytes::new_in(&self.arena); + key_prefix.extend_from_slice(&VIEW_BYTES); key_prefix.push(bound_id); key_prefix }; @@ -221,23 +255,25 @@ impl TableCodec { /// Key: {TableName}{TUPLE_TAG}{BOUND_MIN_TAG}{RowID}(Sorted) /// Value: Tuple pub fn encode_tuple( + &self, table_name: &str, tuple: &mut Tuple, types: &[LogicalType], - ) -> Result<(Bytes, Bytes), DatabaseError> { + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { let tuple_id = tuple.id().ok_or(DatabaseError::PrimaryKeyNotFound)?; - let key = Self::encode_tuple_key(table_name, tuple_id)?; + let key = self.encode_tuple_key(table_name, tuple_id)?; - Ok((Bytes::from(key), Bytes::from(tuple.serialize_to(types)?))) + Ok((key, tuple.serialize_to(types, &self.arena)?)) } pub fn encode_tuple_key( + &self, table_name: &str, tuple_id: &TupleId, - ) -> Result, DatabaseError> { + ) -> Result { Self::check_primary_key(tuple_id, 0)?; - let mut key_prefix = Self::key_prefix(CodecType::Tuple, table_name); + let mut key_prefix = self.key_prefix(CodecType::Tuple, table_name); key_prefix.push(BOUND_MIN_TAG); tuple_id.memcomparable_encode(&mut key_prefix)?; @@ -245,35 +281,33 @@ impl TableCodec { Ok(key_prefix) } + #[inline] pub fn decode_tuple( table_types: &[LogicalType], pk_indices: &PrimaryKeyIndices, projections: &[usize], schema: &Schema, bytes: &[u8], - ) -> Tuple { + ) -> Result { Tuple::deserialize_from(table_types, pk_indices, projections, schema, bytes) } /// Key: {TableName}{INDEX_META_TAG}{BOUND_MIN_TAG}{IndexID} /// Value: IndexMeta pub fn encode_index_meta( + &self, table_name: &str, index_meta: &IndexMeta, - ) -> Result<(Bytes, Bytes), DatabaseError> { - let mut key_prefix = Cursor::new(Self::key_prefix(CodecType::IndexMeta, table_name)); - key_prefix.seek(SeekFrom::End(0))?; + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { + let mut key_prefix = self.key_prefix(CodecType::IndexMeta, table_name); key_prefix.write_all(&[BOUND_MIN_TAG])?; key_prefix.write_all(&index_meta.id.to_be_bytes()[..])?; - let mut 
value_bytes = Cursor::new(Vec::new()); + let mut value_bytes = BumpBytes::new_in(&self.arena); index_meta.encode(&mut value_bytes, true, &mut ReferenceTables::new())?; - Ok(( - Bytes::from(key_prefix.into_inner()), - Bytes::from(value_bytes.into_inner()), - )) + Ok((key_prefix, value_bytes)) } pub fn decode_index_meta(bytes: &[u8]) -> Result { @@ -291,20 +325,25 @@ impl TableCodec { /// Tips: The unique index has only one ColumnID and one corresponding DataValue, /// so it can be positioned directly. pub fn encode_index( + &self, name: &str, index: &Index, tuple_id: &TupleId, - ) -> Result<(Bytes, Bytes), DatabaseError> { - let key = TableCodec::encode_index_key(name, index, Some(tuple_id))?; - let mut bytes = Vec::new(); + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { + let key = self.encode_index_key(name, index, Some(tuple_id))?; + let mut bytes = BumpBytes::new_in(&self.arena); - tuple_id.inner_encode(&mut bytes, &tuple_id.logical_type())?; + bincode::serialize_into(&mut bytes, tuple_id)?; - Ok((Bytes::from(key), Bytes::from(bytes))) + Ok((key, bytes)) } - pub fn encode_index_bound_key(name: &str, index: &Index) -> Result, DatabaseError> { - let mut key_prefix = Self::key_prefix(CodecType::Index, name); + pub fn encode_index_bound_key( + &self, + name: &str, + index: &Index, + ) -> Result { + let mut key_prefix = self.key_prefix(CodecType::Index, name); key_prefix.push(BOUND_MIN_TAG); key_prefix.extend_from_slice(&index.id.to_be_bytes()); key_prefix.push(BOUND_MIN_TAG); @@ -315,11 +354,12 @@ impl TableCodec { } pub fn encode_index_key( + &self, name: &str, index: &Index, tuple_id: Option<&TupleId>, - ) -> Result, DatabaseError> { - let mut key_prefix = Self::encode_index_bound_key(name, index)?; + ) -> Result { + let mut key_prefix = self.encode_index_bound_key(name, index)?; if let Some(tuple_id) = tuple_id { if matches!(index.ty, IndexType::Normal | IndexType::Composite) { @@ -329,11 +369,8 @@ impl TableCodec { Ok(key_prefix) } - pub fn decode_index( - bytes: &[u8], - primary_key_ty: &LogicalType, - ) -> Result { - DataValue::inner_decode(&mut Cursor::new(bytes), primary_key_ty) + pub fn decode_index(bytes: &[u8]) -> Result { + Ok(bincode::deserialize_from(&mut Cursor::new(bytes))?) 
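        // Editor's note (hedged): `DataValue` now derives `serde::Serialize` /
        // `serde::Deserialize`, so the tuple id stored in an index entry round-trips
        // through `bincode` and carries its own variant tag. That is why
        // `decode_index` no longer needs the `primary_key_ty` argument the old
        // `DataValue::inner_decode` required.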
} /// Key: {TableName}{COLUMN_TAG}{BOUND_MIN_TAG}{ColumnId} @@ -341,28 +378,25 @@ impl TableCodec { /// /// Tips: the `0` for bound range pub fn encode_column( + &self, col: &ColumnRef, reference_tables: &mut ReferenceTables, - ) -> Result<(Bytes, Bytes), DatabaseError> { + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { if let ColumnRelation::Table { column_id, table_name, is_temp: false, } = &col.summary().relation { - let mut key_prefix = Cursor::new(Self::key_prefix(CodecType::Column, table_name)); - key_prefix.seek(SeekFrom::End(0))?; + let mut key_prefix = self.key_prefix(CodecType::Column, table_name); key_prefix.write_all(&[BOUND_MIN_TAG])?; key_prefix.write_all(&column_id.to_bytes()[..])?; - let mut column_bytes = Cursor::new(Vec::new()); + let mut column_bytes = BumpBytes::new_in(&self.arena); col.encode(&mut column_bytes, true, reference_tables)?; - Ok(( - Bytes::from(key_prefix.into_inner()), - Bytes::from(column_bytes.into_inner()), - )) + Ok((key_prefix, column_bytes)) } else { Err(DatabaseError::InvalidColumn( "column does not belong to table".to_string(), @@ -381,17 +415,21 @@ impl TableCodec { /// Key: {TableName}{STATISTICS_TAG}{BOUND_MIN_TAG}{INDEX_ID} /// Value: StatisticsMeta Path pub fn encode_statistics_path( + &self, table_name: &str, index_id: IndexId, path: String, - ) -> (Bytes, Bytes) { - let key = Self::encode_statistics_path_key(table_name, index_id); + ) -> (BumpBytes, BumpBytes) { + let key = self.encode_statistics_path_key(table_name, index_id); + + let mut value = BumpBytes::new_in(&self.arena); + value.extend_from_slice(path.as_bytes()); - (Bytes::from(key), Bytes::from(path)) + (key, value) } - pub fn encode_statistics_path_key(table_name: &str, index_id: IndexId) -> Vec { - let mut key_prefix = Self::key_prefix(CodecType::Statistics, table_name); + pub fn encode_statistics_path_key(&self, table_name: &str, index_id: IndexId) -> BumpBytes { + let mut key_prefix = self.key_prefix(CodecType::Statistics, table_name); key_prefix.push(BOUND_MIN_TAG); key_prefix.extend(index_id.to_le_bytes()); @@ -404,26 +442,26 @@ impl TableCodec { /// Key: View{BOUND_MIN_TAG}{ViewName} /// Value: View - pub fn encode_view(view: &View) -> Result<(Bytes, Bytes), DatabaseError> { - let key = Self::encode_view_key(&view.name); + pub fn encode_view(&self, view: &View) -> Result<(BumpBytes, BumpBytes), DatabaseError> { + let key = self.encode_view_key(&view.name); let mut reference_tables = ReferenceTables::new(); - let mut bytes = vec![0u8; 4]; + let mut bytes = BumpBytes::new_in(&self.arena); + bytes.resize(4, 0u8); + let reference_tables_pos = { - let mut value = Cursor::new(&mut bytes); - value.seek(SeekFrom::End(0))?; - view.encode(&mut value, false, &mut reference_tables)?; - let pos = value.position() as usize; + view.encode(&mut bytes, false, &mut reference_tables)?; + let pos = bytes.len(); reference_tables.to_raw(&mut bytes)?; pos }; bytes[..4].copy_from_slice(&(reference_tables_pos as u32).to_le_bytes()); - Ok((Bytes::from(key), Bytes::from(bytes))) + Ok((key, bytes)) } - pub fn encode_view_key(view_name: &str) -> Vec { - Self::key_prefix(CodecType::View, view_name) + pub fn encode_view_key(&self, view_name: &str) -> BumpBytes { + self.key_prefix(CodecType::View, view_name) } pub fn decode_view( @@ -445,16 +483,19 @@ impl TableCodec { /// Key: Root{BOUND_MIN_TAG}{TableName} /// Value: TableMeta - pub fn encode_root_table(meta: &TableMeta) -> Result<(Bytes, Bytes), DatabaseError> { - let key = Self::encode_root_table_key(&meta.table_name); + pub fn 
encode_root_table( + &self, + meta: &TableMeta, + ) -> Result<(BumpBytes, BumpBytes), DatabaseError> { + let key = self.encode_root_table_key(&meta.table_name); - let mut meta_bytes = Cursor::new(Vec::new()); + let mut meta_bytes = BumpBytes::new_in(&self.arena); meta.encode(&mut meta_bytes, true, &mut ReferenceTables::new())?; - Ok((Bytes::from(key), Bytes::from(meta_bytes.into_inner()))) + Ok((key, meta_bytes)) } - pub fn encode_root_table_key(table_name: &str) -> Vec { - Self::key_prefix(CodecType::Root, table_name) + pub fn encode_root_table_key(&self, table_name: &str) -> BumpBytes { + self.key_prefix(CodecType::Root, table_name) } pub fn decode_root_table(bytes: &[u8]) -> Result { @@ -462,6 +503,17 @@ impl TableCodec { TableMeta::decode::(&mut bytes, None, &EMPTY_REFERENCE_TABLES) } + + pub fn encode_table_hash_key(&self, table_name: &str) -> BumpBytes { + self.key_prefix(CodecType::Hash, table_name) + } + + pub fn encode_table_hash(&self, table_name: &str) -> (BumpBytes, BumpBytes) { + ( + self.key_prefix(CodecType::Hash, table_name), + BumpBytes::new_in(&self.arena), + ) + } } #[cfg(test)] @@ -474,13 +526,12 @@ mod tests { use crate::errors::DatabaseError; use crate::serdes::ReferenceTables; use crate::storage::rocksdb::RocksTransaction; - use crate::storage::table_codec::TableCodec; + use crate::storage::table_codec::{BumpBytes, TableCodec}; use crate::storage::Storage; use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; use crate::types::LogicalType; - use bytes::Bytes; use itertools::Itertools; use rust_decimal::Decimal; use std::collections::BTreeSet; @@ -507,6 +558,9 @@ mod tests { #[test] fn test_table_codec_tuple() -> Result<(), DatabaseError> { + let table_codec = TableCodec { + arena: Default::default(), + }; let table_catalog = build_table_codec(); let mut tuple = Tuple::new( @@ -516,7 +570,7 @@ mod tests { DataValue::Decimal(Some(Decimal::new(1, 0))), ], ); - let (_, bytes) = TableCodec::encode_tuple( + let (_, bytes) = table_codec.encode_tuple( &table_catalog.name, &mut tuple, &[LogicalType::Integer, LogicalType::Decimal(None, None)], @@ -526,7 +580,7 @@ mod tests { tuple.clear_id(); assert_eq!( - TableCodec::decode_tuple(&table_catalog.types(), pk_indices, &[0, 1], schema, &bytes), + TableCodec::decode_tuple(&table_catalog.types(), pk_indices, &[0, 1], schema, &bytes)?, tuple ); @@ -535,11 +589,15 @@ mod tests { #[test] fn test_root_catalog() { + let table_codec = TableCodec { + arena: Default::default(), + }; let table_catalog = build_table_codec(); - let (_, bytes) = TableCodec::encode_root_table(&TableMeta { - table_name: table_catalog.name.clone(), - }) - .unwrap(); + let (_, bytes) = table_codec + .encode_root_table(&TableMeta { + table_name: table_catalog.name.clone(), + }) + .unwrap(); let table_meta = TableCodec::decode_root_table::(&bytes).unwrap(); @@ -548,8 +606,11 @@ mod tests { #[test] fn test_table_codec_statistics_meta_path() { + let table_codec = TableCodec { + arena: Default::default(), + }; let path = String::from("./lol"); - let (_, bytes) = TableCodec::encode_statistics_path("t1", 0, path.clone()); + let (_, bytes) = table_codec.encode_statistics_path("t1", 0, path.clone()); let decode_path = TableCodec::decode_statistics_path(&bytes).unwrap(); assert_eq!(path, decode_path); @@ -557,6 +618,9 @@ mod tests { #[test] fn test_table_codec_index_meta() -> Result<(), DatabaseError> { + let table_codec = TableCodec { + arena: Default::default(), + }; let index_meta = IndexMeta { id: 0, 
column_ids: vec![Ulid::new()], @@ -566,7 +630,7 @@ mod tests { name: "index_1".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, }; - let (_, bytes) = TableCodec::encode_index_meta(&"T1".to_string(), &index_meta)?; + let (_, bytes) = table_codec.encode_index_meta(&"T1".to_string(), &index_meta)?; assert_eq!( TableCodec::decode_index_meta::(&bytes)?, @@ -578,16 +642,16 @@ mod tests { #[test] fn test_table_codec_index() -> Result<(), DatabaseError> { + let table_codec = TableCodec { + arena: Default::default(), + }; let table_catalog = build_table_codec(); let value = Arc::new(DataValue::Int32(Some(0))); let index = Index::new(0, &value, IndexType::PrimaryKey { is_multiple: false }); let tuple_id = DataValue::Int32(Some(0)); - let (_, bytes) = TableCodec::encode_index(&table_catalog.name, &index, &tuple_id)?; + let (_, bytes) = table_codec.encode_index(&table_catalog.name, &index, &tuple_id)?; - assert_eq!( - TableCodec::decode_index(&bytes, &tuple_id.logical_type())?, - tuple_id - ); + assert_eq!(TableCodec::decode_index(&bytes)?, tuple_id); Ok(()) } @@ -608,8 +672,13 @@ mod tests { let mut reference_tables = ReferenceTables::new(); - let (_, bytes) = TableCodec::encode_column(&col, &mut reference_tables).unwrap(); - let mut cursor = Cursor::new(bytes.as_ref()); + let table_codec = TableCodec { + arena: Default::default(), + }; + let (_, bytes) = table_codec + .encode_column(&col, &mut reference_tables) + .unwrap(); + let mut cursor = Cursor::new(bytes); let decode_col = TableCodec::decode_column::(&mut cursor, &reference_tables)?; @@ -620,9 +689,13 @@ mod tests { #[test] fn test_table_codec_view() -> Result<(), DatabaseError> { + let table_codec = TableCodec { + arena: Default::default(), + }; let table_state = build_t1_table()?; // Subquery { + println!("==== Subquery"); let plan = table_state .plan("select * from t1 where c1 in (select c1 from t1 where c1 > 1)")?; println!("{:#?}", plan); @@ -630,7 +703,7 @@ mod tests { name: Arc::new("view_subquery".to_string()), plan: Box::new(plan), }; - let (_, bytes) = TableCodec::encode_view(&view)?; + let (_, bytes) = table_codec.encode_view(&view)?; let transaction = table_state.storage.transaction()?; assert_eq!( @@ -640,12 +713,13 @@ mod tests { } // No Join { + println!("==== No Join"); let plan = table_state.plan("select * from t1 where c1 > 1")?; let view = View { name: Arc::new("view_filter".to_string()), plan: Box::new(plan), }; - let (_, bytes) = TableCodec::encode_view(&view)?; + let (_, bytes) = table_codec.encode_view(&view)?; let transaction = table_state.storage.transaction()?; assert_eq!( @@ -655,12 +729,13 @@ mod tests { } // Join { + println!("==== Join"); let plan = table_state.plan("select * from t1 left join t2 on c1 = c3")?; let view = View { name: Arc::new("view_join".to_string()), plan: Box::new(plan), }; - let (_, bytes) = TableCodec::encode_view(&view)?; + let (_, bytes) = table_codec.encode_view(&view)?; let transaction = table_state.storage.transaction()?; assert_eq!( @@ -674,6 +749,9 @@ mod tests { #[test] fn test_table_codec_column_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); let op = |col_id: usize, table_name: &str| { let mut col = ColumnCatalog::new( @@ -688,9 +766,9 @@ mod tests { is_temp: false, }; - let (key, _) = - TableCodec::encode_column(&ColumnRef::from(col), &mut ReferenceTables::new()) - .unwrap(); + let (key, _) = table_codec + .encode_column(&ColumnRef::from(col), &mut ReferenceTables::new()) + .unwrap(); key }; @@ -706,12 
+784,12 @@ mod tests { set.insert(op(0, "T2")); set.insert(op(0, "T2")); - let (min, max) = TableCodec::columns_bound(&Arc::new("T1".to_string())); + let (min, max) = table_codec.columns_bound(&Arc::new("T1".to_string())); let vec = set - .range::, Bound<&Bytes>)>(( - Bound::Included(&Bytes::from(min)), - Bound::Included(&Bytes::from(max)), + .range::, Bound<&BumpBytes>)>(( + Bound::Included(&min), + Bound::Included(&max), )) .collect_vec(); @@ -724,6 +802,9 @@ mod tests { #[test] fn test_table_codec_index_meta_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); let op = |index_id: usize, table_name: &str| { let index_meta = IndexMeta { @@ -736,8 +817,9 @@ mod tests { ty: IndexType::PrimaryKey { is_multiple: false }, }; - let (key, _) = - TableCodec::encode_index_meta(&table_name.to_string(), &index_meta).unwrap(); + let (key, _) = table_codec + .encode_index_meta(&table_name.to_string(), &index_meta) + .unwrap(); key }; @@ -753,12 +835,12 @@ mod tests { set.insert(op(1, "T2")); set.insert(op(2, "T2")); - let (min, max) = TableCodec::index_meta_bound(&"T1".to_string()); + let (min, max) = table_codec.index_meta_bound(&"T1".to_string()); let vec = set - .range::, Bound<&Bytes>)>(( - Bound::Included(&Bytes::from(min)), - Bound::Included(&Bytes::from(max)), + .range::, Bound<&BumpBytes>)>(( + Bound::Included(&min), + Bound::Included(&max), )) .collect_vec(); @@ -771,6 +853,9 @@ mod tests { #[test] fn test_table_codec_index_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); let column = ColumnCatalog::new( "".to_string(), @@ -787,7 +872,9 @@ mod tests { IndexType::PrimaryKey { is_multiple: false }, ); - TableCodec::encode_index_key(table_name, &index, None).unwrap() + table_codec + .encode_index_key(table_name, &index, None) + .unwrap() }; set.insert(op(DataValue::Int32(Some(0)), 0, &table_catalog.name)); @@ -804,13 +891,13 @@ mod tests { println!("{:#?}", set); - let (min, max) = TableCodec::index_bound(&table_catalog.name, &1).unwrap(); + let (min, max) = table_codec.index_bound(&table_catalog.name, &1).unwrap(); println!("{:?}", min); println!("{:?}", max); let vec = set - .range::, (Bound<&Vec>, Bound<&Vec>)>(( + .range::, Bound<&BumpBytes>)>(( Bound::Included(&min), Bound::Included(&max), )) @@ -834,6 +921,9 @@ mod tests { #[test] fn test_table_codec_index_all_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); let op = |value: DataValue, index_id: usize, table_name: &str| { let value = Arc::new(value); @@ -843,7 +933,9 @@ mod tests { IndexType::PrimaryKey { is_multiple: false }, ); - TableCodec::encode_index_key(&table_name.to_string(), &index, None).unwrap() + table_codec + .encode_index_key(&table_name.to_string(), &index, None) + .unwrap() }; set.insert(op(DataValue::Int32(Some(0)), 0, "T0")); @@ -858,10 +950,10 @@ mod tests { set.insert(op(DataValue::Int32(Some(1)), 0, "T2")); set.insert(op(DataValue::Int32(Some(2)), 0, "T2")); - let (min, max) = TableCodec::all_index_bound(&"T1".to_string()); + let (min, max) = table_codec.all_index_bound(&"T1".to_string()); let vec = set - .range::, (Bound<&Vec>, Bound<&Vec>)>(( + .range::, Bound<&BumpBytes>)>(( Bound::Included(&min), Bound::Included(&max), )) @@ -876,9 +968,14 @@ mod tests { #[test] fn test_table_codec_tuple_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); let op = |tuple_id: DataValue, table_name: &str| { 
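            // Editor's note (hedged): the bound/key builders became `&self` methods
            // because each returned `BumpBytes` borrows the codec's internal `Bump`
            // arena; that is why these tests now construct a `table_codec` value up
            // front instead of calling associated functions on `TableCodec`.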
- TableCodec::encode_tuple_key(&table_name.to_string(), &Arc::new(tuple_id)).unwrap() + table_codec + .encode_tuple_key(&table_name.to_string(), &Arc::new(tuple_id)) + .unwrap() }; set.insert(op(DataValue::Int32(Some(0)), "T0")); @@ -893,10 +990,10 @@ mod tests { set.insert(op(DataValue::Int32(Some(1)), "T2")); set.insert(op(DataValue::Int32(Some(2)), "T2")); - let (min, max) = TableCodec::tuple_bound(&"T1".to_string()); + let (min, max) = table_codec.tuple_bound(&"T1".to_string()); let vec = set - .range::, (Bound<&Vec>, Bound<&Vec>)>(( + .range::, Bound<&BumpBytes>)>(( Bound::Included(&min), Bound::Included(&max), )) @@ -911,21 +1008,27 @@ mod tests { #[test] fn test_root_codec_name_bound() { - let mut set = BTreeSet::new(); - let op = |table_name: &str| TableCodec::encode_root_table_key(table_name); + let table_codec = TableCodec { + arena: Default::default(), + }; + let mut set: BTreeSet = BTreeSet::new(); + let op = |table_name: &str| table_codec.encode_root_table_key(table_name); - set.insert(b"A".to_vec()); + let mut value_0 = BumpBytes::new_in(&table_codec.arena); + value_0.push(b'A'); + let mut value_1 = BumpBytes::new_in(&table_codec.arena); + value_1.push(b'Z'); + set.insert(value_0); + set.insert(value_1); set.insert(op("T0")); set.insert(op("T1")); set.insert(op("T2")); - set.insert(b"Z".to_vec()); - - let (min, max) = TableCodec::root_table_bound(); + let (min, max) = table_codec.root_table_bound(); let vec = set - .range::, (Bound<&Vec>, Bound<&Vec>)>(( + .range::, Bound<&BumpBytes>)>(( Bound::Included(&min), Bound::Included(&max), )) @@ -938,28 +1041,35 @@ mod tests { #[test] fn test_view_codec_name_bound() { + let table_codec = TableCodec { + arena: Default::default(), + }; let mut set = BTreeSet::new(); - let op = |view_name: &str| TableCodec::encode_view_key(view_name); + let op = |view_name: &str| table_codec.encode_view_key(view_name); - set.insert(b"A".to_vec()); + let mut value_0 = BumpBytes::new_in(&table_codec.arena); + value_0.push(b'A'); + let mut value_1 = BumpBytes::new_in(&table_codec.arena); + value_1.push(b'Z'); + + set.insert(value_0); + set.insert(value_1); set.insert(op("V0")); set.insert(op("V1")); set.insert(op("V2")); - set.insert(b"Z".to_vec()); - - let (min, max) = TableCodec::view_bound(); + let (min, max) = table_codec.view_bound(); let vec = set - .range::, (Bound<&Vec>, Bound<&Vec>)>(( + .range::, Bound<&BumpBytes>)>(( Bound::Included(&min), Bound::Included(&max), )) .collect_vec(); - assert_eq!(vec[0], &op("V0")); - assert_eq!(vec[1], &op("V1")); - assert_eq!(vec[2], &op("V2")); + assert_eq!(vec[2], &op("V0")); + assert_eq!(vec[0], &op("V1")); + assert_eq!(vec[1], &op("V2")); } } diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 79d7ae61..3b7ceaf4 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -1,18 +1,14 @@ use crate::catalog::{ColumnRef, PrimaryKeyIndices}; use crate::db::ResultIter; use crate::errors::DatabaseError; +use crate::storage::table_codec::BumpBytes; use crate::types::value::DataValue; use crate::types::LogicalType; +use bumpalo::Bump; use comfy_table::{Cell, Table}; use itertools::Itertools; +use std::io::Cursor; use std::sync::Arc; -use std::sync::LazyLock; - -pub static EMPTY_TUPLE: LazyLock = LazyLock::new(|| Tuple { - pk_indices: None, - values: vec![], - id_buf: None, -}); const BITS_MAX_INDEX: usize = 8; @@ -62,13 +58,14 @@ impl Tuple { .as_ref() } + #[inline] pub fn deserialize_from( table_types: &[LogicalType], pk_indices: &PrimaryKeyIndices, projections: &[usize], schema: &Schema, bytes: &[u8], - ) -> 
Self { + ) -> Result { debug_assert!(!schema.is_empty()); debug_assert_eq!(projections.len(), schema.len()); @@ -76,50 +73,43 @@ impl Tuple { bits & (1 << (7 - i)) > 0 } - let values_len = table_types.len(); - let mut tuple_values = Vec::with_capacity(values_len); - let bits_len = (values_len + BITS_MAX_INDEX) / BITS_MAX_INDEX; + let types_len = table_types.len(); + let bits_len = (types_len + BITS_MAX_INDEX) / BITS_MAX_INDEX; + let mut values = vec![DataValue::Null; projections.len()]; let mut projection_i = 0; - let mut pos = bits_len; + let mut cursor = Cursor::new(&bytes[bits_len..]); for (i, logic_type) in table_types.iter().enumerate() { - if projection_i >= values_len || projection_i > projections.len() - 1 { + if projections.len() <= projection_i { break; } + debug_assert!(projection_i < types_len); if is_none(bytes[i / BITS_MAX_INDEX], i % BITS_MAX_INDEX) { - if projections[projection_i] == i { - tuple_values.push(DataValue::none(logic_type)); - projection_i += 1; - } - } else if let Some(len) = logic_type.raw_len() { - /// fixed length (e.g.: int) - if projections[projection_i] == i { - tuple_values.push(DataValue::from_raw(&bytes[pos..pos + len], logic_type)); - projection_i += 1; - } - pos += len; - } else { - /// variable length (e.g.: varchar) - let len = u32::from_le_bytes(bytes[pos..pos + 4].try_into().unwrap()) as usize; - pos += 4; - if projections[projection_i] == i { - tuple_values.push(DataValue::from_raw(&bytes[pos..pos + len], logic_type)); - projection_i += 1; - } - pos += len; + projection_i += 1; + continue; + } + if let Some(value) = + DataValue::from_raw(&mut cursor, logic_type, projections[projection_i] == i)? + { + values[projection_i] = value; + projection_i += 1; } } - Tuple { + Ok(Tuple { pk_indices: Some(pk_indices.clone()), - values: tuple_values, + values, id_buf: None, - } + }) } /// e.g.: bits(u8)..|data_0(len for utf8_1)|utf8_0|data_1| /// Tips: all len is u32 - pub fn serialize_to(&self, types: &[LogicalType]) -> Result, DatabaseError> { + pub fn serialize_to<'a>( + &self, + types: &[LogicalType], + arena: &'a Bump, + ) -> Result, DatabaseError> { debug_assert_eq!(self.values.len(), types.len()); fn flip_bit(bits: u8, i: usize) -> u8 { @@ -128,23 +118,20 @@ impl Tuple { let values_len = self.values.len(); let bits_len = (values_len + BITS_MAX_INDEX) / BITS_MAX_INDEX; - let mut bytes = vec![0_u8; bits_len]; + let mut bytes = BumpBytes::new_in(arena); + bytes.resize(bits_len, 0u8); + let null_bytes: *mut BumpBytes = &mut bytes; + let mut value_bytes = &mut bytes; for (i, value) in self.values.iter().enumerate() { if value.is_null() { - bytes[i / BITS_MAX_INDEX] = flip_bit(bytes[i / BITS_MAX_INDEX], i % BITS_MAX_INDEX); + let null_bytes = unsafe { &mut *null_bytes }; + null_bytes[i / BITS_MAX_INDEX] = + flip_bit(null_bytes[i / BITS_MAX_INDEX], i % BITS_MAX_INDEX); } else { - let logical_type = &types[i]; - let value_len = value.to_raw(&mut bytes)?; - - if logical_type.raw_len().is_none() { - let index = bytes.len() - value_len; - - bytes.splice(index..index, (value_len as u32).to_le_bytes()); - } + value.to_raw(&mut value_bytes)?; } } - Ok(bytes) } @@ -185,6 +172,7 @@ mod tests { use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; + use bumpalo::Bump; use itertools::Itertools; use rust_decimal::Decimal; use sqlparser::ast::CharLengthUnits; @@ -290,7 +278,7 @@ mod tests { "c16".to_string(), false, ColumnDesc::new( - LogicalType::Char(1, CharLengthUnits::Octets), + LogicalType::Char(10, 
CharLengthUnits::Octets), None, false, None, @@ -332,7 +320,7 @@ mod tests { }, DataValue::Utf8 { value: Some("K".to_string()), - ty: Utf8Type::Fixed(1), + ty: Utf8Type::Fixed(10), unit: CharLengthUnits::Octets, }, ], @@ -369,7 +357,7 @@ mod tests { }, DataValue::Utf8 { value: None, - ty: Utf8Type::Fixed(1), + ty: Utf8Type::Fixed(10), unit: CharLengthUnits::Octets, }, ], @@ -380,23 +368,30 @@ mod tests { .map(|column| column.datatype().clone()) .collect_vec(); let columns = Arc::new(columns); - - let tuple_0 = Tuple::deserialize_from( - &types, - &Arc::new(vec![0]), - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - &columns, - &tuples[0].serialize_to(&types).unwrap(), - ); - let tuple_1 = Tuple::deserialize_from( - &types, - &Arc::new(vec![0]), - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - &columns, - &tuples[1].serialize_to(&types).unwrap(), - ); - - assert_eq!(tuples[0], tuple_0); - assert_eq!(tuples[1], tuple_1); + let arena = Bump::new(); + { + let tuple_0 = Tuple::deserialize_from( + &types, + &Arc::new(vec![0]), + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + &columns, + &tuples[0].serialize_to(&types, &arena).unwrap(), + ) + .unwrap(); + + assert_eq!(tuples[0], tuple_0); + } + { + let tuple_1 = Tuple::deserialize_from( + &types, + &Arc::new(vec![0]), + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + &columns, + &tuples[1].serialize_to(&types, &arena).unwrap(), + ) + .unwrap(); + + assert_eq!(tuples[1], tuple_1); + } } } diff --git a/src/types/value.rs b/src/types/value.rs index 78ff424a..325fc5b7 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -1,6 +1,7 @@ use super::LogicalType; use crate::errors::DatabaseError; -use crate::storage::table_codec::{BOUND_MAX_TAG, BOUND_MIN_TAG}; +use crate::storage::table_codec::{BumpBytes, BOUND_MAX_TAG, BOUND_MIN_TAG}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use chrono::format::{DelayedFormat, StrftimeItems}; use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use itertools::Itertools; @@ -11,7 +12,7 @@ use sqlparser::ast::CharLengthUnits; use std::cmp::Ordering; use std::fmt::Formatter; use std::hash::Hash; -use std::io::Write; +use std::io::{Read, Seek, SeekFrom, Write}; use std::str::FromStr; use std::sync::LazyLock; use std::{cmp, fmt, mem}; @@ -30,13 +31,13 @@ pub const TIME_FMT: &str = "%H:%M:%S"; const ENCODE_GROUP_SIZE: usize = 8; const ENCODE_MARKER: u8 = 0xFF; -#[derive(Clone)] +#[derive(Clone, serde::Serialize, serde::Deserialize)] pub enum Utf8Type { Variable(Option), Fixed(u32), } -#[derive(Clone)] +#[derive(Clone, serde::Serialize, serde::Deserialize)] pub enum DataValue { Null, Boolean(Option), @@ -205,8 +206,8 @@ impl PartialOrd for DataValue { } macro_rules! encode_u { - ($b:ident, $u:expr) => { - $b.extend_from_slice(&$u.to_be_bytes()) + ($writer:ident, $u:expr) => { + $writer.write_all(&$u.to_be_bytes())? 
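        // Editor's note (hedged): the macro now writes through `io::Write` and
        // propagates failures with `?`, so it can only expand inside functions that
        // return `Result<_, DatabaseError>`. Big-endian (`to_be_bytes`) is kept on
        // purpose: memcomparable keys must sort byte-wise the same way the encoded
        // integers sort numerically.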
}; } @@ -313,13 +314,15 @@ impl DataValue { } } + #[inline] pub(crate) fn check_string_len(string: &str, len: usize, unit: CharLengthUnits) -> bool { match unit { CharLengthUnits::Characters => string.chars().count() > len, - CharLengthUnits::Octets => string.len() > len, + CharLengthUnits::Octets => string.as_bytes().len() > len, } } + #[inline] pub(crate) fn check_len(&self, logic_type: &LogicalType) -> Result<(), DatabaseError> { let is_over_len = match (logic_type, self) { (LogicalType::Varchar(None, _), _) => false, @@ -391,6 +394,7 @@ impl DataValue { value.and_then(|v| Self::time_format(v).map(|fmt| format!("{}", fmt))) } + #[inline] pub fn is_null(&self) -> bool { match self { DataValue::Null => true, @@ -414,6 +418,7 @@ impl DataValue { } } + #[inline] pub fn none(logic_type: &LogicalType) -> DataValue { match logic_type { LogicalType::Invalid => panic!("invalid logical type"), @@ -447,6 +452,7 @@ impl DataValue { } } + #[inline] pub fn init(logic_type: &LogicalType) -> DataValue { match logic_type { LogicalType::Invalid => panic!("invalid logical type"), @@ -484,103 +490,106 @@ impl DataValue { } } - pub fn to_raw(&self, writer: &mut W) -> Result { + #[inline] + pub fn to_raw(&self, writer: &mut W) -> Result<(), DatabaseError> { match self { DataValue::Null => (), DataValue::Boolean(v) => { if let Some(v) = v { - writer.write_all(&[*v as u8])?; - return Ok(1); + writer.write_u8(*v as u8)?; + return Ok(()); } } DataValue::Float32(v) => { if let Some(v) = v { - writer.write_all(&v.to_ne_bytes())?; - return Ok(4); + writer.write_f32::(*v)?; + return Ok(()); } } DataValue::Float64(v) => { if let Some(v) = v { - writer.write_all(&v.to_ne_bytes())?; - return Ok(8); + writer.write_f64::(*v)?; + return Ok(()); } } DataValue::Int8(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(1); + writer.write_i8(*v)?; + return Ok(()); } } DataValue::Int16(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(2); + writer.write_i16::(*v)?; + return Ok(()); } } DataValue::Int32(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(4); + writer.write_i32::(*v)?; + return Ok(()); } } DataValue::Int64(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(8); + writer.write_i64::(*v)?; + return Ok(()); } } DataValue::UInt8(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(1); + writer.write_u8(*v)?; + return Ok(()); } } DataValue::UInt16(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(2); + writer.write_u16::(*v)?; + return Ok(()); } } DataValue::UInt32(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(4); + writer.write_u32::(*v)?; + return Ok(()); } } DataValue::UInt64(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(8); + writer.write_u64::(*v)?; + return Ok(()); } } DataValue::Utf8 { value: v, ty, unit } => { if let Some(v) = v { match ty { Utf8Type::Variable(_) => { - let string_bytes = v.as_bytes(); - let len = string_bytes.len(); + let bytes = v.as_bytes(); - writer.write_all(string_bytes)?; - return Ok(len); + writer.write_u32::(bytes.len() as u32)?; + writer.write_all(bytes)?; + return Ok(()); } Utf8Type::Fixed(len) => match unit { CharLengthUnits::Characters => { let chars_len = *len as usize; - let string_bytes = - format!("{:len$}", v, len = chars_len).into_bytes(); - let octets_len = string_bytes.len(); + let v = format!("{:len$}", v, len = chars_len); + let bytes = 
v.as_bytes(); - writer.write_all(&string_bytes)?; - return Ok(octets_len); + writer.write_u32::(bytes.len() as u32)?; + writer.write_all(bytes)?; + return Ok(()); } CharLengthUnits::Octets => { let octets_len = *len as usize; - let mut string_bytes = v.clone().into_bytes(); - - string_bytes.resize(octets_len, b' '); - debug_assert_eq!(octets_len, string_bytes.len()); - writer.write_all(&string_bytes)?; - return Ok(octets_len); + let bytes = v.as_bytes(); + debug_assert!(octets_len >= bytes.len()); + + writer.write_all(bytes)?; + for _ in 0..octets_len - bytes.len() { + writer.write_u8(b' ')?; + } + return Ok(()); } }, } @@ -588,113 +597,200 @@ impl DataValue { } DataValue::Date32(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(4); + writer.write_i32::(*v)?; + return Ok(()); } } DataValue::Date64(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(8); + writer.write_i64::(*v)?; + return Ok(()); } } DataValue::Time(v) => { if let Some(v) = v { - writer.write_all(&v.to_le_bytes())?; - return Ok(4); + writer.write_u32::(*v)?; + return Ok(()); } } DataValue::Decimal(v) => { if let Some(v) = v { writer.write_all(&v.serialize())?; - return Ok(16); + return Ok(()); } } DataValue::Tuple(_) => unreachable!(), } - Ok(0) + Ok(()) } - pub fn from_raw(bytes: &[u8], ty: &LogicalType) -> Self { - match ty { + #[inline] + pub fn from_raw( + reader: &mut R, + ty: &LogicalType, + is_projection: bool, + ) -> Result, DatabaseError> { + let value = match ty { LogicalType::Invalid => panic!("invalid logical type"), - LogicalType::SqlNull => DataValue::Null, - LogicalType::Boolean => DataValue::Boolean(bytes.first().map(|v| *v != 0)), - LogicalType::Tinyint => DataValue::Int8( - (!bytes.is_empty()).then(|| i8::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::UTinyint => DataValue::UInt8( - (!bytes.is_empty()).then(|| u8::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Smallint => DataValue::Int16( - (!bytes.is_empty()).then(|| i16::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::USmallint => DataValue::UInt16( - (!bytes.is_empty()).then(|| u16::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Integer => DataValue::Int32( - (!bytes.is_empty()).then(|| i32::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::UInteger => DataValue::UInt32( - (!bytes.is_empty()).then(|| u32::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Bigint => DataValue::Int64( - (!bytes.is_empty()).then(|| i64::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::UBigint => DataValue::UInt64( - (!bytes.is_empty()).then(|| u64::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Float => DataValue::Float32((!bytes.is_empty()).then(|| { - let mut buf = [0; 4]; - buf.copy_from_slice(bytes); - f32::from_ne_bytes(buf) - })), - LogicalType::Double => DataValue::Float64((!bytes.is_empty()).then(|| { - let mut buf = [0; 8]; - buf.copy_from_slice(bytes); - f64::from_ne_bytes(buf) - })), - LogicalType::Char(len, unit) => { + LogicalType::SqlNull => { + if !is_projection { + return Ok(None); + } + DataValue::Null + } + LogicalType::Boolean => { + if !is_projection { + reader.seek(SeekFrom::Current(1))?; + return Ok(None); + } + DataValue::Boolean(Some(reader.read_u8()? 
!= 0)) + } + LogicalType::Tinyint => { + if !is_projection { + reader.seek(SeekFrom::Current(1))?; + return Ok(None); + } + DataValue::Int8(Some(reader.read_i8()?)) + } + LogicalType::UTinyint => { + if !is_projection { + reader.seek(SeekFrom::Current(1))?; + return Ok(None); + } + DataValue::UInt8(Some(reader.read_u8()?)) + } + LogicalType::Smallint => { + if !is_projection { + reader.seek(SeekFrom::Current(2))?; + return Ok(None); + } + DataValue::Int16(Some(reader.read_i16::()?)) + } + LogicalType::USmallint => { + if !is_projection { + reader.seek(SeekFrom::Current(2))?; + return Ok(None); + } + DataValue::UInt16(Some(reader.read_u16::()?)) + } + LogicalType::Integer => { + if !is_projection { + reader.seek(SeekFrom::Current(4))?; + return Ok(None); + } + DataValue::Int32(Some(reader.read_i32::()?)) + } + LogicalType::UInteger => { + if !is_projection { + reader.seek(SeekFrom::Current(4))?; + return Ok(None); + } + DataValue::UInt32(Some(reader.read_u32::()?)) + } + LogicalType::Bigint => { + if !is_projection { + reader.seek(SeekFrom::Current(8))?; + return Ok(None); + } + DataValue::Int64(Some(reader.read_i64::()?)) + } + LogicalType::UBigint => { + if !is_projection { + reader.seek(SeekFrom::Current(8))?; + return Ok(None); + } + DataValue::UInt64(Some(reader.read_u64::()?)) + } + LogicalType::Float => { + if !is_projection { + reader.seek(SeekFrom::Current(4))?; + return Ok(None); + } + DataValue::Float32(Some(reader.read_f32::()?)) + } + LogicalType::Double => { + if !is_projection { + reader.seek(SeekFrom::Current(8))?; + return Ok(None); + } + DataValue::Float64(Some(reader.read_f64::()?)) + } + LogicalType::Char(ty_len, unit) => { // https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A - let value = (!bytes.is_empty()).then(|| { - let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') { - Some(index) => index + 1, - None => 0, - }; - String::from_utf8(bytes[0..last_non_zero_index].to_owned()).unwrap() - }); + let len = match unit { + CharLengthUnits::Characters => reader.read_u32::()?, + CharLengthUnits::Octets => *ty_len, + } as usize; + if !is_projection { + reader.seek(SeekFrom::Current(len as i64))?; + return Ok(None); + } + let mut bytes = vec![0; len]; + reader.read_exact(&mut bytes)?; + let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') { + Some(index) => index + 1, + None => 0, + }; + bytes.truncate(last_non_zero_index); + DataValue::Utf8 { - value, - ty: Utf8Type::Fixed(*len), + value: Some(String::from_utf8(bytes)?), + ty: Utf8Type::Fixed(*ty_len), unit: *unit, } } - LogicalType::Varchar(len, unit) => { - let value = - (!bytes.is_empty()).then(|| String::from_utf8(bytes.to_owned()).unwrap()); + LogicalType::Varchar(ty_len, unit) => { + let len = reader.read_u32::()? 
as usize; + if !is_projection { + reader.seek(SeekFrom::Current(len as i64))?; + return Ok(None); + } + let mut bytes = vec![0; len]; + reader.read_exact(&mut bytes)?; + DataValue::Utf8 { - value, - ty: Utf8Type::Variable(*len), + value: Some(String::from_utf8(bytes)?), + ty: Utf8Type::Variable(*ty_len), unit: *unit, } } - LogicalType::Date => DataValue::Date32( - (!bytes.is_empty()).then(|| i32::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::DateTime => DataValue::Date64( - (!bytes.is_empty()).then(|| i64::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Time => DataValue::Time( - (!bytes.is_empty()).then(|| u32::from_le_bytes(bytes.try_into().unwrap())), - ), - LogicalType::Decimal(_, _) => DataValue::Decimal( - (!bytes.is_empty()) - .then(|| Decimal::deserialize(<[u8; 16]>::try_from(bytes).unwrap())), - ), + LogicalType::Date => { + if !is_projection { + reader.seek(SeekFrom::Current(4))?; + return Ok(None); + } + DataValue::Date32(Some(reader.read_i32::()?)) + } + LogicalType::DateTime => { + if !is_projection { + reader.seek(SeekFrom::Current(8))?; + return Ok(None); + } + DataValue::Date64(Some(reader.read_i64::()?)) + } + LogicalType::Time => { + if !is_projection { + reader.seek(SeekFrom::Current(4))?; + return Ok(None); + } + DataValue::Time(Some(reader.read_u32::()?)) + } + LogicalType::Decimal(_, _) => { + if !is_projection { + reader.seek(SeekFrom::Current(16))?; + return Ok(None); + } + let mut bytes = [0u8; 16]; + reader.read_exact(&mut bytes)?; + + DataValue::Decimal(Some(Decimal::deserialize(bytes))) + } LogicalType::Tuple(_) => unreachable!(), - } + }; + Ok(Some(value)) } + #[inline] pub fn logical_type(&self) -> LogicalType { match self { DataValue::Null => LogicalType::SqlNull, @@ -749,7 +845,8 @@ impl DataValue { // [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247] // // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format - fn encode_bytes(b: &mut Vec, data: &[u8]) { + #[inline] + fn encode_bytes(b: &mut BumpBytes, data: &[u8]) { let d_len = data.len(); let realloc_size = (d_len / ENCODE_GROUP_SIZE + 1) * (ENCODE_GROUP_SIZE + 1); Self::realloc_bytes(b, realloc_size); @@ -773,7 +870,8 @@ impl DataValue { } } - fn realloc_bytes(b: &mut Vec, size: usize) { + #[inline] + fn realloc_bytes(b: &mut BumpBytes, size: usize) { let len = b.len(); if size > len { @@ -782,7 +880,8 @@ impl DataValue { } } - pub fn memcomparable_encode(&self, b: &mut Vec) -> Result<(), DatabaseError> { + #[inline] + pub fn memcomparable_encode(&self, b: &mut BumpBytes) -> Result<(), DatabaseError> { match self { DataValue::Int8(Some(v)) => encode_u!(b, *v as u8 ^ 0x80_u8), DataValue::Int16(Some(v)) => encode_u!(b, *v as u16 ^ 0x8000_u16), @@ -844,6 +943,7 @@ impl DataValue { Ok(()) } + #[inline] pub fn is_true(&self) -> Result { if self.is_null() { return Ok(false); @@ -1410,6 +1510,7 @@ impl DataValue { Ok(value) } + #[inline] pub fn common_prefix_length(&self, target: &DataValue) -> Option { if self.is_null() && target.is_null() { return Some(0); @@ -1443,6 +1544,7 @@ impl DataValue { Some(0) } + #[inline] pub(crate) fn values_to_tuple(mut values: Vec) -> Option { if values.len() > 1 { Some(DataValue::Tuple(Some((values, false)))) @@ -1696,13 +1798,16 @@ impl fmt::Debug for DataValue { #[cfg(test)] mod test { use crate::errors::DatabaseError; + use crate::storage::table_codec::BumpBytes; use crate::types::value::DataValue; + use bumpalo::Bump; #[test] fn test_mem_comparable_int() -> Result<(), 
DatabaseError> { - let mut key_i8_1 = Vec::new(); - let mut key_i8_2 = Vec::new(); - let mut key_i8_3 = Vec::new(); + let arena = Bump::new(); + let mut key_i8_1 = BumpBytes::new_in(&arena); + let mut key_i8_2 = BumpBytes::new_in(&arena); + let mut key_i8_3 = BumpBytes::new_in(&arena); DataValue::Int8(Some(i8::MIN)).memcomparable_encode(&mut key_i8_1)?; DataValue::Int8(Some(-1_i8)).memcomparable_encode(&mut key_i8_2)?; @@ -1713,9 +1818,9 @@ mod test { assert!(key_i8_1 < key_i8_2); assert!(key_i8_2 < key_i8_3); - let mut key_i16_1 = Vec::new(); - let mut key_i16_2 = Vec::new(); - let mut key_i16_3 = Vec::new(); + let mut key_i16_1 = BumpBytes::new_in(&arena); + let mut key_i16_2 = BumpBytes::new_in(&arena); + let mut key_i16_3 = BumpBytes::new_in(&arena); DataValue::Int16(Some(i16::MIN)).memcomparable_encode(&mut key_i16_1)?; DataValue::Int16(Some(-1_i16)).memcomparable_encode(&mut key_i16_2)?; @@ -1726,9 +1831,9 @@ mod test { assert!(key_i16_1 < key_i16_2); assert!(key_i16_2 < key_i16_3); - let mut key_i32_1 = Vec::new(); - let mut key_i32_2 = Vec::new(); - let mut key_i32_3 = Vec::new(); + let mut key_i32_1 = BumpBytes::new_in(&arena); + let mut key_i32_2 = BumpBytes::new_in(&arena); + let mut key_i32_3 = BumpBytes::new_in(&arena); DataValue::Int32(Some(i32::MIN)).memcomparable_encode(&mut key_i32_1)?; DataValue::Int32(Some(-1_i32)).memcomparable_encode(&mut key_i32_2)?; @@ -1739,9 +1844,9 @@ mod test { assert!(key_i32_1 < key_i32_2); assert!(key_i32_2 < key_i32_3); - let mut key_i64_1 = Vec::new(); - let mut key_i64_2 = Vec::new(); - let mut key_i64_3 = Vec::new(); + let mut key_i64_1 = BumpBytes::new_in(&arena); + let mut key_i64_2 = BumpBytes::new_in(&arena); + let mut key_i64_3 = BumpBytes::new_in(&arena); DataValue::Int64(Some(i64::MIN)).memcomparable_encode(&mut key_i64_1)?; DataValue::Int64(Some(-1_i64)).memcomparable_encode(&mut key_i64_2)?; @@ -1757,9 +1862,10 @@ mod test { #[test] fn test_mem_comparable_float() -> Result<(), DatabaseError> { - let mut key_f32_1 = Vec::new(); - let mut key_f32_2 = Vec::new(); - let mut key_f32_3 = Vec::new(); + let arena = Bump::new(); + let mut key_f32_1 = BumpBytes::new_in(&arena); + let mut key_f32_2 = BumpBytes::new_in(&arena); + let mut key_f32_3 = BumpBytes::new_in(&arena); DataValue::Float32(Some(f32::MIN)).memcomparable_encode(&mut key_f32_1)?; DataValue::Float32(Some(-1_f32)).memcomparable_encode(&mut key_f32_2)?; @@ -1770,9 +1876,9 @@ mod test { assert!(key_f32_1 < key_f32_2); assert!(key_f32_2 < key_f32_3); - let mut key_f64_1 = Vec::new(); - let mut key_f64_2 = Vec::new(); - let mut key_f64_3 = Vec::new(); + let mut key_f64_1 = BumpBytes::new_in(&arena); + let mut key_f64_2 = BumpBytes::new_in(&arena); + let mut key_f64_3 = BumpBytes::new_in(&arena); DataValue::Float64(Some(f64::MIN)).memcomparable_encode(&mut key_f64_1)?; DataValue::Float64(Some(-1_f64)).memcomparable_encode(&mut key_f64_2)?; @@ -1788,9 +1894,10 @@ mod test { #[test] fn test_mem_comparable_tuple_lower() -> Result<(), DatabaseError> { - let mut key_tuple_1 = Vec::new(); - let mut key_tuple_2 = Vec::new(); - let mut key_tuple_3 = Vec::new(); + let arena = Bump::new(); + let mut key_tuple_1 = BumpBytes::new_in(&arena); + let mut key_tuple_2 = BumpBytes::new_in(&arena); + let mut key_tuple_3 = BumpBytes::new_in(&arena); DataValue::Tuple(Some(( vec![ @@ -1830,9 +1937,10 @@ mod test { #[test] fn test_mem_comparable_tuple_upper() -> Result<(), DatabaseError> { - let mut key_tuple_1 = Vec::new(); - let mut key_tuple_2 = Vec::new(); - let mut key_tuple_3 = Vec::new(); + let 
arena = Bump::new(); + let mut key_tuple_1 = BumpBytes::new_in(&arena); + let mut key_tuple_2 = BumpBytes::new_in(&arena); + let mut key_tuple_3 = BumpBytes::new_in(&arena); DataValue::Tuple(Some(( vec![ diff --git a/src/utils/bit_vector.rs b/src/utils/bit_vector.rs deleted file mode 100644 index 4a0fd674..00000000 --- a/src/utils/bit_vector.rs +++ /dev/null @@ -1,96 +0,0 @@ -use itertools::Itertools; - -#[derive(Debug, Default)] -pub struct BitVector { - #[allow(dead_code)] - len: u64, - bit_groups: Vec, -} - -impl BitVector { - pub fn new(len: usize) -> BitVector { - BitVector { - len: len as u64, - bit_groups: vec![0; (len + 7) / 8], - } - } - - pub fn set_bit(&mut self, index: usize, value: bool) { - let byte_index = index / 8; - let bit_index = index % 8; - - if value { - self.bit_groups[byte_index] |= 1 << bit_index; - } else { - self.bit_groups[byte_index] &= !(1 << bit_index); - } - } - - pub fn get_bit(&self, index: usize) -> bool { - self.bit_groups[index / 8] >> (index % 8) & 1 != 0 - } - - #[allow(dead_code)] - pub fn len(&self) -> usize { - self.len as usize - } - - #[allow(dead_code)] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[allow(dead_code)] - pub fn to_raw(&self, bytes: &mut Vec) { - bytes.extend(self.len.to_le_bytes()); - - for bits in &self.bit_groups { - bytes.extend(bits.to_le_bytes()); - } - } - - #[allow(dead_code)] - pub fn from_raw(bytes: &[u8]) -> Self { - let len = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); - let bit_groups = bytes[8..] - .iter() - .map(|bit| i8::from_le_bytes([*bit])) - .collect_vec(); - - BitVector { len, bit_groups } - } -} - -#[cfg(test)] -mod tests { - use crate::utils::bit_vector::BitVector; - - #[test] - fn bit_vector_serialization() { - let mut vector = BitVector::new(100); - - vector.set_bit(99, true); - - let mut bytes = Vec::new(); - - vector.to_raw(&mut bytes); - let vector = BitVector::from_raw(&bytes); - - for i in 0..98 { - assert!(!vector.get_bit(i)); - } - assert!(vector.get_bit(99)); - } - - #[test] - fn bit_vector_simple() { - let mut vector = BitVector::new(100); - - vector.set_bit(99, true); - - for i in 0..98 { - assert!(!vector.get_bit(i)); - } - assert!(vector.get_bit(99)); - } -} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 81efc1b4..dde0d096 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,2 +1 @@ -pub(crate) mod bit_vector; pub(crate) mod lru; diff --git a/tests/macros-test/src/main.rs b/tests/macros-test/src/main.rs index 33e246a7..1db6317d 100644 --- a/tests/macros-test/src/main.rs +++ b/tests/macros-test/src/main.rs @@ -109,8 +109,7 @@ mod test { unit: CharLengthUnits::Characters, }), ], - &Tuple::new(None, vec![]), - &vec![], + None, )?; println!("{:?}", function); diff --git a/tests/slt/copy.slt b/tests/slt/copy.slt index 5f6c73a1..937fe6be 100644 --- a/tests/slt/copy.slt +++ b/tests/slt/copy.slt @@ -16,4 +16,4 @@ SELECT * FROM test_copy query I COPY test_copy TO './copy.csv' ( DELIMITER ',' ); ---- -Copy test_copy -> ./copy.csv [a, b, c] \ No newline at end of file +Copy To ./copy.csv [a, b, c] \ No newline at end of file diff --git a/tpcc/Cargo.toml b/tpcc/Cargo.toml index 2a39d5bb..7bba2487 100644 --- a/tpcc/Cargo.toml +++ b/tpcc/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] clap = { version = "4", features = ["derive"] } chrono = { version = "0.4" } -fnck_sql = { version = "0.0.8", path = "..", package = "fnck_sql" } +fnck_sql = { version = "0.0.9", path = "..", package = "fnck_sql" } indicatif = { version = "0.17" } ordered-float = { version = "4" } 
rand = { version = "0.8" } diff --git a/tpcc/README.md b/tpcc/README.md index 58474070..4e3e929d 100644 --- a/tpcc/README.md +++ b/tpcc/README.md @@ -6,11 +6,11 @@ run `cargo run -p tpcc --release` to run tpcc - YMTC PC411-1024GB-B - Tips: TPCC currently only supports single thread ```shell -|New-Order| sc: 88139 lt: 0 fl: 897 -|Payment| sc: 88120 lt: 0 fl: 0 -|Order-Status| sc: 8812 lt: 0 fl: 388 -|Delivery| sc: 8812 lt: 0 fl: 0 -|Stock-Level| sc: 8812 lt: 0 fl: 0 +|New-Order| sc: 93779 lt: 0 fl: 926 +|Payment| sc: 93759 lt: 0 fl: 0 +|Order-Status| sc: 9376 lt: 0 fl: 417 +|Delivery| sc: 9375 lt: 0 fl: 0 +|Stock-Level| sc: 9375 lt: 0 fl: 0 in 720 sec. (all must be [OK]) [transaction percentage] @@ -24,134 +24,129 @@ in 720 sec. Order-Status: 100.0 [OK] Delivery: 100.0 [OK] Stock-Level: 100.0 [OK] - New-Order Total: 88139 - Payment Total: 88120 - Order-Status Total: 8812 - Delivery Total: 8812 - Stock-Level Total: 8812 + New-Order Total: 93779 + Payment Total: 93759 + Order-Status Total: 9376 + Delivery Total: 9375 + Stock-Level Total: 9375 1.New-Order -0.001, 5110 -0.002, 63448 -0.003, 19415 -0.004, 78 -0.005, 3 -0.006, 1 -0.013, 2 +0.001, 20973 +0.002, 71372 +0.003, 1306 +0.004, 15 +0.005, 2 2.Payment -0.001, 81269 -0.002, 6794 -0.003, 12 -0.004, 1 +0.001, 90277 +0.002, 3307 +0.003, 11 +0.004, 3 3.Order-Status -0.014, 34 -0.015, 143 +0.013, 24 +0.014, 108 +0.015, 189 0.016, 207 -0.017, 225 -0.018, 221 -0.019, 196 -0.020, 162 -0.021, 170 -0.022, 166 -0.023, 206 -0.024, 190 -0.025, 134 -0.026, 151 -0.027, 287 -0.028, 274 -0.029, 273 -0.030, 206 -0.031, 169 -0.032, 170 -0.033, 149 -0.034, 136 -0.035, 181 -0.036, 244 -0.037, 295 -0.038, 294 -0.039, 232 -0.040, 201 -0.041, 181 -0.042, 173 -0.043, 165 -0.044, 154 -0.045, 175 -0.046, 267 -0.047, 286 -0.048, 233 -0.049, 190 -0.050, 153 -0.051, 183 -0.052, 199 -0.053, 155 -0.054, 190 -0.055, 237 -0.056, 190 -0.057, 151 -0.058, 82 -0.059, 50 -0.060, 14 -0.061, 5 +0.017, 201 +0.018, 207 +0.019, 221 +0.020, 198 +0.021, 187 +0.022, 163 +0.023, 175 +0.024, 259 +0.025, 298 +0.026, 300 +0.027, 239 +0.028, 207 +0.029, 165 +0.030, 142 +0.031, 135 +0.032, 205 +0.033, 303 +0.034, 270 +0.035, 272 +0.036, 170 +0.037, 138 +0.038, 156 +0.039, 175 +0.040, 193 +0.041, 207 +0.042, 211 +0.043, 264 +0.044, 304 +0.045, 265 +0.046, 224 +0.047, 172 +0.048, 176 +0.049, 172 +0.050, 207 +0.051, 238 +0.052, 269 +0.053, 219 +0.054, 242 +0.055, 156 +0.056, 134 +0.057, 94 +0.058, 88 +0.059, 75 +0.060, 49 +0.061, 13 0.062, 4 -0.063, 2 -0.064, 2 -0.065, 1 -0.073, 1 -0.075, 1 -0.078, 1 -0.087, 2 -0.102, 2 -0.131, 3 -0.188, 1 +0.063, 1 +0.064, 1 +0.076, 1 +0.080, 1 +0.117, 1 +0.127, 1 +0.150, 1 +0.158, 1 +0.172, 1 +0.176, 1 4.Delivery -0.012, 96 -0.013, 580 -0.014, 786 -0.015, 882 -0.016, 893 -0.017, 1087 -0.018, 1200 -0.019, 1038 -0.020, 842 -0.021, 576 -0.022, 416 -0.023, 247 -0.024, 94 -0.025, 13 -0.027, 1 -0.028, 2 -0.031, 1 -0.034, 1 -0.050, 1 +0.011, 117 +0.012, 483 +0.013, 659 +0.014, 704 +0.015, 790 +0.016, 911 +0.017, 969 +0.018, 974 +0.019, 895 +0.020, 927 +0.021, 782 +0.022, 542 +0.023, 362 +0.024, 209 +0.025, 45 +0.026, 1 +0.027, 3 5.Stock-Level -0.001, 1299 -0.002, 2836 -0.003, 3192 -0.004, 1150 -0.005, 172 -0.006, 7 +0.001, 1815 +0.002, 3454 +0.003, 3646 +0.004, 377 +0.005, 28 <90th Percentile RT (MaxRT)> - New-Order : 0.003 (0.012) - Payment : 0.001 (0.003) -Order-Status : 0.054 (0.188) - Delivery : 0.021 (0.049) - Stock-Level : 0.004 (0.006) + New-Order : 0.002 (0.004) + Payment : 0.001 (0.025) +Order-Status : 0.053 (0.175) + Delivery : 0.022 (0.027) + Stock-Level 
: 0.003 (0.019) -7345 Tpmc - +7815 tpmC ``` ## Explain diff --git a/tpcc/src/delivery.rs b/tpcc/src/delivery.rs index 10a1b7ec..0e137100 100644 --- a/tpcc/src/delivery.rs +++ b/tpcc/src/delivery.rs @@ -37,7 +37,7 @@ impl TpccTransaction for Delivery { let tuple = tx .execute( &statements[0], - vec![ + &[ ("?1", DataValue::Int8(Some(d_id as i8))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ], @@ -52,7 +52,7 @@ impl TpccTransaction for Delivery { // "DELETE FROM new_orders WHERE no_o_id = ? AND no_d_id = ? AND no_w_id = ?" tx.execute( &statements[1], - vec![ + &[ ("?1", DataValue::Int32(Some(no_o_id))), ("?2", DataValue::Int8(Some(d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), @@ -63,7 +63,7 @@ impl TpccTransaction for Delivery { let tuple = tx .execute( &statements[2], - vec![ + &[ ("?1", DataValue::Int32(Some(no_o_id))), ("?2", DataValue::Int8(Some(d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), @@ -75,7 +75,7 @@ impl TpccTransaction for Delivery { // "UPDATE orders SET o_carrier_id = ? WHERE o_id = ? AND o_d_id = ? AND o_w_id = ?" tx.execute( &statements[3], - vec![ + &[ ("?1", DataValue::Int8(Some(args.o_carrier_id as i8))), ("?2", DataValue::Int32(Some(no_o_id))), ("?3", DataValue::Int8(Some(d_id as i8))), @@ -86,7 +86,7 @@ impl TpccTransaction for Delivery { // "UPDATE order_line SET ol_delivery_d = ? WHERE ol_o_id = ? AND ol_d_id = ? AND ol_w_id = ?" tx.execute( &statements[4], - vec![ + &[ ("?1", DataValue::from(&now)), ("?2", DataValue::Int32(Some(no_o_id))), ("?3", DataValue::Int8(Some(d_id as i8))), @@ -98,7 +98,7 @@ impl TpccTransaction for Delivery { let tuple = tx .execute( &statements[5], - vec![ + &[ ("?1", DataValue::Int32(Some(no_o_id))), ("?2", DataValue::Int8(Some(d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), @@ -110,7 +110,7 @@ impl TpccTransaction for Delivery { // "UPDATE customer SET c_balance = c_balance + ? , c_delivery_cnt = c_delivery_cnt + 1 WHERE c_id = ? AND c_d_id = ? AND c_w_id = ?" 
tx.execute( &statements[6], - vec![ + &[ ("?1", DataValue::Decimal(Some(ol_total))), ("?2", DataValue::Int32(Some(c_id))), ("?3", DataValue::Int8(Some(d_id as i8))), diff --git a/tpcc/src/load.rs b/tpcc/src/load.rs index 8122393c..f3c837c3 100644 --- a/tpcc/src/load.rs +++ b/tpcc/src/load.rs @@ -16,7 +16,6 @@ pub(crate) const DIST_PER_WARE: usize = 10; pub(crate) const ORD_PER_DIST: usize = 3000; pub(crate) static MAX_NUM_ITEMS: usize = 15; -pub(crate) static MAX_ITEM_LEN: usize = 24; fn generate_string(rng: &mut ThreadRng, min: usize, max: usize) -> String { let chars: Vec = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" diff --git a/tpcc/src/main.rs b/tpcc/src/main.rs index 3ab4864f..5c3f3697 100644 --- a/tpcc/src/main.rs +++ b/tpcc/src/main.rs @@ -321,7 +321,6 @@ pub enum TpccError { #[ignore] #[test] fn explain_tpcc() -> Result<(), DatabaseError> { - use fnck_sql::db::ResultIter; use fnck_sql::types::tuple::create_table; let database = DataBaseBuilder::path("./fnck_sql_tpcc").build()?; diff --git a/tpcc/src/new_ord.rs b/tpcc/src/new_ord.rs index 9af9c222..b1ece09e 100644 --- a/tpcc/src/new_ord.rs +++ b/tpcc/src/new_ord.rs @@ -71,7 +71,7 @@ impl TpccTransaction for NewOrd { let tuple = tx .execute( &statements[0], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ("?3", DataValue::Int8(Some(args.d_id as i8))), @@ -91,7 +91,7 @@ impl TpccTransaction for NewOrd { let tuple = tx .execute( &statements[1], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int32(Some(args.c_id as i32))), @@ -106,7 +106,7 @@ impl TpccTransaction for NewOrd { let tuple = tx .execute( &statements[2], - vec![("?1", DataValue::Int16(Some(args.w_id as i16)))], + &[("?1", DataValue::Int16(Some(args.w_id as i16)))], )? .next() .unwrap()?; @@ -118,7 +118,7 @@ impl TpccTransaction for NewOrd { let tuple = tx .execute( &statements[3], - vec![ + &[ ("?1", DataValue::Int8(Some(args.d_id as i8))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ], @@ -130,7 +130,7 @@ impl TpccTransaction for NewOrd { // "UPDATE district SET d_next_o_id = ? + 1 WHERE d_id = ? AND d_w_id = ?" tx.execute( &statements[4], - vec![ + &[ ("?1", DataValue::Int32(Some(d_next_o_id))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), @@ -141,7 +141,7 @@ impl TpccTransaction for NewOrd { // "INSERT INTO orders (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_ol_cnt, o_all_local) VALUES(?, ?, ?, ?, ?, ?, ?)" tx.execute( &statements[5], - vec![ + &[ ("?1", DataValue::Int32(Some(o_id))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), @@ -155,7 +155,7 @@ impl TpccTransaction for NewOrd { // "INSERT INTO new_orders (no_o_id, no_d_id, no_w_id) VALUES (?,?,?) 
tx.execute( &statements[6], - vec![ + &[ ("?1", DataValue::Int32(Some(o_id))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int16(Some(args.w_id as i16))), diff --git a/tpcc/src/order_stat.rs b/tpcc/src/order_stat.rs index 5f81e078..47f3e027 100644 --- a/tpcc/src/order_stat.rs +++ b/tpcc/src/order_stat.rs @@ -50,7 +50,7 @@ impl TpccTransaction for OrderStat { let tuple = tx .execute( &statements[0], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::from(args.c_last.clone())), @@ -62,7 +62,7 @@ impl TpccTransaction for OrderStat { // SELECT c_balance, c_first, c_middle, c_last FROM customer WHERE c_w_id = ? AND c_d_id = ? AND c_last = ? ORDER BY c_first" let mut tuple_iter = tx.execute( &statements[1], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::from(args.c_last.clone())), @@ -91,7 +91,7 @@ impl TpccTransaction for OrderStat { let tuple = tx .execute( &statements[2], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int32(Some(args.c_id as i32))), @@ -110,7 +110,7 @@ impl TpccTransaction for OrderStat { let tuple = tx .execute( &statements[3], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int32(Some(args.c_id as i32))), @@ -131,7 +131,7 @@ impl TpccTransaction for OrderStat { let tuple = tx .execute( &statements[4], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int32(Some(o_id))), diff --git a/tpcc/src/payment.rs b/tpcc/src/payment.rs index 449e9400..f1edf102 100644 --- a/tpcc/src/payment.rs +++ b/tpcc/src/payment.rs @@ -61,7 +61,7 @@ impl TpccTransaction for Payment { // "UPDATE warehouse SET w_ytd = w_ytd + ? WHERE w_id = ?" tx.execute( &statements[0], - vec![ + &[ ("?1", DataValue::Decimal(Some(args.h_amount))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ], @@ -71,7 +71,7 @@ impl TpccTransaction for Payment { let tuple = tx .execute( &statements[1], - vec![("?1", DataValue::Int16(Some(args.w_id as i16)))], + &[("?1", DataValue::Int16(Some(args.w_id as i16)))], )? .next() .unwrap()?; @@ -85,7 +85,7 @@ impl TpccTransaction for Payment { // "UPDATE district SET d_ytd = d_ytd + ? WHERE d_w_id = ? AND d_id = ?" tx.execute( &statements[2], - vec![ + &[ ("?1", DataValue::Decimal(Some(args.h_amount))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ("?3", DataValue::Int8(Some(args.d_id as i8))), @@ -97,7 +97,7 @@ impl TpccTransaction for Payment { let tuple = tx .execute( &statements[3], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ], @@ -117,7 +117,7 @@ impl TpccTransaction for Payment { let tuple = tx .execute( &statements[4], - vec![ + &[ ("?1", DataValue::Int16(Some(args.c_w_id as i16))), ("?2", DataValue::Int8(Some(args.c_d_id as i8))), ("?3", DataValue::from(args.c_last.clone())), @@ -130,7 +130,7 @@ impl TpccTransaction for Payment { // "SELECT c_id FROM customer WHERE c_w_id = ? AND c_d_id = ? AND c_last = ? 
ORDER BY c_first" let mut tuple_iter = tx.execute( &statements[5], - vec![ + &[ ("?1", DataValue::Int16(Some(args.c_w_id as i16))), ("?2", DataValue::Int8(Some(args.c_d_id as i8))), ("?3", DataValue::from(args.c_last.clone())), @@ -148,7 +148,7 @@ impl TpccTransaction for Payment { let tuple = tx .execute( &statements[6], - vec![ + &[ ("?1", DataValue::Int16(Some(args.c_w_id as i16))), ("?2", DataValue::Int8(Some(args.c_d_id as i8))), ("?3", DataValue::Int32(Some(c_id))), @@ -178,7 +178,7 @@ impl TpccTransaction for Payment { let tuple = tx .execute( &statements[7], - vec![ + &[ ("?1", DataValue::Int16(Some(args.c_w_id as i16))), ("?2", DataValue::Int8(Some(args.c_d_id as i8))), ("?3", DataValue::Int32(Some(c_id))), @@ -194,7 +194,7 @@ impl TpccTransaction for Payment { // "UPDATE customer SET c_balance = ?, c_data = ? WHERE c_w_id = ? AND c_d_id = ? AND c_id = ?" tx.execute( &statements[8], - vec![ + &[ ("?1", DataValue::Decimal(Some(c_balance))), ("?2", DataValue::from(c_data)), ("?3", DataValue::Int16(Some(args.c_w_id as i16))), @@ -207,7 +207,7 @@ impl TpccTransaction for Payment { // "UPDATE customer SET c_balance = ? WHERE c_w_id = ? AND c_d_id = ? AND c_id = ?" tx.execute( &statements[9], - vec![ + &[ ("?1", DataValue::Decimal(Some(c_balance))), ("?2", DataValue::Int16(Some(args.c_w_id as i16))), ("?3", DataValue::Int8(Some(args.c_d_id as i8))), @@ -220,7 +220,7 @@ impl TpccTransaction for Payment { // "UPDATE customer SET c_balance = ? WHERE c_w_id = ? AND c_d_id = ? AND c_id = ?" tx.execute( &statements[9], - vec![ + &[ ("?1", DataValue::Decimal(Some(c_balance))), ("?2", DataValue::Int16(Some(args.c_w_id as i16))), ("?3", DataValue::Int8(Some(args.c_d_id as i8))), @@ -233,7 +233,7 @@ impl TpccTransaction for Payment { // "INSERT INTO history(h_c_d_id, h_c_w_id, h_c_id, h_d_id, h_w_id, h_date, h_amount, h_data) VALUES(?, ?, ?, ?, ?, ?, ?, ?)" tx.execute( &statements[10], - vec![ + &[ ("?1", DataValue::Int8(Some(args.c_d_id as i8))), ("?2", DataValue::Int16(Some(args.c_w_id as i16))), ("?3", DataValue::Int32(Some(c_id))), diff --git a/tpcc/src/slev.rs b/tpcc/src/slev.rs index 55b7565f..76d69a8e 100644 --- a/tpcc/src/slev.rs +++ b/tpcc/src/slev.rs @@ -34,7 +34,7 @@ impl TpccTransaction for Slev { let tuple = tx .execute( &statements[0], - vec![ + &[ ("?1", DataValue::Int8(Some(args.d_id as i8))), ("?2", DataValue::Int16(Some(args.w_id as i16))), ], @@ -46,7 +46,7 @@ impl TpccTransaction for Slev { let tuple = tx .execute( &statements[1], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(args.d_id as i8))), ("?3", DataValue::Int32(Some(d_next_o_id))), @@ -60,7 +60,7 @@ impl TpccTransaction for Slev { let tuple = tx .execute( &statements[2], - vec![ + &[ ("?1", DataValue::Int16(Some(args.w_id as i16))), ("?2", DataValue::Int8(Some(ol_i_id as i8))), ("?3", DataValue::Int16(Some(args.level as i16))),
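
Note on the tpcc/README.md update above: the headline throughput is reported in tpmC, the TPC-C metric counting New-Order transactions completed per minute. The updated figure is consistent with the raw counts in the same output: 93779 New-Order transactions over the 720-second run gives 93779 / 12 ≈ 7815 tpmC, up from 88139 / 12 ≈ 7345 before this change.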
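Note on the recurring `vec![...]` to `&[...]` change across tpcc/src/*.rs: `execute` now borrows its bind parameters as a slice instead of taking an owned `Vec`, so every call site binds from a stack-allocated array literal and skips one heap allocation per statement. A minimal sketch of the pattern, with simplified stand-ins for `Tx`, `DataValue`, and the `execute` signature (inferred from the call sites in this patch, not the crate's exact API):

```rust
// Stand-in for fnck_sql::types::value::DataValue, reduced to the
// variants that appear in the call sites above.
#[derive(Debug)]
enum DataValue {
    Int8(Option<i8>),
    Int16(Option<i16>),
    Int32(Option<i32>),
}

// Stand-in for a transaction handle.
struct Tx;

impl Tx {
    // Before: params: Vec<(&str, DataValue)> -- every caller built a Vec.
    // After:  params: &[(&str, DataValue)]   -- callers pass `&[...]`.
    fn execute(&mut self, stmt: &str, params: &[(&str, DataValue)]) {
        for (placeholder, value) in params {
            // A real engine would substitute `value` for `placeholder`
            // ("?1", "?2", ...) before executing `stmt`.
            println!("{stmt}: bind {placeholder} = {value:?}");
        }
    }
}

fn main() {
    let mut tx = Tx;
    // The parameter array lives on the stack and is only borrowed here,
    // mirroring the `&[("?1", ...), ...]` call sites in the diff.
    tx.execute(
        "DELETE FROM new_orders WHERE no_o_id = ?1 AND no_d_id = ?2 AND no_w_id = ?3",
        &[
            ("?1", DataValue::Int32(Some(3001))),
            ("?2", DataValue::Int8(Some(1))),
            ("?3", DataValue::Int16(Some(1))),
        ],
    );
}
```

Because these parameter lists are short, fixed-size array literals, borrowing them avoids the allocation entirely while leaving the callee free to clone any individual values it needs to retain.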