From a5690326662299c114ef0446cd82be51a1ed2705 Mon Sep 17 00:00:00 2001 From: Bei Chu <914745487@qq.com> Date: Fri, 10 Mar 2023 16:13:19 +0800 Subject: [PATCH] feat: Separate cache operation log environment and index environments (#1199) --- dozer-api/src/errors.rs | 4 +- dozer-api/src/grpc/common/service.rs | 11 +- dozer-api/src/grpc/shared_impl/mod.rs | 7 +- dozer-api/src/grpc/typed/service.rs | 21 +- dozer-api/src/rest/api_generator.rs | 12 +- dozer-api/src/test_utils.rs | 6 +- .../src/cache/lmdb/cache/main_environment.rs | 405 --------------- .../cache/lmdb/cache/main_environment/mod.rs | 287 +++++++++++ .../operation_log/lmdb_val_impl.rs | 160 ++++++ .../main_environment/operation_log/mod.rs | 469 ++++++++++++++++++ dozer-cache/src/cache/lmdb/cache/mod.rs | 364 +++++--------- .../src/cache/lmdb/cache/query/handler.rs | 439 +++++----------- dozer-cache/src/cache/lmdb/cache/query/mod.rs | 1 + .../src/cache/lmdb/cache/query/secondary.rs | 280 +++++++++++ .../src/cache/lmdb/cache/query/tests.rs | 23 +- .../src/cache/lmdb/cache/schema_database.rs | 94 ---- .../cache/lmdb/cache/secondary_environment.rs | 278 +++++++++++ .../lmdb/cache/secondary_index_database.rs | 50 -- dozer-cache/src/cache/lmdb/cache_manager.rs | 15 +- dozer-cache/src/cache/lmdb/comparator.rs | 14 +- dozer-cache/src/cache/lmdb/indexer.rs | 218 ++++---- dozer-cache/src/cache/lmdb/tests/basic.rs | 2 +- .../src/cache/lmdb/tests/read_write.rs | 11 +- dozer-cache/src/cache/lmdb/tests/utils.rs | 35 +- dozer-cache/src/cache/lmdb/utils.rs | 138 +++--- dozer-cache/src/cache/mod.rs | 4 +- dozer-cache/src/cache/plan/mod.rs | 1 - dozer-cache/src/cache/plan/planner.rs | 24 +- dozer-cache/src/cache/plan/tests.rs | 65 +-- dozer-cache/src/cache/test_utils.rs | 12 +- dozer-cache/src/errors.rs | 28 +- dozer-cache/src/reader.rs | 4 +- dozer-core/src/dag_metadata.rs | 3 +- dozer-core/src/executor/execution_dag.rs | 1 - dozer-core/src/record_store.rs | 1 - dozer-orchestrator/src/pipeline/sinks.rs | 11 +- dozer-storage/src/lib.rs | 4 + dozer-storage/src/lmdb_counter.rs | 32 +- dozer-storage/src/lmdb_database/lmdb_val.rs | 44 +- .../src/lmdb_database/raw_iterator.rs | 4 +- dozer-storage/src/lmdb_map.rs | 36 +- dozer-storage/src/lmdb_multimap.rs | 33 +- dozer-storage/src/lmdb_option.rs | 39 ++ dozer-storage/src/lmdb_set.rs | 22 +- dozer-storage/src/lmdb_storage.rs | 74 +-- dozer-storage/src/lmdb_transaction.rs | 41 ++ dozer-types/src/borrow/mod.rs | 4 +- dozer-types/src/types/mod.rs | 2 + 48 files changed, 2200 insertions(+), 1633 deletions(-) delete mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment.rs create mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs create mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs create mode 100644 dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs create mode 100644 dozer-cache/src/cache/lmdb/cache/query/secondary.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/schema_database.rs create mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_environment.rs delete mode 100644 dozer-cache/src/cache/lmdb/cache/secondary_index_database.rs create mode 100644 dozer-storage/src/lmdb_option.rs create mode 100644 dozer-storage/src/lmdb_transaction.rs diff --git a/dozer-api/src/errors.rs b/dozer-api/src/errors.rs index 9154d48719..6476579aef 100644 --- a/dozer-api/src/errors.rs +++ b/dozer-api/src/errors.rs @@ -21,8 +21,6 @@ pub enum ApiError { OpenCache(#[source] CacheError), #[error("Failed to open cache: {0}")] 
CacheNotFound(String), - #[error("Cannot find schema by name")] - SchemaNotFound(#[source] CacheError), #[error("Get by primary key is not supported when there is no primary key")] NoPrimaryKey, #[error("Get by primary key is not supported when it is composite: {0:?}")] @@ -141,7 +139,7 @@ impl actix_web::error::ResponseError for ApiError { ApiError::TypeError(_) => StatusCode::BAD_REQUEST, ApiError::ApiAuthError(_) => StatusCode::UNAUTHORIZED, ApiError::NotFound(_) => StatusCode::NOT_FOUND, - ApiError::SchemaNotFound(_) | ApiError::NoPrimaryKey | ApiError::MultiIndexFetch(_) => { + ApiError::NoPrimaryKey | ApiError::MultiIndexFetch(_) => { StatusCode::UNPROCESSABLE_ENTITY } ApiError::InternalError(_) diff --git a/dozer-api/src/grpc/common/service.rs b/dozer-api/src/grpc/common/service.rs index 567ed158c4..b58c34dea1 100644 --- a/dozer-api/src/grpc/common/service.rs +++ b/dozer-api/src/grpc/common/service.rs @@ -86,10 +86,7 @@ impl CommonGrpcService for CommonService { let cache_reader = cache_endpoint.cache_reader(); let records = shared_impl::query(&cache_reader, query_request.query.as_deref(), access)?; - let schema = &cache_reader - .get_schema() - .map_err(|_| Status::invalid_argument(&cache_endpoint.endpoint.name))? - .0; + let schema = &cache_reader.get_schema().0; let fields = map_field_definitions(schema.fields.clone()); let records = records.into_iter().map(map_record).collect(); @@ -113,7 +110,6 @@ impl CommonGrpcService for CommonService { shared_impl::on_event( &cache_endpoint.cache_reader(), - &cache_endpoint.endpoint.name, query_request.filter.as_deref(), self.event_notifier.as_ref().map(|r| r.resubscribe()), access.cloned(), @@ -147,10 +143,7 @@ impl CommonGrpcService for CommonService { .map_or(Err(Status::invalid_argument(&endpoint)), Ok)?; let cache_reader = cache_endpoint.cache_reader(); - let schema = &cache_reader - .get_schema() - .map_err(|_| Status::invalid_argument(endpoint))? - .0; + let schema = &cache_reader.get_schema().0; let fields = map_field_definitions(schema.fields.clone()); diff --git a/dozer-api/src/grpc/shared_impl/mod.rs b/dozer-api/src/grpc/shared_impl/mod.rs index 00192c3c98..18547697d8 100644 --- a/dozer-api/src/grpc/shared_impl/mod.rs +++ b/dozer-api/src/grpc/shared_impl/mod.rs @@ -58,7 +58,6 @@ pub fn query( pub fn on_event( reader: &CacheReader, - endpoint_name: &str, filter: Option<&str>, mut broadcast_receiver: Option>, _access: Option, @@ -82,11 +81,7 @@ pub fn on_event( } None => None, }; - let schema = reader - .get_schema() - .map_err(|_| Status::invalid_argument(endpoint_name))? 
- .0 - .clone(); + let schema = reader.get_schema().0.clone(); let (tx, rx) = tokio::sync::mpsc::channel(1); diff --git a/dozer-api/src/grpc/typed/service.rs b/dozer-api/src/grpc/typed/service.rs index 381f0724ef..0b586f7465 100644 --- a/dozer-api/src/grpc/typed/service.rs +++ b/dozer-api/src/grpc/typed/service.rs @@ -347,20 +347,13 @@ fn on_event( .transpose()?; let endpoint_to_be_streamed = endpoint_name.to_string(); - shared_impl::on_event( - reader, - endpoint_name, - filter, - event_notifier, - access.cloned(), - move |op| { - if endpoint_to_be_streamed == op.endpoint_name { - Some(Ok(on_event_to_typed_response(op, event_desc.clone()))) - } else { - None - } - }, - ) + shared_impl::on_event(reader, filter, event_notifier, access.cloned(), move |op| { + if endpoint_to_be_streamed == op.endpoint_name { + Some(Ok(on_event_to_typed_response(op, event_desc.clone()))) + } else { + None + } + }) } fn token( diff --git a/dozer-api/src/rest/api_generator.rs b/dozer-api/src/rest/api_generator.rs index 2a2b0d0075..d6502c134a 100644 --- a/dozer-api/src/rest/api_generator.rs +++ b/dozer-api/src/rest/api_generator.rs @@ -23,7 +23,7 @@ use dozer_types::serde_json; use dozer_types::serde_json::{json, Map, Value}; fn generate_oapi3(reader: &CacheReader, endpoint: ApiEndpoint) -> Result { - let (schema, secondary_indexes) = reader.get_schema().map_err(ApiError::SchemaNotFound)?; + let (schema, secondary_indexes) = reader.get_schema(); let oapi_generator = OpenApiGenerator::new( schema, @@ -53,10 +53,7 @@ pub async fn get( path: web::Path, ) -> Result { let cache_reader = &cache_endpoint.cache_reader(); - let schema = &cache_reader - .get_schema() - .map_err(ApiError::SchemaNotFound)? - .0; + let schema = &cache_reader.get_schema().0; let key = path.as_str(); let key = if schema.primary_index.is_empty() { @@ -151,10 +148,7 @@ fn get_records_map( let mut maps = vec![]; let cache_reader = &cache_endpoint.cache_reader(); let records = get_records(cache_reader, exp, access.map(|a| a.into_inner()))?; - let schema = &cache_reader - .get_schema() - .map_err(ApiError::SchemaNotFound)? 
- .0; + let schema = &cache_reader.get_schema().0; for record in records.into_iter() { let map = record_to_map(record, schema)?; maps.push(map); diff --git a/dozer-api/src/test_utils.rs b/dozer-api/src/test_utils.rs index 03919cdead..04e377846a 100644 --- a/dozer-api/src/test_utils.rs +++ b/dozer-api/src/test_utils.rs @@ -1,5 +1,5 @@ use dozer_types::serde_json::{json, Value}; -use dozer_types::types::{Field, Record, SourceDefinition}; +use dozer_types::types::{Field, Record, SchemaWithIndex, SourceDefinition}; use dozer_types::{ models::api_endpoint::{ApiEndpoint, ApiIndex}, types::{FieldDefinition, FieldType, IndexDefinition, Schema, SchemaIdentifier}, @@ -7,7 +7,7 @@ use dozer_types::{ use dozer_cache::cache::{CacheManager, LmdbCacheManager, RecordWithId}; -pub fn get_schema() -> (Schema, Vec) { +pub fn get_schema() -> SchemaWithIndex { let fields = vec![ FieldDefinition { name: "film_id".to_string(), @@ -101,7 +101,7 @@ fn get_films() -> Vec { pub fn initialize_cache( schema_name: &str, - schema: Option<(Schema, Vec)>, + schema: Option, ) -> Box { let cache_manager = LmdbCacheManager::new(Default::default()).unwrap(); let (schema, secondary_indexes) = schema.unwrap_or_else(get_schema); diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment.rs b/dozer-cache/src/cache/lmdb/cache/main_environment.rs deleted file mode 100644 index 006439aba4..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/main_environment.rs +++ /dev/null @@ -1,405 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::{RoCursor, RwTransaction, Transaction}, - lmdb_storage::LmdbEnvironmentManager, - BorrowEncode, Decode, Encode, Encoded, KeyIterator, LmdbCounter, LmdbMap, LmdbSet, LmdbVal, -}; -use dozer_types::{ - borrow::{Borrow, Cow, IntoOwned}, - impl_borrow_for_clone_type, - serde::{Deserialize, Serialize}, - types::{Field, FieldType, Record, Schema}, -}; - -use crate::{ - cache::{index, RecordWithId}, - errors::CacheError, -}; - -const INITIAL_RECORD_VERSION: u32 = 1_u32; - -#[derive(Debug, Clone, Copy)] -struct RecordMetadata { - /// The record id. Consistent across `insert`s and `delete`s. - id: u64, - /// The latest record version, even if the record is deleted. - version: u32, - /// The operation id of the latest `Insert` operation. `None` if the record is deleted. - insert_operation_id: Option, -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(crate = "dozer_types::serde")] -enum Operation { - Delete { - /// The operation id of an `Insert` operation, which must exist. - operation_id: u64, - }, - Insert { - record_id: u64, - record: Record, - }, -} - -#[derive(Debug)] -pub struct MainEnvironment { - /// Record primary key -> RecordMetadata, empty if schema has no primary key. - /// Length always increases. - primary_key_to_metadata: LmdbMap, RecordMetadata>, - /// Operation ids of latest `Insert`s. Used to filter out deleted records in query. Empty if schema has no primary key. - present_operation_ids: LmdbSet, - /// The next operation id. Monotonically increasing. - next_operation_id: LmdbCounter, - /// Operation_id -> operation. 
- operation_id_to_operation: LmdbMap, -} - -impl MainEnvironment { - pub fn new( - env: &mut LmdbEnvironmentManager, - create_if_not_exist: bool, - ) -> Result { - let primary_key_to_metadata = - LmdbMap::new_from_env(env, Some("primary_key_to_metadata"), create_if_not_exist)?; - let present_operation_ids = - LmdbSet::new_from_env(env, Some("present_operation_ids"), create_if_not_exist)?; - let next_operation_id = - LmdbCounter::new_from_env(env, Some("next_operation_id"), create_if_not_exist)?; - let operation_id_to_operation = - LmdbMap::new_from_env(env, Some("operation_id_to_operation"), create_if_not_exist)?; - Ok(Self { - primary_key_to_metadata, - present_operation_ids, - next_operation_id, - operation_id_to_operation, - }) - } - - pub fn count( - &self, - txn: &T, - schema_is_append_only: bool, - ) -> Result { - if schema_is_append_only { - self.operation_id_to_operation.count(txn) - } else { - self.present_operation_ids.count(txn) - } - .map_err(Into::into) - } - - pub fn present_operation_ids<'txn, T: Transaction>( - &self, - txn: &'txn T, - schema_is_append_only: bool, - ) -> Result, u64>, CacheError> { - if schema_is_append_only { - self.operation_id_to_operation.keys(txn) - } else { - self.present_operation_ids.iter(txn) - } - .map_err(Into::into) - } - - pub fn get(&self, txn: &T, key: &[u8]) -> Result { - let metadata = self - .primary_key_to_metadata - .get(txn, key)? - .ok_or(CacheError::PrimaryKeyNotFound)?; - let Some(insert_operation_id) = metadata.borrow().insert_operation_id else { - return Err(CacheError::PrimaryKeyNotFound); - }; - self.get_by_operation_id_unchecked(txn, insert_operation_id) - } - - pub fn get_by_operation_id( - &self, - txn: &T, - operation_id: u64, - schema_is_append_only: bool, - ) -> Result, CacheError> { - // IF schema has no primary key, then all operation ids are latest `Insert`s. - if !schema_is_append_only && !self.present_operation_ids.contains(txn, &operation_id)? { - Ok(None) - } else { - self.get_by_operation_id_unchecked(txn, operation_id) - .map(Some) - } - } - - fn get_by_operation_id_unchecked( - &self, - txn: &T, - operation_id: u64, - ) -> Result { - let Some(Cow::Owned(Operation::Insert { - record_id, - record, - })) = self.operation_id_to_operation.get(txn, &operation_id)? else { - panic!( - "Inconsistent state: primary_key_to_metadata or present_operation_ids contains an insert operation id that is not an Insert operation" - ); - }; - Ok(RecordWithId::new(record_id, record)) - } - - /// Inserts the record into the cache and sets the record version. Returns the record id and the operation id. - /// - /// Every time a record with the same primary key is inserted, its version number gets increased by 1. - pub fn insert( - &self, - txn: &mut RwTransaction, - record: &mut Record, - schema: &Schema, - ) -> Result<(u64, u64), CacheError> { - debug_check_schema_record_consistency(schema, record); - // Generation operation id. - let operation_id = self.next_operation_id.fetch_add(txn, 1)?; - // Calculate record id. - let record_id = if schema.is_append_only() { - record.version = Some(INITIAL_RECORD_VERSION); - // If the record has no primary key, record id is operation id. - operation_id - } else { - let primary_key = index::get_primary_key(&schema.primary_index, &record.values); - // Get or generate record id from `primary_key_to_metadata`. - let (record_id, record_version) = - match self.primary_key_to_metadata.get(txn, &primary_key)? { - // Primary key is never inserted before. Generate new id from `primary_key_to_metadata`. 
- None => ( - self.primary_key_to_metadata.count(txn)? as u64, - INITIAL_RECORD_VERSION, - ), - Some(metadata) => { - let metadata = metadata.into_owned(); - if metadata.insert_operation_id.is_some() { - // This primary key is present. It's an error. - return Err(CacheError::PrimaryKeyExists); - } else { - // This primary key was deleted. Use the record id from its first insertion. - (metadata.id, metadata.version + 1) - } - } - }; - // Update `primary_key_to_metadata` and `present_operation_ids`. - self.primary_key_to_metadata.insert_overwrite( - txn, - &primary_key, - &RecordMetadata { - id: record_id, - version: record_version, - insert_operation_id: Some(operation_id), - }, - )?; - if !self.present_operation_ids.insert(txn, &operation_id)? { - panic!("Inconsistent state: operation id already exists"); - } - // Update record version. - record.version = Some(record_version); - record_id - }; - // Record operation. The operation id must not exist. - if !self.operation_id_to_operation.insert( - txn, - &operation_id, - OperationBorrow::Insert { record_id, record }, - )? { - panic!("Inconsistent state: operation id already exists"); - } - Ok((record_id, operation_id)) - } - - /// Deletes the record and returns the record version and the deleted operation id. - pub fn delete( - &self, - txn: &mut RwTransaction, - primary_key: &[u8], - ) -> Result<(u32, u64), CacheError> { - // Find operation id by primary key. - let Some(Cow::Owned(RecordMetadata { - id: record_id, - version: record_version, - insert_operation_id: Some(insert_operation_id) - })) = self.primary_key_to_metadata.get(txn, primary_key)? else { - return Err(CacheError::PrimaryKeyNotFound); - }; - // Remove deleted operation id. - self.primary_key_to_metadata.insert_overwrite( - txn, - primary_key, - &RecordMetadata { - id: record_id, - version: record_version, - insert_operation_id: None, - }, - )?; - // The operation id must be present. - if !self - .present_operation_ids - .remove(txn, &insert_operation_id)? - { - panic!("Inconsistent state: insert operation id not found") - } - // Generate new operation id. - let operation_id = self.next_operation_id.fetch_add(txn, 1)?; - // Record delete operation. The operation id must not exist. - if !self.operation_id_to_operation.insert( - txn, - &operation_id, - OperationBorrow::Delete { - operation_id: insert_operation_id, - }, - )? 
{ - panic!("Inconsistent state: operation id already exists"); - } - Ok((record_version, insert_operation_id)) - } -} - -impl_borrow_for_clone_type!(RecordMetadata); - -impl BorrowEncode for RecordMetadata { - type Encode<'a> = &'a RecordMetadata; -} - -impl<'a> Encode<'a> for &'a RecordMetadata { - fn encode(self) -> Result, StorageError> { - let mut result = [0; 21]; - result[0..8].copy_from_slice(&self.id.to_be_bytes()); - result[8..12].copy_from_slice(&self.version.to_be_bytes()); - if let Some(insert_operation_id) = self.insert_operation_id { - result[12] = 1; - result[13..21].copy_from_slice(&insert_operation_id.to_be_bytes()); - } else { - result[12] = 0; - } - Ok(Encoded::U8x21(result)) - } -} - -impl Decode for RecordMetadata { - fn decode(bytes: &[u8]) -> Result, StorageError> { - let id = u64::from_be_bytes(bytes[0..8].try_into().unwrap()); - let version = u32::from_be_bytes(bytes[8..12].try_into().unwrap()); - let insert_operation_id = if bytes[12] == 1 { - Some(u64::from_be_bytes(bytes[13..21].try_into().unwrap())) - } else { - None - }; - Ok(Cow::Owned(RecordMetadata { - id, - version, - insert_operation_id, - })) - } -} - -unsafe impl LmdbVal for RecordMetadata {} - -#[derive(Debug, Clone, Copy, Serialize)] -#[serde(crate = "dozer_types::serde")] -enum OperationBorrow<'a> { - Delete { - /// The operation id of an `Insert` operation, which must exist. - operation_id: u64, - }, - Insert { - record_id: u64, - record: &'a Record, - }, -} - -impl<'a> IntoOwned for OperationBorrow<'a> { - fn into_owned(self) -> Operation { - match self { - Self::Delete { operation_id } => Operation::Delete { operation_id }, - Self::Insert { record_id, record } => Operation::Insert { - record_id, - record: record.clone(), - }, - } - } -} - -impl Borrow for Operation { - type Borrowed<'a> = OperationBorrow<'a>; - - fn borrow(&self) -> Self::Borrowed<'_> { - match self { - Self::Delete { operation_id } => OperationBorrow::Delete { - operation_id: *operation_id, - }, - Self::Insert { record_id, record } => OperationBorrow::Insert { - record_id: *record_id, - record, - }, - } - } - - fn upcast<'b, 'a: 'b>(borrow: Self::Borrowed<'a>) -> Self::Borrowed<'b> { - match borrow { - OperationBorrow::Delete { operation_id } => OperationBorrow::Delete { operation_id }, - OperationBorrow::Insert { record_id, record } => { - OperationBorrow::Insert { record_id, record } - } - } - } -} - -impl BorrowEncode for Operation { - type Encode<'a> = OperationBorrow<'a>; -} - -impl<'a> Encode<'a> for OperationBorrow<'a> { - fn encode(self) -> Result, StorageError> { - dozer_types::bincode::serialize(&self) - .map(Encoded::Vec) - .map_err(|e| StorageError::SerializationError { - typ: "Operation", - reason: Box::new(e), - }) - } -} - -impl Decode for Operation { - fn decode(bytes: &[u8]) -> Result, StorageError> { - dozer_types::bincode::deserialize(bytes) - .map(Cow::Owned) - .map_err(|e| StorageError::DeserializationError { - typ: "Operation", - reason: Box::new(e), - }) - } -} - -unsafe impl LmdbVal for Operation {} - -fn debug_check_schema_record_consistency(schema: &Schema, record: &Record) { - debug_assert_eq!(schema.identifier, record.schema_id); - debug_assert_eq!(schema.fields.len(), record.values.len()); - for (field, value) in schema.fields.iter().zip(record.values.iter()) { - if field.nullable && value == &Field::Null { - continue; - } - match field.typ { - FieldType::UInt => { - debug_assert!(value.as_uint().is_some()) - } - FieldType::Int => { - debug_assert!(value.as_int().is_some()) - } - FieldType::Float => { 
- debug_assert!(value.as_float().is_some()) - } - FieldType::Boolean => debug_assert!(value.as_boolean().is_some()), - FieldType::String => debug_assert!(value.as_string().is_some()), - FieldType::Text => debug_assert!(value.as_text().is_some()), - FieldType::Binary => debug_assert!(value.as_binary().is_some()), - FieldType::Decimal => debug_assert!(value.as_decimal().is_some()), - FieldType::Timestamp => debug_assert!(value.as_timestamp().is_some()), - FieldType::Date => debug_assert!(value.as_date().is_some()), - FieldType::Bson => debug_assert!(value.as_bson().is_some()), - FieldType::Point => debug_assert!(value.as_point().is_some()), - } - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs new file mode 100644 index 0000000000..9436a0f98d --- /dev/null +++ b/dozer-cache/src/cache/lmdb/cache/main_environment/mod.rs @@ -0,0 +1,287 @@ +use dozer_storage::{ + errors::StorageError, + lmdb::RoTransaction, + lmdb_storage::{LmdbEnvironmentManager, SharedTransaction}, + BeginTransaction, LmdbOption, ReadTransaction, +}; +use dozer_types::{ + borrow::IntoOwned, + types::{Field, FieldType, Record, Schema, SchemaWithIndex}, +}; + +use crate::{ + cache::{index, lmdb::utils::init_env, RecordWithId}, + errors::CacheError, +}; + +use super::{CacheCommonOptions, CacheWriteOptions}; + +mod operation_log; + +pub use operation_log::{Operation, OperationLog}; + +pub trait MainEnvironment: BeginTransaction { + fn common(&self) -> &MainEnvironmentCommon; + + fn schema(&self) -> &SchemaWithIndex; + + fn name(&self) -> &str { + &self.common().name + } + + fn operation_log(&self) -> OperationLog { + self.common().operation_log + } + + fn intersection_chunk_size(&self) -> usize { + self.common().intersection_chunk_size + } + + fn count(&self) -> Result { + let txn = self.begin_txn()?; + self.operation_log() + .count_present_records(&txn, self.schema().0.is_append_only()) + .map_err(Into::into) + } + + fn get(&self, key: &[u8]) -> Result { + let txn = self.begin_txn()?; + self.operation_log() + .get_record(&txn, key)? + .ok_or(CacheError::PrimaryKeyNotFound) + } +} + +#[derive(Debug)] +pub struct MainEnvironmentCommon { + /// The environment name. + name: String, + /// The operation log. 
+ operation_log: OperationLog, + intersection_chunk_size: usize, +} + +#[derive(Debug)] +pub struct RwMainEnvironment { + txn: SharedTransaction, + common: MainEnvironmentCommon, + schema: SchemaWithIndex, +} + +impl BeginTransaction for RwMainEnvironment { + type Transaction<'a> = ReadTransaction<'a>; + + fn begin_txn(&self) -> Result, StorageError> { + self.txn.begin_txn() + } +} + +impl MainEnvironment for RwMainEnvironment { + fn common(&self) -> &MainEnvironmentCommon { + &self.common + } + + fn schema(&self) -> &SchemaWithIndex { + &self.schema + } +} + +impl RwMainEnvironment { + pub fn open( + common_options: &CacheCommonOptions, + write_options: CacheWriteOptions, + ) -> Result { + let (env, common, schema) = open_env_with_schema(common_options, Some(write_options))?; + + Ok(Self { + txn: env.create_txn()?, + common, + schema, + }) + } + + pub fn create( + schema: &SchemaWithIndex, + common_options: &CacheCommonOptions, + write_options: CacheWriteOptions, + ) -> Result { + let (env, common, schema_option, old_schema) = + open_env(common_options, Some(write_options))?; + let txn = env.create_txn()?; + + let schema = if let Some(old_schema) = old_schema { + if &old_schema != schema { + return Err(CacheError::SchemaMismatch { + name: common.name, + given: Box::new(schema.clone()), + stored: Box::new(old_schema), + }); + } + old_schema + } else { + let mut txn = txn.write(); + schema_option.store(txn.txn_mut(), schema)?; + txn.commit_and_renew()?; + schema.clone() + }; + + Ok(Self { + txn, + common, + schema, + }) + } + + /// Inserts the record into the cache and sets the record version. Returns the record id. + /// + /// Every time a record with the same primary key is inserted, its version number gets increased by 1. + pub fn insert(&self, record: &mut Record) -> Result { + debug_check_schema_record_consistency(&self.schema.0, record); + + let primary_key = if self.schema.0.is_append_only() { + None + } else { + Some(index::get_primary_key( + &self.schema.0.primary_index, + &record.values, + )) + }; + + let mut txn = self.txn.write(); + let txn = txn.txn_mut(); + self.common + .operation_log + .insert(txn, record, primary_key.as_deref())? + .ok_or(CacheError::PrimaryKeyExists) + } + + /// Deletes the record and returns the record version. + pub fn delete(&self, primary_key: &[u8]) -> Result { + let mut txn = self.txn.write(); + let txn = txn.txn_mut(); + self.common + .operation_log + .delete(txn, primary_key)? 
+ .ok_or(CacheError::PrimaryKeyNotFound) + } + + pub fn commit(&self) -> Result<(), CacheError> { + self.txn.write().commit_and_renew().map_err(Into::into) + } +} + +#[derive(Debug)] +pub struct RoMainEnvironment { + env: LmdbEnvironmentManager, + common: MainEnvironmentCommon, + schema: SchemaWithIndex, +} + +impl BeginTransaction for RoMainEnvironment { + type Transaction<'a> = RoTransaction<'a>; + + fn begin_txn(&self) -> Result, StorageError> { + self.env.begin_txn() + } +} + +impl MainEnvironment for RoMainEnvironment { + fn common(&self) -> &MainEnvironmentCommon { + &self.common + } + + fn schema(&self) -> &SchemaWithIndex { + &self.schema + } +} + +impl RoMainEnvironment { + pub fn new(common_options: &CacheCommonOptions) -> Result { + let (env, common, schema) = open_env_with_schema(common_options, None)?; + Ok(Self { + env, + common, + schema, + }) + } +} + +fn open_env( + common_options: &CacheCommonOptions, + write_options: Option, +) -> Result< + ( + LmdbEnvironmentManager, + MainEnvironmentCommon, + LmdbOption, + Option, + ), + CacheError, +> { + let (mut env, name) = init_env(common_options, write_options)?; + + let create_if_not_exist = write_options.is_some(); + let operation_log = OperationLog::new(&mut env, create_if_not_exist)?; + let schema_option = LmdbOption::new(&mut env, Some("schema"), create_if_not_exist)?; + + let schema = schema_option + .load(&env.begin_txn()?)? + .map(IntoOwned::into_owned); + + Ok(( + env, + MainEnvironmentCommon { + name, + operation_log, + intersection_chunk_size: common_options.intersection_chunk_size, + }, + schema_option, + schema, + )) +} + +fn open_env_with_schema( + common_options: &CacheCommonOptions, + write_options: Option, +) -> Result< + ( + LmdbEnvironmentManager, + MainEnvironmentCommon, + SchemaWithIndex, + ), + CacheError, +> { + let (env, common, _, schema) = open_env(common_options, write_options)?; + let schema = schema.ok_or(CacheError::SchemaNotFound)?; + Ok((env, common, schema)) +} + +fn debug_check_schema_record_consistency(schema: &Schema, record: &Record) { + debug_assert_eq!(schema.identifier, record.schema_id); + debug_assert_eq!(schema.fields.len(), record.values.len()); + for (field, value) in schema.fields.iter().zip(record.values.iter()) { + if field.nullable && value == &Field::Null { + continue; + } + match field.typ { + FieldType::UInt => { + debug_assert!(value.as_uint().is_some()) + } + FieldType::Int => { + debug_assert!(value.as_int().is_some()) + } + FieldType::Float => { + debug_assert!(value.as_float().is_some()) + } + FieldType::Boolean => debug_assert!(value.as_boolean().is_some()), + FieldType::String => debug_assert!(value.as_string().is_some()), + FieldType::Text => debug_assert!(value.as_text().is_some()), + FieldType::Binary => debug_assert!(value.as_binary().is_some()), + FieldType::Decimal => debug_assert!(value.as_decimal().is_some()), + FieldType::Timestamp => debug_assert!(value.as_timestamp().is_some()), + FieldType::Date => debug_assert!(value.as_date().is_some()), + FieldType::Bson => debug_assert!(value.as_bson().is_some()), + FieldType::Point => debug_assert!(value.as_point().is_some()), + } + } +} diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs new file mode 100644 index 0000000000..601635e9a7 --- /dev/null +++ b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/lmdb_val_impl.rs @@ -0,0 +1,160 @@ +use dozer_storage::{errors::StorageError, 
BorrowEncode, Decode, Encode, Encoded, LmdbVal}; +use dozer_types::{ + borrow::{Borrow, Cow, IntoOwned}, + impl_borrow_for_clone_type, +}; + +use super::{Operation, OperationBorrow, RecordMetadata}; + +impl_borrow_for_clone_type!(RecordMetadata); + +impl BorrowEncode for RecordMetadata { + type Encode<'a> = &'a RecordMetadata; +} + +impl<'a> Encode<'a> for &'a RecordMetadata { + fn encode(self) -> Result, StorageError> { + let mut result = [0; 21]; + result[0..8].copy_from_slice(&self.id.to_be_bytes()); + result[8..12].copy_from_slice(&self.version.to_be_bytes()); + if let Some(insert_operation_id) = self.insert_operation_id { + result[12] = 1; + result[13..21].copy_from_slice(&insert_operation_id.to_be_bytes()); + } else { + result[12] = 0; + } + Ok(Encoded::U8x21(result)) + } +} + +impl Decode for RecordMetadata { + fn decode(bytes: &[u8]) -> Result, StorageError> { + let id = u64::from_be_bytes(bytes[0..8].try_into().unwrap()); + let version = u32::from_be_bytes(bytes[8..12].try_into().unwrap()); + let insert_operation_id = if bytes[12] == 1 { + Some(u64::from_be_bytes(bytes[13..21].try_into().unwrap())) + } else { + None + }; + Ok(Cow::Owned(RecordMetadata { + id, + version, + insert_operation_id, + })) + } +} + +unsafe impl LmdbVal for RecordMetadata {} + +impl<'a> IntoOwned for OperationBorrow<'a> { + fn into_owned(self) -> Operation { + match self { + Self::Delete { operation_id } => Operation::Delete { operation_id }, + Self::Insert { record_id, record } => Operation::Insert { + record_id, + record: record.clone(), + }, + } + } +} + +impl Borrow for Operation { + type Borrowed<'a> = OperationBorrow<'a>; + + fn borrow(&self) -> Self::Borrowed<'_> { + match self { + Self::Delete { operation_id } => OperationBorrow::Delete { + operation_id: *operation_id, + }, + Self::Insert { record_id, record } => OperationBorrow::Insert { + record_id: *record_id, + record, + }, + } + } + + fn upcast<'b, 'a: 'b>(borrow: Self::Borrowed<'a>) -> Self::Borrowed<'b> { + match borrow { + OperationBorrow::Delete { operation_id } => OperationBorrow::Delete { operation_id }, + OperationBorrow::Insert { record_id, record } => { + OperationBorrow::Insert { record_id, record } + } + } + } +} + +impl BorrowEncode for Operation { + type Encode<'a> = OperationBorrow<'a>; +} + +impl<'a> Encode<'a> for OperationBorrow<'a> { + fn encode(self) -> Result, StorageError> { + dozer_types::bincode::serialize(&self) + .map(Encoded::Vec) + .map_err(|e| StorageError::SerializationError { + typ: "Operation", + reason: Box::new(e), + }) + } +} + +impl Decode for Operation { + fn decode(bytes: &[u8]) -> Result, StorageError> { + dozer_types::bincode::deserialize(bytes) + .map(Cow::Owned) + .map_err(|e| StorageError::DeserializationError { + typ: "Operation", + reason: Box::new(e), + }) + } +} + +unsafe impl LmdbVal for Operation {} + +#[cfg(test)] +mod tests { + use dozer_types::types::Record; + + use super::*; + + #[test] + fn test_record_metadata_encode_decode() { + let record_metadata = RecordMetadata { + id: 1, + version: 2, + insert_operation_id: Some(3), + }; + let encoded = record_metadata.encode().unwrap(); + let decoded = RecordMetadata::decode(encoded.as_ref()) + .unwrap() + .into_owned(); + assert_eq!(record_metadata, decoded); + + let record_metadata = RecordMetadata { + id: 1, + version: 2, + insert_operation_id: None, + }; + let encoded = record_metadata.encode().unwrap(); + let decoded = RecordMetadata::decode(encoded.as_ref()) + .unwrap() + .into_owned(); + assert_eq!(record_metadata, decoded); + } + + #[test] + 
fn test_operation_encode_decode() {
+        let operation = Operation::Delete { operation_id: 1 };
+        let encoded = operation.borrow().encode().unwrap();
+        let decoded = Operation::decode(encoded.as_ref()).unwrap().into_owned();
+        assert_eq!(operation, decoded);
+
+        let operation = Operation::Insert {
+            record_id: 1,
+            record: Record::new(None, vec![1.into(), 2.into(), 3.into()], Some(1)),
+        };
+        let encoded = operation.borrow().encode().unwrap();
+        let decoded = Operation::decode(encoded.as_ref()).unwrap().into_owned();
+        assert_eq!(operation, decoded);
+    }
+}
diff --git a/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs
new file mode 100644
index 0000000000..3a12a1e657
--- /dev/null
+++ b/dozer-cache/src/cache/lmdb/cache/main_environment/operation_log/mod.rs
@@ -0,0 +1,469 @@
+use dozer_storage::{
+    errors::StorageError,
+    lmdb::{RoCursor, RwTransaction, Transaction},
+    lmdb_storage::CreateDatabase,
+    KeyIterator, LmdbCounter, LmdbMap, LmdbSet,
+};
+use dozer_types::{
+    borrow::{Borrow, Cow, IntoOwned},
+    serde::{Deserialize, Serialize},
+    types::Record,
+};
+
+use crate::cache::RecordWithId;
+
+#[derive(Debug, Clone, PartialEq, Deserialize)]
+#[serde(crate = "dozer_types::serde")]
+pub enum Operation {
+    Delete {
+        /// The operation id of an `Insert` operation, which must exist.
+        operation_id: u64,
+    },
+    Insert {
+        record_id: u64,
+        record: Record,
+    },
+}
+
+#[derive(Debug, Clone, Copy, Serialize)]
+#[serde(crate = "dozer_types::serde")]
+pub enum OperationBorrow<'a> {
+    Delete {
+        /// The operation id of an `Insert` operation, which must exist.
+        operation_id: u64,
+    },
+    Insert {
+        record_id: u64,
+        record: &'a Record,
+    },
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct OperationLog {
+    /// Record primary key -> RecordMetadata, empty if schema has no primary key.
+    /// Length always increases.
+    primary_key_to_metadata: LmdbMap<Vec<u8>, RecordMetadata>,
+    /// Operation ids of latest `Insert`s. Used to filter out deleted records in query. Empty if schema has no primary key.
+    present_operation_ids: LmdbSet<u64>,
+    /// The next operation id. Monotonically increasing.
+    next_operation_id: LmdbCounter,
+    /// Operation_id -> operation.
+    operation_id_to_operation: LmdbMap<u64, Operation>,
+}
+
+impl OperationLog {
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
+        create_if_not_exist: bool,
+    ) -> Result<Self, StorageError> {
+        let primary_key_to_metadata =
+            LmdbMap::new(c, Some("primary_key_to_metadata"), create_if_not_exist)?;
+        let present_operation_ids =
+            LmdbSet::new(c, Some("present_operation_ids"), create_if_not_exist)?;
+        let next_operation_id =
+            LmdbCounter::new(c, Some("next_operation_id"), create_if_not_exist)?;
+        let operation_id_to_operation =
+            LmdbMap::new(c, Some("operation_id_to_operation"), create_if_not_exist)?;
+        Ok(Self {
+            primary_key_to_metadata,
+            present_operation_ids,
+            next_operation_id,
+            operation_id_to_operation,
+        })
+    }
+
+    pub fn count_present_records<T: Transaction>(
+        &self,
+        txn: &T,
+        schema_is_append_only: bool,
+    ) -> Result<usize, StorageError> {
+        if schema_is_append_only {
+            self.operation_id_to_operation.count(txn)
+        } else {
+            self.present_operation_ids.count(txn)
+        }
+        .map_err(Into::into)
+    }
+
+    pub fn get_record<T: Transaction>(
+        &self,
+        txn: &T,
+        key: &[u8],
+    ) -> Result<Option<RecordWithId>, StorageError> {
+        let Some(metadata) = self.primary_key_to_metadata.get(txn, key)? else {
+            return Ok(None);
+        };
+        let Some(insert_operation_id) = metadata.borrow().insert_operation_id else {
+            return Ok(None);
+        };
+        self.get_record_by_operation_id_unchecked(txn, insert_operation_id)
+            .map(Some)
+    }
+
+    pub fn next_operation_id<T: Transaction>(&self, txn: &T) -> Result<u64, StorageError> {
+        self.next_operation_id.load(txn).map_err(Into::into)
+    }
+
+    pub fn present_operation_ids<'txn, T: Transaction>(
+        &self,
+        txn: &'txn T,
+        schema_is_append_only: bool,
+    ) -> Result<KeyIterator<'txn, RoCursor<'txn>, u64>, StorageError> {
+        // If schema is append only, then all operation ids are latest `Insert`s.
+        if schema_is_append_only {
+            self.operation_id_to_operation.keys(txn)
+        } else {
+            self.present_operation_ids.iter(txn)
+        }
+    }
+
+    pub fn contains_operation_id<T: Transaction>(
+        &self,
+        txn: &T,
+        schema_is_append_only: bool,
+        operation_id: u64,
+    ) -> Result<bool, StorageError> {
+        // If schema is append only, then all operation ids are latest `Insert`s.
+        if schema_is_append_only {
+            Ok(true)
+        } else {
+            self.present_operation_ids.contains(txn, &operation_id)
+        }
+        .map_err(Into::into)
+    }
+
+    pub fn get_record_by_operation_id_unchecked<T: Transaction>(
+        &self,
+        txn: &T,
+        operation_id: u64,
+    ) -> Result<RecordWithId, StorageError> {
+        let Some(Cow::Owned(Operation::Insert {
+            record_id,
+            record,
+        })) = self.operation_id_to_operation.get(txn, &operation_id)? else {
+            panic!(
+                "Inconsistent state: primary_key_to_metadata or present_operation_ids contains an insert operation id that is not an Insert operation"
+            );
+        };
+        Ok(RecordWithId::new(record_id, record))
+    }
+
+    pub fn get_operation<T: Transaction>(
+        &self,
+        txn: &T,
+        operation_id: u64,
+    ) -> Result<Operation, StorageError> {
+        Ok(self
+            .operation_id_to_operation
+            .get(txn, &operation_id)?
+            .unwrap_or_else(|| panic!("Operation id {} out of range", operation_id))
+            .into_owned())
+    }
+
+    /// Inserts the record and sets the record version. Returns the record id.
+    ///
+    /// If the record's primary key collides with an existing record, returns `None`.
+    ///
+    /// Every time a record with the same primary key is inserted, its version number gets increased by 1.
+    pub fn insert(
+        &self,
+        txn: &mut RwTransaction,
+        record: &mut Record,
+        primary_key: Option<&[u8]>,
+    ) -> Result<Option<u64>, StorageError> {
+        // Calculate operation id and record id.
+        let (operation_id, record_id) = if let Some(primary_key) = primary_key {
+            // Get or generate record id from `primary_key_to_metadata`.
+            let (record_id, record_version) =
+                match self.primary_key_to_metadata.get(txn, primary_key)? {
+                    // Primary key was never inserted before. Generate a new id from `primary_key_to_metadata`.
+                    None => (
+                        self.primary_key_to_metadata.count(txn)? as u64,
+                        INITIAL_RECORD_VERSION,
+                    ),
+                    Some(metadata) => {
+                        let metadata = metadata.borrow();
+                        if metadata.insert_operation_id.is_some() {
+                            // Primary key collision.
+                            return Ok(None);
+                        } else {
+                            // This primary key was deleted. Use the record id from its first insertion.
+                            (metadata.id, metadata.version + 1)
+                        }
+                    }
+                };
+            // Generate operation id.
+            let operation_id = self.next_operation_id.fetch_add(txn, 1)?;
+            // Update `primary_key_to_metadata` and `present_operation_ids`.
+            self.primary_key_to_metadata.insert_overwrite(
+                txn,
+                primary_key,
+                &RecordMetadata {
+                    id: record_id,
+                    version: record_version,
+                    insert_operation_id: Some(operation_id),
+                },
+            )?;
+            if !self.present_operation_ids.insert(txn, &operation_id)? {
+                panic!("Inconsistent state: operation id already exists");
+            }
+            // Update record version.
+            record.version = Some(record_version);
+            (operation_id, record_id)
+        } else {
+            record.version = Some(INITIAL_RECORD_VERSION);
+            // Generate operation id.
+            let operation_id = self.next_operation_id.fetch_add(txn, 1)?;
+            // If the record has no primary key, record id is operation id.
+            (operation_id, operation_id)
+        };
+        // Record operation. The operation id must not exist.
+        if !self.operation_id_to_operation.insert(
+            txn,
+            &operation_id,
+            OperationBorrow::Insert { record_id, record },
+        )? {
+            panic!("Inconsistent state: operation id already exists");
+        }
+        Ok(Some(record_id))
+    }
+
+    /// Deletes the record and returns the record version. Returns `None` if the record does not exist.
+    pub fn delete(
+        &self,
+        txn: &mut RwTransaction,
+        primary_key: &[u8],
+    ) -> Result<Option<u32>, StorageError> {
+        // Find operation id by primary key.
+        let Some(metadata) = self.primary_key_to_metadata.get(txn, primary_key)? else {
+            return Ok(None);
+        };
+        let metadata = metadata.into_owned();
+        let Some(insert_operation_id) = metadata.insert_operation_id else {
+            return Ok(None);
+        };
+        // Remove deleted operation id.
+        self.primary_key_to_metadata.insert_overwrite(
+            txn,
+            primary_key,
+            &RecordMetadata {
+                id: metadata.id,
+                version: metadata.version,
+                insert_operation_id: None,
+            },
+        )?;
+        // The operation id must be present.
+        if !self
+            .present_operation_ids
+            .remove(txn, &insert_operation_id)?
+        {
+            panic!("Inconsistent state: insert operation id not found")
+        }
+        // Generate new operation id.
+        let operation_id = self.next_operation_id.fetch_add(txn, 1)?;
+        // Record delete operation. The operation id must not exist.
+        if !self.operation_id_to_operation.insert(
+            txn,
+            &operation_id,
+            OperationBorrow::Delete {
+                operation_id: insert_operation_id,
+            },
+        )? {
+            panic!("Inconsistent state: operation id already exists");
+        }
+        Ok(Some(metadata.version))
+    }
+}
+
+const INITIAL_RECORD_VERSION: u32 = 1_u32;
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+struct RecordMetadata {
+    /// The record id. Consistent across `insert`s and `delete`s.
+    id: u64,
+    /// The latest record version, even if the record is deleted.
+    version: u32,
+    /// The operation id of the latest `Insert` operation. `None` if the record is deleted.
+    insert_operation_id: Option<u64>,
+}
+
+mod lmdb_val_impl;
+
+#[cfg(test)]
+mod tests {
+    use crate::cache::lmdb::utils::init_env;
+
+    use super::*;
+
+    #[test]
+    fn test_operation_log_append_only() {
+        let mut env = init_env(&Default::default(), Some(Default::default()))
+            .unwrap()
+            .0;
+        let log = OperationLog::new(&mut env, true).unwrap();
+        let mut txn = env.begin_rw_txn().unwrap();
+        let append_only = true;
+
+        let mut records = vec![Record::new(None, vec![], None); 10];
+        for (index, record) in records.iter_mut().enumerate() {
+            let record_id = log.insert(&mut txn, record, None).unwrap().unwrap();
+            assert_eq!(record_id, index as u64);
+            assert_eq!(record.version, Some(INITIAL_RECORD_VERSION));
+            assert_eq!(
+                log.count_present_records(&txn, append_only).unwrap(),
+                index + 1
+            );
+            assert_eq!(log.next_operation_id(&txn).unwrap(), index as u64 + 1);
+            assert_eq!(
+                log.present_operation_ids(&txn, append_only)
+                    .unwrap()
+                    .map(|result| result.map(IntoOwned::into_owned))
+                    .collect::<Result<Vec<_>, _>>()
+                    .unwrap(),
+                (0..=index as u64).collect::<Vec<_>>()
+            );
+            assert_eq!(
+                log.contains_operation_id(&txn, append_only, index as _)
+                    .unwrap(),
+                true
+            );
+            assert_eq!(
+                log.get_record_by_operation_id_unchecked(&txn, index as _)
+                    .unwrap(),
+                RecordWithId::new(record_id, record.clone())
+            );
+            assert_eq!(
+                log.get_operation(&txn, index as _).unwrap(),
+                Operation::Insert {
+                    record_id,
+                    record: record.clone(),
+                }
+            );
+        }
+    }
+
+    #[test]
+    fn test_operation_log_with_primary_key() {
+        let mut env = init_env(&Default::default(), Some(Default::default()))
+            .unwrap()
+            .0;
+        let log = OperationLog::new(&mut env, true).unwrap();
+        let mut txn = env.begin_rw_txn().unwrap();
+        let append_only = false;
+
+        // Insert a record.
+        let mut record = Record::new(None, vec![], None);
+        let primary_key = b"primary_key";
+        let record_id = log
+            .insert(&mut txn, &mut record, Some(primary_key))
+            .unwrap()
+            .unwrap();
+        assert_eq!(record_id, 0);
+        assert_eq!(record.version, Some(INITIAL_RECORD_VERSION));
+        assert_eq!(log.count_present_records(&txn, append_only).unwrap(), 1);
+        assert_eq!(
+            log.get_record(&txn, primary_key).unwrap().unwrap(),
+            RecordWithId::new(record_id, record.clone())
+        );
+        assert_eq!(log.next_operation_id(&txn).unwrap(), 1);
+        assert_eq!(
+            log.present_operation_ids(&txn, append_only)
+                .unwrap()
+                .map(|result| result.map(IntoOwned::into_owned))
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap(),
+            vec![0]
+        );
+        assert_eq!(
+            log.contains_operation_id(&txn, append_only, 0).unwrap(),
+            true
+        );
+        assert_eq!(
+            log.get_record_by_operation_id_unchecked(&txn, 0).unwrap(),
+            RecordWithId::new(record_id, record.clone())
+        );
+        assert_eq!(
+            log.get_operation(&txn, 0).unwrap(),
+            Operation::Insert {
+                record_id,
+                record: record.clone(),
+            }
+        );
+
+        // Inserting again with the same primary key should fail.
+        assert_eq!(
+            log.insert(&mut txn, &mut record, Some(primary_key))
+                .unwrap(),
+            None
+        );
+
+        // Delete the record.
+        let version = log.delete(&mut txn, primary_key).unwrap().unwrap();
+        assert_eq!(version, INITIAL_RECORD_VERSION);
+        assert_eq!(log.count_present_records(&txn, append_only).unwrap(), 0);
+        assert_eq!(log.get_record(&txn, primary_key).unwrap(), None);
+        assert_eq!(log.next_operation_id(&txn).unwrap(), 2);
+        assert_eq!(
+            log.present_operation_ids(&txn, append_only)
+                .unwrap()
+                .map(|result| result.map(IntoOwned::into_owned))
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap(),
+            Vec::<u64>::new(),
+        );
+        assert_eq!(
+            log.contains_operation_id(&txn, append_only, 0).unwrap(),
+            false
+        );
+        assert_eq!(
+            log.get_operation(&txn, 1).unwrap(),
+            Operation::Delete { operation_id: 0 }
+        );
+
+        // Deleting a non-existing record should fail.
+        assert_eq!(
+            log.delete(&mut txn, b"non_existing_primary_key").unwrap(),
+            None
+        );
+
+        // Deleting an already deleted record should fail.
+        assert_eq!(log.delete(&mut txn, primary_key).unwrap(), None);
+
+        // Insert with that primary key again.
+        let record_id = log
+            .insert(&mut txn, &mut record, Some(primary_key))
+            .unwrap()
+            .unwrap();
+        assert_eq!(record_id, 0);
+        assert_eq!(record.version, Some(INITIAL_RECORD_VERSION + 1));
+        assert_eq!(log.count_present_records(&txn, append_only).unwrap(), 1);
+        assert_eq!(
+            log.get_record(&txn, primary_key).unwrap().unwrap(),
+            RecordWithId::new(record_id, record.clone())
+        );
+        assert_eq!(log.next_operation_id(&txn).unwrap(), 3);
+        assert_eq!(
+            log.present_operation_ids(&txn, append_only)
+                .unwrap()
+                .map(|result| result.map(IntoOwned::into_owned))
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap(),
+            vec![2]
+        );
+        assert_eq!(
+            log.contains_operation_id(&txn, append_only, 2).unwrap(),
+            true
+        );
+        assert_eq!(
+            log.get_record_by_operation_id_unchecked(&txn, 2).unwrap(),
+            RecordWithId::new(record_id, record.clone())
+        );
+        assert_eq!(
+            log.get_operation(&txn, 2).unwrap(),
+            Operation::Insert {
+                record_id,
+                record: record.clone(),
+            }
+        );
+    }
+}
diff --git a/dozer-cache/src/cache/lmdb/cache/mod.rs b/dozer-cache/src/cache/lmdb/cache/mod.rs
index 02640410cc..3fb99a27e7 100644
--- a/dozer-cache/src/cache/lmdb/cache/mod.rs
+++ b/dozer-cache/src/cache/lmdb/cache/mod.rs
@@ -1,38 +1,24 @@
 use std::fmt::Debug;
 use std::path::PathBuf;
 
-use dozer_storage::lmdb::{RoTransaction, RwTransaction, Transaction};
-use dozer_storage::lmdb_storage::{
-    LmdbEnvironmentManager, LmdbExclusiveTransaction, SharedTransaction,
-};
-use dozer_storage::LmdbMultimap;
+use dozer_storage::BeginTransaction;
 
-use dozer_types::parking_lot::RwLockReadGuard;
-
-use dozer_types::types::Schema;
-use dozer_types::types::{IndexDefinition, Record};
-
-use self::secondary_index_database::{
-    new_secondary_index_database_from_env, new_secondary_index_database_from_txn,
-};
+use dozer_types::types::{Record, SchemaWithIndex};
 
 use super::super::{RoCache, RwCache};
-use super::indexer::Indexer;
-use super::utils::{self, CacheReadOptions};
-use super::utils::{CacheOptions, CacheOptionsKind};
 use crate::cache::expression::QueryExpression;
 use crate::cache::RecordWithId;
 use crate::errors::CacheError;
-use query::LmdbQueryHandler;
 
 mod helper;
 mod main_environment;
 mod query;
-mod schema_database;
-mod secondary_index_database;
+mod secondary_environment;
 
-use main_environment::MainEnvironment;
-use schema_database::SchemaDatabase;
+use main_environment::{MainEnvironment, RoMainEnvironment, RwMainEnvironment};
+use query::LmdbQueryHandler;
+pub use secondary_environment::SecondaryEnvironment;
+use secondary_environment::{RoSecondaryEnvironment, RwSecondaryEnvironment};
 
 #[derive(Clone, Debug)]
 pub struct
CacheCommonOptions { @@ -62,22 +48,24 @@ impl Default for CacheCommonOptions { #[derive(Debug)] pub struct LmdbRoCache { - common: LmdbCacheCommon, - env: LmdbEnvironmentManager, + main_env: RoMainEnvironment, + secondary_envs: Vec, } impl LmdbRoCache { - pub fn new(options: CacheCommonOptions) -> Result { - let (mut env, name) = utils::init_env(&CacheOptions { - common: options.clone(), - kind: CacheOptionsKind::ReadOnly(CacheReadOptions {}), - })?; - let common = LmdbCacheCommon::new(&mut env, options, name, false)?; - Ok(Self { common, env }) + pub fn new(options: &CacheCommonOptions) -> Result { + let main_env = RoMainEnvironment::new(options)?; + let secondary_envs = (0..main_env.schema().1.len()) + .map(|index| RoSecondaryEnvironment::new(secondary_environment_name(index), options)) + .collect::>()?; + Ok(Self { + main_env, + secondary_envs, + }) } } -#[derive(Clone, Debug)] +#[derive(Debug, Clone, Copy)] pub struct CacheWriteOptions { // Total size allocated for data in a memory mapped file. // This size is allocated at initialization. @@ -94,291 +82,163 @@ impl Default for CacheWriteOptions { #[derive(Debug)] pub struct LmdbRwCache { - common: LmdbCacheCommon, - txn: SharedTransaction, + main_env: RwMainEnvironment, + secondary_envs: Vec, } impl LmdbRwCache { - pub fn create( - schema: Schema, - indexes: Vec, - common_options: CacheCommonOptions, + pub fn open( + common_options: &CacheCommonOptions, write_options: CacheWriteOptions, ) -> Result { - let mut cache = Self::open(common_options, write_options)?; - - let mut txn = cache.txn.write(); - cache.common.insert_schema(&mut txn, schema, indexes)?; - - txn.commit_and_renew()?; - drop(txn); - - Ok(cache) + let main_env = RwMainEnvironment::open(common_options, write_options)?; + let secondary_envs = (0..main_env.schema().1.len()) + .map(|index| { + RwSecondaryEnvironment::open( + secondary_environment_name(index), + common_options, + write_options, + ) + }) + .collect::>()?; + Ok(Self { + main_env, + secondary_envs, + }) } - pub fn open( - common_options: CacheCommonOptions, + pub fn create( + schema: &SchemaWithIndex, + common_options: &CacheCommonOptions, write_options: CacheWriteOptions, ) -> Result { - let (mut env, name) = utils::init_env(&CacheOptions { - common: common_options.clone(), - kind: CacheOptionsKind::Write(write_options), - })?; - let common = LmdbCacheCommon::new(&mut env, common_options, name, true)?; - let txn = env.create_txn()?; - Ok(Self { common, txn }) + let main_env = RwMainEnvironment::create(schema, common_options, write_options)?; + let secondary_envs = main_env + .schema() + .1 + .iter() + .enumerate() + .map(|(index, index_definition)| { + RwSecondaryEnvironment::create( + index_definition, + secondary_environment_name(index), + common_options, + write_options, + ) + }) + .collect::>()?; + Ok(Self { + main_env, + secondary_envs, + }) } } impl RoCache for C { fn name(&self) -> &str { - &self.common().name + self.main_env().name() } fn get(&self, key: &[u8]) -> Result { - let txn = self.begin_txn()?; - let txn = txn.as_txn(); - self.common().main_environment.get(txn, key) + self.main_env().get(key) } fn count(&self, query: &QueryExpression) -> Result { - let txn = self.begin_txn()?; - let handler = self.create_query_handler(&txn, query)?; - handler.count() + LmdbQueryHandler::new(self, query).count() } fn query(&self, query: &QueryExpression) -> Result, CacheError> { - let txn = self.begin_txn()?; - let handler = self.create_query_handler(&txn, query)?; - handler.query() + LmdbQueryHandler::new(self, 
query).query() } - fn get_schema(&self) -> Result<&(Schema, Vec), CacheError> { - self.get_schema_impl() + fn get_schema(&self) -> &SchemaWithIndex { + self.main_env().schema() } } impl RwCache for LmdbRwCache { fn insert(&self, record: &mut Record) -> Result { - let (schema, secondary_indexes) = self.get_schema()?; - self.insert_impl(record, schema, secondary_indexes) - } - - fn delete(&self, key: &[u8]) -> Result { - let (_, _, version) = self.delete_impl(key)?; - Ok(version) - } - - fn update(&self, key: &[u8], record: &mut Record) -> Result { - let (schema, secondary_indexes, old_version) = self.delete_impl(key)?; - self.insert_impl(record, schema, secondary_indexes)?; - Ok(old_version) - } - - fn commit(&self) -> Result<(), CacheError> { - let mut txn = self.txn.write(); - txn.commit_and_renew()?; - Ok(()) - } -} - -impl LmdbRwCache { - fn delete_impl(&self, key: &[u8]) -> Result<(&Schema, &[IndexDefinition], u32), CacheError> { - let record = self.get(key)?; - let (schema, secondary_indexes) = self.get_schema()?; - - let mut txn = self.txn.write(); - let txn = txn.txn_mut(); - - let (version, operation_id) = self.common.main_environment.delete(txn, key)?; - - let indexer = Indexer { - secondary_indexes: &self.common.secondary_indexes, - }; - indexer.delete_indexes(txn, &record.record, secondary_indexes, operation_id)?; - Ok((schema, secondary_indexes, version)) - } - - /// Inserts the record, sets the record version, builds the secondary index, and returns the record id. - fn insert_impl( - &self, - record: &mut Record, - schema: &Schema, - secondary_indexes: &[IndexDefinition], - ) -> Result { let span = dozer_types::tracing::span!(dozer_types::tracing::Level::TRACE, "insert_cache"); let _enter = span.enter(); - let mut txn = self.txn.write(); - let txn = txn.txn_mut(); - - let (record_id, operation_id) = self.common.main_environment.insert(txn, record, schema)?; - - let indexer = Indexer { - secondary_indexes: &self.common.secondary_indexes, - }; + let record_id = self.main_env.insert(record)?; let span = dozer_types::tracing::span!( dozer_types::tracing::Level::TRACE, "build_indexes", record_id = record_id, - operation_id = operation_id ); let _enter = span.enter(); - - indexer.build_indexes(txn, record, secondary_indexes, operation_id)?; + self.index()?; Ok(record_id) } -} - -/// This trait abstracts the behavior of getting a transaction from a `LmdbExclusiveTransaction` or a `lmdb::Transaction`. -trait AsTransaction { - type Transaction<'a>: Transaction - where - Self: 'a; - - fn as_txn(&self) -> &Self::Transaction<'_>; -} - -impl<'a> AsTransaction for RoTransaction<'a> { - type Transaction<'env> = RoTransaction<'env> where Self: 'env; - fn as_txn(&self) -> &Self::Transaction<'_> { - self + fn delete(&self, key: &[u8]) -> Result { + let version = self.main_env.delete(key)?; + self.index()?; + Ok(version) } -} - -impl<'a> AsTransaction for RwLockReadGuard<'a, LmdbExclusiveTransaction> { - type Transaction<'env> = RwTransaction<'env> where Self: 'env; - fn as_txn(&self) -> &Self::Transaction<'_> { - self.txn() + fn update(&self, key: &[u8], record: &mut Record) -> Result { + let version = self.delete(key)?; + self.insert(record)?; + self.index()?; + Ok(version) } -} -/// This trait abstracts the behavior of locking a `SharedTransaction` for reading -/// and beginning a `RoTransaction` from `LmdbEnvironmentManager`. 
-trait LmdbCache: Send + Sync + Debug { - type AsTransaction<'a>: AsTransaction - where - Self: 'a; - - fn common(&self) -> &LmdbCacheCommon; - fn begin_txn(&self) -> Result, CacheError>; - - fn get_schema_impl(&self) -> Result<&(Schema, Vec), CacheError> { - self.common() - .schema_db - .get_schema() - .ok_or(CacheError::SchemaNotFound) + fn commit(&self) -> Result<(), CacheError> { + self.main_env.commit()?; + for secondary_env in &self.secondary_envs { + secondary_env.commit()?; + } + Ok(()) } +} - fn create_query_handler<'a, 'as_txn>( - &'a self, - txn: &'a Self::AsTransaction<'as_txn>, - query: &'a QueryExpression, - ) -> Result< - LmdbQueryHandler<'a, as AsTransaction>::Transaction<'a>>, - CacheError, - > { - let txn = txn.as_txn(); - let (schema, secondary_indexes) = self.get_schema_impl()?; - let handler = LmdbQueryHandler::new(self.common(), txn, schema, secondary_indexes, query); - Ok(handler) +impl LmdbRwCache { + fn index(&self) -> Result<(), CacheError> { + let main_txn = self.main_env.begin_txn()?; + for secondary_env in &self.secondary_envs { + secondary_env.index(&main_txn, self.main_env.operation_log())?; + } + Ok(()) } } -impl LmdbCache for LmdbRoCache { - type AsTransaction<'a> = RoTransaction<'a>; +pub trait LmdbCache: Send + Sync + Debug { + type MainEnvironment: MainEnvironment; - fn common(&self) -> &LmdbCacheCommon { - &self.common - } + fn main_env(&self) -> &Self::MainEnvironment; - fn begin_txn(&self) -> Result, CacheError> { - Ok(self.env.begin_ro_txn()?) - } -} + type SecondaryEnvironment: SecondaryEnvironment; -impl LmdbCache for LmdbRwCache { - type AsTransaction<'a> = RwLockReadGuard<'a, LmdbExclusiveTransaction>; + fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment; +} - fn common(&self) -> &LmdbCacheCommon { - &self.common +impl LmdbCache for LmdbRoCache { + type MainEnvironment = RoMainEnvironment; + fn main_env(&self) -> &Self::MainEnvironment { + &self.main_env } - fn begin_txn(&self) -> Result, CacheError> { - Ok(self.txn.read()) + type SecondaryEnvironment = RoSecondaryEnvironment; + fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment { + &self.secondary_envs[index] } } -#[derive(Debug)] -pub struct LmdbCacheCommon { - main_environment: MainEnvironment, - secondary_indexes: Vec, u64>>, - schema_db: SchemaDatabase, - cache_options: CacheCommonOptions, - /// File name of the database. - name: String, -} - -impl LmdbCacheCommon { - fn new( - env: &mut LmdbEnvironmentManager, - options: CacheCommonOptions, - name: String, - create_db_if_not_exist: bool, - ) -> Result { - // Create or open must have databases. - let main_environment = MainEnvironment::new(env, create_db_if_not_exist)?; - let schema_db = SchemaDatabase::new(env, create_db_if_not_exist)?; - - // Open existing secondary index databases. 
- let mut secondary_indexe_databases = vec![]; - if let Some((_, secondary_indexes)) = schema_db.get_schema() { - for (index, index_definition) in secondary_indexes.iter().enumerate() { - let db = - new_secondary_index_database_from_env(env, index, index_definition, false)?; - secondary_indexe_databases.push(db); - } - } - - Ok(Self { - main_environment, - secondary_indexes: secondary_indexe_databases, - schema_db, - cache_options: options, - name, - }) +impl LmdbCache for LmdbRwCache { + type MainEnvironment = RwMainEnvironment; + fn main_env(&self) -> &Self::MainEnvironment { + &self.main_env } - fn insert_schema( - &mut self, - txn: &mut LmdbExclusiveTransaction, - schema: Schema, - secondary_indexes: Vec, - ) -> Result<(), CacheError> { - for (index, index_definition) in secondary_indexes.iter().enumerate() { - let db = new_secondary_index_database_from_txn(txn, index, index_definition, true)?; - self.secondary_indexes.push(db); - } - - self.schema_db - .insert(txn.txn_mut(), schema, secondary_indexes)?; - Ok(()) + type SecondaryEnvironment = RwSecondaryEnvironment; + fn secondary_env(&self, index: usize) -> &Self::SecondaryEnvironment { + &self.secondary_envs[index] } } -/// Methods for testing. -#[cfg(test)] -mod tests { - use super::*; - - impl LmdbRwCache { - pub fn get_txn_and_secondary_indexes( - &self, - ) -> (&SharedTransaction, &[LmdbMultimap, u64>]) { - (&self.txn, &self.common.secondary_indexes) - } - } +fn secondary_environment_name(index: usize) -> String { + format!("{index}") } diff --git a/dozer-cache/src/cache/lmdb/cache/query/handler.rs b/dozer-cache/src/cache/lmdb/cache/query/handler.rs index 6435429db7..78c650fe95 100644 --- a/dozer-cache/src/cache/lmdb/cache/query/handler.rs +++ b/dozer-cache/src/cache/lmdb/cache/query/handler.rs @@ -1,83 +1,93 @@ -use std::cmp::Ordering; -use std::ops::Bound; - use super::intersection::intersection; use crate::cache::expression::Skip; -use crate::cache::lmdb::cache::helper::lmdb_cmp; -use crate::cache::lmdb::cache::LmdbCacheCommon; +use crate::cache::lmdb::cache::main_environment::MainEnvironment; +use crate::cache::lmdb::cache::query::secondary::build_index_scan; +use crate::cache::lmdb::cache::LmdbCache; use crate::cache::RecordWithId; use crate::cache::{ - expression::{Operator, QueryExpression, SortDirection}, - index, - plan::{IndexScan, IndexScanKind, Plan, QueryPlanner, SortedInvertedRangeQuery}, + expression::QueryExpression, + plan::{IndexScan, Plan, QueryPlanner}, }; -use crate::errors::{CacheError, IndexError}; +use crate::errors::{CacheError, PlanError}; +use dozer_storage::errors::StorageError; use dozer_storage::lmdb::Transaction; -use dozer_types::borrow::{Borrow, IntoOwned}; -use dozer_types::types::{Field, IndexDefinition, Schema}; +use dozer_storage::BeginTransaction; +use dozer_types::borrow::IntoOwned; use itertools::Either; -pub struct LmdbQueryHandler<'a, T: Transaction> { - common: &'a LmdbCacheCommon, - txn: &'a T, - schema: &'a Schema, - secondary_indexes: &'a [IndexDefinition], +pub struct LmdbQueryHandler<'a, C: LmdbCache> { + cache: &'a C, query: &'a QueryExpression, } -impl<'a, T: Transaction> LmdbQueryHandler<'a, T> { - pub fn new( - common: &'a LmdbCacheCommon, - txn: &'a T, - schema: &'a Schema, - secondary_indexes: &'a [IndexDefinition], - query: &'a QueryExpression, - ) -> Self { - Self { - common, - txn, - schema, - secondary_indexes, - query, - } + +impl<'a, C: LmdbCache> LmdbQueryHandler<'a, C> { + pub fn new(cache: &'a C, query: &'a QueryExpression) -> Self { + Self { cache, query } } pub fn 
count(&self) -> Result<usize, CacheError> {
-        let planner = QueryPlanner::new(self.schema, self.secondary_indexes, self.query);
-        let execution = planner.plan()?;
-        match execution {
-            Plan::IndexScans(index_scans) => Ok(self.build_index_scan(index_scans)?.count()),
+        match self.plan()? {
+            Plan::IndexScans(index_scans) => {
+                let secondary_txns = self.create_secondary_txns(&index_scans)?;
+                let ids = self.combine_secondary_queries(&index_scans, &secondary_txns)?;
+                self.count_secondary_queries(ids)
+            }
             Plan::SeqScan(_) => Ok(match self.query.skip {
                 Skip::Skip(skip) => self
-                    .common
-                    .main_environment
-                    .count(self.txn, self.schema.is_append_only())?
+                    .cache
+                    .main_env()
+                    .count()?
                     .saturating_sub(skip)
                     .min(self.query.limit.unwrap_or(usize::MAX)),
-                Skip::After(_) => self.all_ids()?.count(),
+                Skip::After(_) => self.all_ids(&self.cache.main_env().begin_txn()?)?.count(),
             }),
             Plan::ReturnEmpty => Ok(0),
         }
     }
 
     pub fn query(&self) -> Result<Vec<RecordWithId>, CacheError> {
-        let planner = QueryPlanner::new(self.schema, self.secondary_indexes, self.query);
-        let execution = planner.plan()?;
-        match execution {
+        match self.plan()? {
             Plan::IndexScans(index_scans) => {
-                self.collect_records(self.build_index_scan(index_scans)?)
+                let secondary_txns = self.create_secondary_txns(&index_scans)?;
+                let main_txn = self.cache.main_env().begin_txn()?;
+                #[allow(clippy::let_and_return)] // Must use a let binding, otherwise this won't compile
+                let result = self.collect_records(
+                    &main_txn,
+                    self.combine_secondary_queries(&index_scans, &secondary_txns)?,
+                );
+                result
+            }
+            Plan::SeqScan(_seq_scan) => {
+                let main_txn = self.cache.main_env().begin_txn()?;
+                #[allow(clippy::let_and_return)] // Must use a let binding, otherwise this won't compile
+                let result = self.collect_records(&main_txn, self.all_ids(&main_txn)?);
+                result
             }
-            Plan::SeqScan(_seq_scan) => self.collect_records(self.all_ids()?),
             Plan::ReturnEmpty => Ok(vec![]),
         }
     }
 
-    pub fn all_ids(
+    fn plan(&self) -> Result<Plan, PlanError> {
+        let (schema, secondary_indexes) = self.cache.main_env().schema();
+        let planner = QueryPlanner::new(
+            schema,
+            secondary_indexes,
+            self.query.filter.as_ref(),
+            &self.query.order_by,
+        );
+        planner.plan()
+    }
+
+    fn all_ids<'txn, T: Transaction>(
         &self,
-    ) -> Result<impl Iterator<Item = Result<u64, CacheError>> + '_, CacheError> {
+        main_txn: &'txn T,
+    ) -> Result<impl Iterator<Item = Result<u64, CacheError>> + 'txn, CacheError> {
+        let schema_is_append_only = self.cache.main_env().schema().0.is_append_only();
         let all_ids = self
-            .common
-            .main_environment
-            .present_operation_ids(self.txn, self.schema.is_append_only())?
+            .cache
+            .main_env()
+            .operation_log()
+            .present_operation_ids(main_txn, schema_is_append_only)?
             .map(|result| {
                 result
                     .map(|id| id.into_owned())
@@ -86,291 +96,104 @@ impl<'a, T: Transaction> LmdbQueryHandler<'a, T> {
         Ok(skip(all_ids, self.query.skip).take(self.query.limit.unwrap_or(usize::MAX)))
     }
 
-    fn build_index_scan(
+    fn create_secondary_txns(
         &self,
-        index_scans: Vec<IndexScan>,
-    ) -> Result<impl Iterator<Item = Result<u64, CacheError>> + '_, CacheError> {
+        index_scans: &[IndexScan],
+    ) -> Result<Vec<<C::SecondaryEnvironment as BeginTransaction>::Transaction<'_>>, StorageError>
+    {
+        index_scans
+            .iter()
+            .map(|index_scan| self.cache.secondary_env(index_scan.index_id).begin_txn())
+            .collect()
+    }
+
+    fn combine_secondary_queries<'txn, T: Transaction>(
+        &self,
+        index_scans: &[IndexScan],
+        secondary_txns: &'txn [T],
+    ) -> Result<impl Iterator<Item = Result<u64, CacheError>> + 'txn, CacheError> {
         debug_assert!(
            !index_scans.is_empty(),
            "Planner should not generate empty index scan"
        );
-        let full_scan = if index_scans.len() == 1 {
+        let combined = if index_scans.len() == 1 {
            // The fast path, without intersection calculation.
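            // (With more than one scan, the per-index streams of operation ids
            // are combined by `intersection`, which only yields the ids that
            // appear in every stream, working through the streams
            // `intersection_chunk_size` ids at a time.)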
- Either::Left(self.query_with_secondary_index(&index_scans[0])?) + Either::Left(build_index_scan( + &secondary_txns[0], + self.cache.secondary_env(index_scans[0].index_id), + &index_scans[0].kind, + )?) } else { // Intersection of multiple index scans. let iterators = index_scans .iter() - .map(|index_scan| self.query_with_secondary_index(index_scan)) + .zip(secondary_txns) + .map(|(index_scan, secondary_txn)| { + build_index_scan( + secondary_txn, + self.cache.secondary_env(index_scan.index_id), + &index_scan.kind, + ) + }) .collect::, CacheError>>()?; Either::Right(intersection( iterators, - self.common.cache_options.intersection_chunk_size, + self.cache.main_env().intersection_chunk_size(), )) }; - Ok(skip(full_scan, self.query.skip).take(self.query.limit.unwrap_or(usize::MAX))) - } - - fn query_with_secondary_index( - &'a self, - index_scan: &IndexScan, - ) -> Result> + 'a, CacheError> { - let index_db = self.common.secondary_indexes[index_scan.index_id]; - - let RangeSpec { - start, - end, - direction, - } = get_range_spec(&index_scan.kind, index_scan.is_single_field_sorted_inverted)?; - let start = match &start { - Some(KeyEndpoint::Including(key)) => Bound::Included(key.as_slice()), - Some(KeyEndpoint::Excluding(key)) => Bound::Excluded(key.as_slice()), - None => Bound::Unbounded, - }; - - Ok(index_db - .range(self.txn, start, direction == SortDirection::Ascending)? - .take_while(move |result| match result { - Ok((key, _)) => { - if let Some(end_key) = &end { - match lmdb_cmp(self.txn, index_db.database(), key.borrow(), end_key.key()) { - Ordering::Less => matches!(direction, SortDirection::Ascending), - Ordering::Equal => matches!(end_key, KeyEndpoint::Including(_)), - Ordering::Greater => matches!(direction, SortDirection::Descending), - } - } else { - true - } - } - Err(_) => true, - }) - .map(|result| { - result - .map(|(_, id)| id.into_owned()) - .map_err(CacheError::Storage) - })) + Ok(skip(combined, self.query.skip).take(self.query.limit.unwrap_or(usize::MAX))) } - fn collect_records( - &self, - ids: impl Iterator>, - ) -> Result, CacheError> { - ids.filter_map(|id| match id { - Ok(id) => self - .common - .main_environment - .get_by_operation_id(self.txn, id, self.schema.is_append_only()) - .transpose(), + fn filter_secondary_queries<'txn, T: Transaction>( + &'txn self, + main_txn: &'txn T, + ids: impl Iterator> + 'txn, + ) -> impl Iterator> + 'txn { + let schema_is_append_only = self.cache.main_env().schema().0.is_append_only(); + ids.filter_map(move |id| match id { + Ok(id) => match self.cache.main_env().operation_log().contains_operation_id( + main_txn, + schema_is_append_only, + id, + ) { + Ok(true) => Some(Ok(id)), + Ok(false) => None, + Err(err) => Some(Err(err.into())), + }, Err(err) => Some(Err(err)), }) - .collect() } -} -#[derive(Debug, Clone)] -pub enum KeyEndpoint { - Including(Vec), - Excluding(Vec), -} + fn count_secondary_queries( + &self, + ids: impl Iterator>, + ) -> Result { + let main_txn = self.cache.main_env().begin_txn()?; -impl KeyEndpoint { - pub fn key(&self) -> &[u8] { - match self { - KeyEndpoint::Including(key) => key, - KeyEndpoint::Excluding(key) => key, + let mut result = 0; + for maybe_id in self.filter_secondary_queries(&main_txn, ids) { + maybe_id?; + result += 1; } + Ok(result) } -} -#[derive(Debug)] -struct RangeSpec { - start: Option, - end: Option, - direction: SortDirection, -} - -fn get_range_spec( - index_scan_kind: &IndexScanKind, - is_single_field_sorted_inverted: bool, -) -> Result { - match &index_scan_kind { - 
IndexScanKind::SortedInverted { - eq_filters, - range_query, - } => { - let comparison_key = build_sorted_inverted_comparison_key( - eq_filters, - range_query.as_ref(), - is_single_field_sorted_inverted, - ); - // There're 3 cases: - // 1. Range query with operator. - // 2. Range query without operator (only order by). - // 3. No range query. - Ok(if let Some(range_query) = range_query { - match range_query.operator_and_value { - Some((operator, _)) => { - // Here we respond to case 1, examples are `a = 1 && b > 2` or `b < 2`. - let comparison_key = comparison_key.expect("here's at least a range query"); - let null_key = build_sorted_inverted_comparison_key( - eq_filters, - Some(&SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: Some((operator, Field::Null)), - sort_direction: range_query.sort_direction, - }), - is_single_field_sorted_inverted, - ) - .expect("we provided a range query"); - get_key_interval_from_range_query( - comparison_key, - null_key, - operator, - range_query.sort_direction, - ) - } - None => { - // Here we respond to case 2, examples are `a = 1 && b asc` or `b desc`. - if let Some(comparison_key) = comparison_key { - // This is the case like `a = 1 && b asc`. The comparison key is only built from `a = 1`. - // We use `a = 1 && b = null` as a sentinel, using the invariant that `null` is greater than anything. - let null_key = build_sorted_inverted_comparison_key( - eq_filters, - Some(&SortedInvertedRangeQuery { - field_index: range_query.field_index, - operator_and_value: Some((Operator::LT, Field::Null)), - sort_direction: range_query.sort_direction, - }), - is_single_field_sorted_inverted, - ) - .expect("we provided a range query"); - match range_query.sort_direction { - SortDirection::Ascending => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: Some(KeyEndpoint::Including(null_key)), - direction: SortDirection::Ascending, - }, - SortDirection::Descending => RangeSpec { - start: Some(KeyEndpoint::Including(null_key)), - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Descending, - }, - } - } else { - // Just all of them. - RangeSpec { - start: None, - end: None, - direction: range_query.sort_direction, - } - } - } - } - } else { - // Here we respond to case 3, examples are `a = 1` or `a = 1 && b = 2`. 
- let comparison_key = comparison_key - .expect("here's at least a eq filter because there's no range query"); - RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key.clone())), - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Ascending, // doesn't matter - } - }) - } - IndexScanKind::FullText { filter } => match filter.op { - Operator::Contains => { - let token = match &filter.val { - Field::String(token) => token, - Field::Text(token) => token, - _ => return Err(CacheError::Index(IndexError::ExpectedStringFullText)), - }; - let key = index::get_full_text_secondary_index(token); - Ok(RangeSpec { - start: Some(KeyEndpoint::Including(key.clone())), - end: Some(KeyEndpoint::Including(key)), - direction: SortDirection::Ascending, // doesn't matter + fn collect_records<'txn, T: Transaction>( + &'txn self, + main_txn: &'txn T, + ids: impl Iterator> + 'txn, + ) -> Result, CacheError> { + self.filter_secondary_queries(main_txn, ids) + .map(|id| { + id.and_then(|id| { + self.cache + .main_env() + .operation_log() + .get_record_by_operation_id_unchecked(main_txn, id) + .map_err(Into::into) }) - } - Operator::MatchesAll | Operator::MatchesAny => { - unimplemented!("matches all and matches any are not implemented") - } - other => panic!("operator {other:?} is not supported by full text index"), - }, - } -} - -fn build_sorted_inverted_comparison_key( - eq_filters: &[(usize, Field)], - range_query: Option<&SortedInvertedRangeQuery>, - is_single_field_index: bool, -) -> Option> { - let mut fields = vec![]; - eq_filters.iter().for_each(|filter| { - fields.push(&filter.1); - }); - if let Some(range_query) = range_query { - if let Some((_, val)) = &range_query.operator_and_value { - fields.push(val); - } - } - if fields.is_empty() { - None - } else { - Some(index::get_secondary_index(&fields, is_single_field_index)) - } -} - -/// Here we use the invariant that `null` is greater than anything. 
-fn get_key_interval_from_range_query( - comparison_key: Vec, - null_key: Vec, - operator: Operator, - sort_direction: SortDirection, -) -> RangeSpec { - match (operator, sort_direction) { - (Operator::LT, SortDirection::Ascending) => RangeSpec { - start: None, - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Ascending, - }, - (Operator::LT, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: None, - direction: SortDirection::Descending, - }, - (Operator::LTE, SortDirection::Ascending) => RangeSpec { - start: None, - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Ascending, - }, - (Operator::LTE, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key)), - end: None, - direction: SortDirection::Descending, - }, - (Operator::GT, SortDirection::Ascending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(comparison_key)), - end: Some(KeyEndpoint::Excluding(null_key)), - direction: SortDirection::Ascending, - }, - (Operator::GT, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(null_key)), - end: Some(KeyEndpoint::Excluding(comparison_key)), - direction: SortDirection::Descending, - }, - (Operator::GTE, SortDirection::Ascending) => RangeSpec { - start: Some(KeyEndpoint::Including(comparison_key)), - end: Some(KeyEndpoint::Excluding(null_key)), - direction: SortDirection::Ascending, - }, - (Operator::GTE, SortDirection::Descending) => RangeSpec { - start: Some(KeyEndpoint::Excluding(null_key)), - end: Some(KeyEndpoint::Including(comparison_key)), - direction: SortDirection::Descending, - }, - (other, _) => { - panic!("operator {other:?} is not supported by sorted inverted index range query") - } + }) + .collect() } } diff --git a/dozer-cache/src/cache/lmdb/cache/query/mod.rs b/dozer-cache/src/cache/lmdb/cache/query/mod.rs index da6180bf6d..f802dc3c57 100644 --- a/dozer-cache/src/cache/lmdb/cache/query/mod.rs +++ b/dozer-cache/src/cache/lmdb/cache/query/mod.rs @@ -1,5 +1,6 @@ mod handler; mod intersection; +mod secondary; pub use handler::LmdbQueryHandler; diff --git a/dozer-cache/src/cache/lmdb/cache/query/secondary.rs b/dozer-cache/src/cache/lmdb/cache/query/secondary.rs new file mode 100644 index 0000000000..7f8ba4ee46 --- /dev/null +++ b/dozer-cache/src/cache/lmdb/cache/query/secondary.rs @@ -0,0 +1,280 @@ +use std::{cmp::Ordering, ops::Bound}; + +use dozer_storage::lmdb::Transaction; +use dozer_types::{ + borrow::{Borrow, IntoOwned}, + types::{Field, IndexDefinition}, +}; + +use crate::{ + cache::{ + expression::{Operator, SortDirection}, + index, + lmdb::cache::{helper::lmdb_cmp, secondary_environment::SecondaryEnvironment}, + plan::{IndexScanKind, SortedInvertedRangeQuery}, + }, + errors::{CacheError, IndexError}, +}; + +pub fn build_index_scan<'txn, T: Transaction, S: SecondaryEnvironment>( + secondary_txn: &'txn T, + secondary_env: &S, + index_scan_kind: &IndexScanKind, +) -> Result> + 'txn, CacheError> { + let is_single_field_sorted_inverted = + is_single_field_sorted_inverted(secondary_env.index_definition()); + let range = get_range_spec(index_scan_kind, is_single_field_sorted_inverted)?; + + let start = match &range.start { + Some(KeyEndpoint::Including(key)) => Bound::Included(key.as_slice()), + Some(KeyEndpoint::Excluding(key)) => Bound::Excluded(key.as_slice()), + None => Bound::Unbounded, + }; + + let database = secondary_env.database().database(); + Ok(secondary_env + .database() + .range( + 
secondary_txn,
+            start,
+            range.direction == SortDirection::Ascending,
+        )?
+        .take_while(move |result| match result {
+            Ok((key, _)) => {
+                if let Some(end_key) = &range.end {
+                    match lmdb_cmp(secondary_txn, database, key.borrow(), end_key.key()) {
+                        Ordering::Less => {
+                            matches!(range.direction, SortDirection::Ascending)
+                        }
+                        Ordering::Equal => matches!(end_key, KeyEndpoint::Including(_)),
+                        Ordering::Greater => {
+                            matches!(range.direction, SortDirection::Descending)
+                        }
+                    }
+                } else {
+                    true
+                }
+            }
+            Err(_) => true,
+        })
+        .map(|result| {
+            result
+                .map(|(_, id)| id.into_owned())
+                .map_err(CacheError::Storage)
+        }))
+}
+
+fn is_single_field_sorted_inverted(index: &IndexDefinition) -> bool {
+    match index {
+        // `fields.len() == 1` criteria must be kept the same with `comparator.rs`.
+        IndexDefinition::SortedInverted(fields) => fields.len() == 1,
+        _ => false,
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum KeyEndpoint {
+    Including(Vec<u8>),
+    Excluding(Vec<u8>),
+}
+
+impl KeyEndpoint {
+    pub fn key(&self) -> &[u8] {
+        match self {
+            KeyEndpoint::Including(key) => key,
+            KeyEndpoint::Excluding(key) => key,
+        }
+    }
+}
+
+#[derive(Debug)]
+struct RangeSpec {
+    start: Option<KeyEndpoint>,
+    end: Option<KeyEndpoint>,
+    direction: SortDirection,
+}
+
+fn get_range_spec(
+    index_scan_kind: &IndexScanKind,
+    is_single_field_sorted_inverted: bool,
+) -> Result<RangeSpec, CacheError> {
+    match &index_scan_kind {
+        IndexScanKind::SortedInverted {
+            eq_filters,
+            range_query,
+        } => {
+            let comparison_key = build_sorted_inverted_comparison_key(
+                eq_filters,
+                range_query.as_ref(),
+                is_single_field_sorted_inverted,
+            );
+            // There're 3 cases:
+            // 1. Range query with operator.
+            // 2. Range query without operator (only order by).
+            // 3. No range query.
+            Ok(if let Some(range_query) = range_query {
+                match range_query.operator_and_value {
+                    Some((operator, _)) => {
+                        // Here we respond to case 1, examples are `a = 1 && b > 2` or `b < 2`.
+                        let comparison_key = comparison_key.expect("here's at least a range query");
+                        let null_key = build_sorted_inverted_comparison_key(
+                            eq_filters,
+                            Some(&SortedInvertedRangeQuery {
+                                field_index: range_query.field_index,
+                                operator_and_value: Some((operator, Field::Null)),
+                                sort_direction: range_query.sort_direction,
+                            }),
+                            is_single_field_sorted_inverted,
+                        )
+                        .expect("we provided a range query");
+                        get_key_interval_from_range_query(
+                            comparison_key,
+                            null_key,
+                            operator,
+                            range_query.sort_direction,
+                        )
+                    }
+                    None => {
+                        // Here we respond to case 2, examples are `a = 1 && b asc` or `b desc`.
+                        if let Some(comparison_key) = comparison_key {
+                            // This is the case like `a = 1 && b asc`. The comparison key is only built from `a = 1`.
+                            // We use `a = 1 && b = null` as a sentinel, using the invariant that `null` is greater than anything.
+                            let null_key = build_sorted_inverted_comparison_key(
+                                eq_filters,
+                                Some(&SortedInvertedRangeQuery {
+                                    field_index: range_query.field_index,
+                                    operator_and_value: Some((Operator::LT, Field::Null)),
+                                    sort_direction: range_query.sort_direction,
+                                }),
+                                is_single_field_sorted_inverted,
+                            )
+                            .expect("we provided a range query");
+                            match range_query.sort_direction {
+                                SortDirection::Ascending => RangeSpec {
+                                    start: Some(KeyEndpoint::Excluding(comparison_key)),
+                                    end: Some(KeyEndpoint::Including(null_key)),
+                                    direction: SortDirection::Ascending,
+                                },
+                                SortDirection::Descending => RangeSpec {
+                                    start: Some(KeyEndpoint::Including(null_key)),
+                                    end: Some(KeyEndpoint::Excluding(comparison_key)),
+                                    direction: SortDirection::Descending,
+                                },
+                            }
+                        } else {
+                            // Just all of them.
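+                            // (No eq filters and no range operator means there
+                            // is no comparison key, so scan the whole index in
+                            // the requested direction.)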
+                            RangeSpec {
+                                start: None,
+                                end: None,
+                                direction: range_query.sort_direction,
+                            }
+                        }
+                    }
+                }
+            } else {
+                // Here we respond to case 3, examples are `a = 1` or `a = 1 && b = 2`.
+                let comparison_key = comparison_key
+                    .expect("here's at least an eq filter because there's no range query");
+                RangeSpec {
+                    start: Some(KeyEndpoint::Including(comparison_key.clone())),
+                    end: Some(KeyEndpoint::Including(comparison_key)),
+                    direction: SortDirection::Ascending, // doesn't matter
+                }
+            })
+        }
+        IndexScanKind::FullText { filter } => match filter.op {
+            Operator::Contains => {
+                let token = match &filter.val {
+                    Field::String(token) => token,
+                    Field::Text(token) => token,
+                    _ => return Err(CacheError::Index(IndexError::ExpectedStringFullText)),
+                };
+                let key = index::get_full_text_secondary_index(token);
+                Ok(RangeSpec {
+                    start: Some(KeyEndpoint::Including(key.clone())),
+                    end: Some(KeyEndpoint::Including(key)),
+                    direction: SortDirection::Ascending, // doesn't matter
+                })
+            }
+            Operator::MatchesAll | Operator::MatchesAny => {
+                unimplemented!("matches all and matches any are not implemented")
+            }
+            other => panic!("operator {other:?} is not supported by full text index"),
+        },
+    }
+}
+
+fn build_sorted_inverted_comparison_key(
+    eq_filters: &[(usize, Field)],
+    range_query: Option<&SortedInvertedRangeQuery>,
+    is_single_field_index: bool,
+) -> Option<Vec<u8>> {
+    let mut fields = vec![];
+    eq_filters.iter().for_each(|filter| {
+        fields.push(&filter.1);
+    });
+    if let Some(range_query) = range_query {
+        if let Some((_, val)) = &range_query.operator_and_value {
+            fields.push(val);
+        }
+    }
+    if fields.is_empty() {
+        None
+    } else {
+        Some(index::get_secondary_index(&fields, is_single_field_index))
+    }
+}
+
+/// Here we use the invariant that `null` is greater than anything.
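+///
+/// For example, ascending `b > 2` maps to the interval from `Excluding(key(2))`
+/// to `Excluding(key(null))`: every non-null value greater than the comparison
+/// key, in ascending order.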
+fn get_key_interval_from_range_query( + comparison_key: Vec, + null_key: Vec, + operator: Operator, + sort_direction: SortDirection, +) -> RangeSpec { + match (operator, sort_direction) { + (Operator::LT, SortDirection::Ascending) => RangeSpec { + start: None, + end: Some(KeyEndpoint::Excluding(comparison_key)), + direction: SortDirection::Ascending, + }, + (Operator::LT, SortDirection::Descending) => RangeSpec { + start: Some(KeyEndpoint::Excluding(comparison_key)), + end: None, + direction: SortDirection::Descending, + }, + (Operator::LTE, SortDirection::Ascending) => RangeSpec { + start: None, + end: Some(KeyEndpoint::Including(comparison_key)), + direction: SortDirection::Ascending, + }, + (Operator::LTE, SortDirection::Descending) => RangeSpec { + start: Some(KeyEndpoint::Including(comparison_key)), + end: None, + direction: SortDirection::Descending, + }, + (Operator::GT, SortDirection::Ascending) => RangeSpec { + start: Some(KeyEndpoint::Excluding(comparison_key)), + end: Some(KeyEndpoint::Excluding(null_key)), + direction: SortDirection::Ascending, + }, + (Operator::GT, SortDirection::Descending) => RangeSpec { + start: Some(KeyEndpoint::Excluding(null_key)), + end: Some(KeyEndpoint::Excluding(comparison_key)), + direction: SortDirection::Descending, + }, + (Operator::GTE, SortDirection::Ascending) => RangeSpec { + start: Some(KeyEndpoint::Including(comparison_key)), + end: Some(KeyEndpoint::Excluding(null_key)), + direction: SortDirection::Ascending, + }, + (Operator::GTE, SortDirection::Descending) => RangeSpec { + start: Some(KeyEndpoint::Excluding(null_key)), + end: Some(KeyEndpoint::Including(comparison_key)), + direction: SortDirection::Descending, + }, + (other, _) => { + panic!("operator {other:?} is not supported by sorted inverted index range query") + } + } +} diff --git a/dozer-cache/src/cache/lmdb/cache/query/tests.rs b/dozer-cache/src/cache/lmdb/cache/query/tests.rs index fbd6387c62..3789b05d3d 100644 --- a/dozer-cache/src/cache/lmdb/cache/query/tests.rs +++ b/dozer-cache/src/cache/lmdb/cache/query/tests.rs @@ -1,9 +1,6 @@ use crate::cache::{ expression::{FilterExpression, Operator, QueryExpression}, - lmdb::{ - cache::LmdbRwCache, - tests::utils::{create_cache, insert_rec_1}, - }, + lmdb::tests::utils::{create_cache, insert_rec_1}, test_utils::{query_from_filter, schema_1, schema_full_text, schema_multi_indices}, RecordWithId, RoCache, RwCache, }; @@ -14,14 +11,7 @@ use dozer_types::{ #[test] fn query_secondary_sorted_inverted() { - let (schema, secondary_indexes) = schema_1(); - let cache = LmdbRwCache::create( - schema.clone(), - secondary_indexes, - Default::default(), - Default::default(), - ) - .unwrap(); + let (cache, schema, _) = create_cache(schema_1); let mut record = Record::new( schema.identifier, @@ -56,14 +46,7 @@ fn query_secondary_sorted_inverted() { #[test] fn query_secondary_full_text() { - let (schema, secondary_indexes) = schema_full_text(); - let cache = LmdbRwCache::create( - schema.clone(), - secondary_indexes, - Default::default(), - Default::default(), - ) - .unwrap(); + let (cache, schema, _) = create_cache(schema_full_text); let mut record = Record::new( schema.identifier, diff --git a/dozer-cache/src/cache/lmdb/cache/schema_database.rs b/dozer-cache/src/cache/lmdb/cache/schema_database.rs deleted file mode 100644 index 8c8b980e49..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/schema_database.rs +++ /dev/null @@ -1,94 +0,0 @@ -use dozer_storage::{lmdb::RwTransaction, lmdb_storage::LmdbEnvironmentManager, LmdbMap}; -use dozer_types::{ - 
borrow::IntoOwned, - types::{IndexDefinition, Schema}, -}; - -use crate::errors::CacheError; - -#[derive(Debug, Clone)] -pub struct SchemaDatabase { - database: LmdbMap)>, - schema: Option<(Schema, Vec)>, -} - -impl SchemaDatabase { - pub fn new( - env: &mut LmdbEnvironmentManager, - create_if_not_exist: bool, - ) -> Result { - let database = LmdbMap::new_from_env(env, Some("schemas"), create_if_not_exist)?; - - // Collect existing schemas. - let txn = env.begin_ro_txn()?; - assert!(database.count(&txn)? <= 1, "More than one schema found"); - let schema = database.get(&txn, &SCHEMA_KEY)?.map(IntoOwned::into_owned); - - Ok(Self { database, schema }) - } - - pub fn insert( - &mut self, - txn: &mut RwTransaction, - schema: Schema, - secondary_indexes: Vec, - ) -> Result<(), CacheError> { - let schema_and_indexes = (schema, secondary_indexes); - if !self - .database - .insert(txn, &SCHEMA_KEY, &schema_and_indexes)? - { - panic!("Schema already exists"); - } - - self.schema = Some(schema_and_indexes); - - Ok(()) - } - - pub fn get_schema(&self) -> Option<&(Schema, Vec)> { - self.schema.as_ref() - } -} - -const SCHEMA_KEY: u8 = 0; - -#[cfg(test)] -mod tests { - use dozer_storage::lmdb::Transaction; - use dozer_types::types::{FieldDefinition, FieldType, SchemaIdentifier, SourceDefinition}; - - use crate::cache::lmdb::utils::{init_env, CacheOptions}; - - use super::*; - - #[test] - fn test_schema_database() { - let mut env = init_env(&CacheOptions::default()).unwrap().0; - let mut writer = SchemaDatabase::new(&mut env, true).unwrap(); - - let schema = Schema { - identifier: Some(SchemaIdentifier { id: 1, version: 1 }), - fields: vec![FieldDefinition { - name: "id".to_string(), - typ: FieldType::UInt, - nullable: false, - source: SourceDefinition::Dynamic, - }], - primary_index: vec![0], - }; - let secondary_indexes = vec![IndexDefinition::SortedInverted(vec![0])]; - - let mut txn = env.begin_rw_txn().unwrap(); - writer - .insert(&mut txn, schema.clone(), secondary_indexes.clone()) - .unwrap(); - txn.commit().unwrap(); - - let reader = SchemaDatabase::new(&mut env, false).unwrap(); - - let expected = (schema, secondary_indexes); - assert_eq!(writer.get_schema().unwrap(), &expected); - assert_eq!(reader.get_schema().unwrap(), &expected); - } -} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_environment.rs b/dozer-cache/src/cache/lmdb/cache/secondary_environment.rs new file mode 100644 index 0000000000..7555a32850 --- /dev/null +++ b/dozer-cache/src/cache/lmdb/cache/secondary_environment.rs @@ -0,0 +1,278 @@ +use dozer_storage::{ + errors::StorageError, + lmdb::{RoTransaction, Transaction}, + lmdb_storage::{LmdbEnvironmentManager, SharedTransaction}, + BeginTransaction, LmdbCounter, LmdbMultimap, LmdbOption, ReadTransaction, +}; +use dozer_types::{borrow::IntoOwned, types::IndexDefinition}; + +use crate::{ + cache::lmdb::{ + comparator, + indexer::{build_index, delete_index}, + utils::init_env, + }, + errors::CacheError, +}; + +use super::{ + main_environment::{Operation, OperationLog}, + CacheCommonOptions, CacheWriteOptions, +}; + +pub type SecondaryIndexDatabase = LmdbMultimap, u64>; + +pub trait SecondaryEnvironment: BeginTransaction { + fn index_definition(&self) -> &IndexDefinition; + fn database(&self) -> SecondaryIndexDatabase; + + fn count_data(&self) -> Result { + let txn = self.begin_txn()?; + self.database().count_data(&txn).map_err(Into::into) + } +} + +#[derive(Debug)] +pub struct RwSecondaryEnvironment { + index_definition: IndexDefinition, + txn: SharedTransaction, + 
database: SecondaryIndexDatabase, + next_operation_id: LmdbCounter, +} + +impl BeginTransaction for RwSecondaryEnvironment { + type Transaction<'a> = ReadTransaction<'a>; + + fn begin_txn(&self) -> Result, StorageError> { + self.txn.begin_txn() + } +} + +impl SecondaryEnvironment for RwSecondaryEnvironment { + fn index_definition(&self) -> &IndexDefinition { + &self.index_definition + } + + fn database(&self) -> SecondaryIndexDatabase { + self.database + } +} + +impl RwSecondaryEnvironment { + pub fn open( + name: String, + common_options: &CacheCommonOptions, + write_options: CacheWriteOptions, + ) -> Result { + let (env, database, next_operation_id, index_definition) = + open_env_with_index_definition(name, common_options, Some(write_options))?; + + Ok(Self { + index_definition, + txn: env.create_txn()?, + database, + next_operation_id, + }) + } + + pub fn create( + index_definition: &IndexDefinition, + name: String, + common_options: &CacheCommonOptions, + write_options: CacheWriteOptions, + ) -> Result { + let (env, database, next_operation_id, index_definition_option, old_index_definition) = + open_env(name.clone(), common_options, Some(write_options))?; + + set_comparator(&env, index_definition, database)?; + + let txn = env.create_txn()?; + let index_definition = if let Some(old_index_definition) = old_index_definition { + if index_definition != &old_index_definition { + return Err(CacheError::IndexDefinitionMismatch { + name, + given: index_definition.clone(), + stored: old_index_definition, + }); + } + old_index_definition + } else { + let mut txn = txn.write(); + index_definition_option.store(txn.txn_mut(), index_definition)?; + txn.commit_and_renew()?; + index_definition.clone() + }; + + Ok(Self { + index_definition, + txn, + database, + next_operation_id, + }) + } + + pub fn index( + &self, + log_txn: &T, + operation_log: OperationLog, + ) -> Result<(), CacheError> { + let main_env_next_operation_id = operation_log.next_operation_id(log_txn)?; + + let mut txn = self.txn.write(); + let txn = txn.txn_mut(); + loop { + // Start from `next_operation_id`. + let operation_id = self.next_operation_id.load(txn)?; + if operation_id >= main_env_next_operation_id { + return Ok(()); + } + // Get operation by operation id. + let operation = operation_log.get_operation(log_txn, operation_id)?; + match operation { + Operation::Insert { record, .. } => { + // Build secondary index. + build_index( + txn, + self.database, + &record, + &self.index_definition, + operation_id, + )?; + } + Operation::Delete { operation_id } => { + // If the operation is a `Delete`, find the deleted record, which must exist. + let Operation::Insert { record, .. } = operation_log.get_operation(log_txn, operation_id)? else { + panic!("Insert operation {} not found", operation_id); + }; + // Delete secondary index. 
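+                    // Note that `operation_id` is shadowed here by the id of
+                    // the original `Insert`, so we remove exactly the
+                    // (key, operation id) pairs that `build_index` added.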
+ delete_index( + txn, + self.database, + &record, + &self.index_definition, + operation_id, + )?; + } + } + self.next_operation_id.store(txn, operation_id + 1)?; + } + } + + pub fn commit(&self) -> Result<(), CacheError> { + self.txn.write().commit_and_renew().map_err(Into::into) + } +} + +#[derive(Debug)] +pub struct RoSecondaryEnvironment { + index_definition: IndexDefinition, + env: LmdbEnvironmentManager, + database: SecondaryIndexDatabase, +} + +impl BeginTransaction for RoSecondaryEnvironment { + type Transaction<'a> = RoTransaction<'a>; + + fn begin_txn(&self) -> Result, StorageError> { + self.env.begin_txn() + } +} + +impl SecondaryEnvironment for RoSecondaryEnvironment { + fn index_definition(&self) -> &IndexDefinition { + &self.index_definition + } + + fn database(&self) -> SecondaryIndexDatabase { + self.database + } +} + +impl RoSecondaryEnvironment { + pub fn new(name: String, common_options: &CacheCommonOptions) -> Result { + let (env, database, _, index_definition) = + open_env_with_index_definition(name, common_options, None)?; + Ok(Self { + env, + database, + index_definition, + }) + } +} + +#[allow(clippy::type_complexity)] +fn open_env( + name: String, + common_options: &CacheCommonOptions, + write_options: Option, +) -> Result< + ( + LmdbEnvironmentManager, + SecondaryIndexDatabase, + LmdbCounter, + LmdbOption, + Option, + ), + CacheError, +> { + let path = common_options + .path + .as_ref() + .map(|(base_path, main_name)| (base_path.join(format!("{main_name}_index")), name)); + let common_options = CacheCommonOptions { + path, + ..*common_options + }; + + let mut env = init_env(&common_options, write_options)?.0; + + let create_if_not_exist = write_options.is_some(); + let database = LmdbMultimap::new(&mut env, Some("database"), create_if_not_exist)?; + let next_operation_id = + LmdbCounter::new(&mut env, Some("next_operation_id"), create_if_not_exist)?; + let index_definition_option = + LmdbOption::new(&mut env, Some("index_definition"), create_if_not_exist)?; + + let index_definition = index_definition_option + .load(&env.begin_txn()?)? 
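+        // (present when this environment already exists on disk; `create`
+        // verifies it matches the given definition, `open` requires it)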
+ .map(IntoOwned::into_owned); + + Ok(( + env, + database, + next_operation_id, + index_definition_option, + index_definition, + )) +} + +fn open_env_with_index_definition( + name: String, + common_options: &CacheCommonOptions, + write_options: Option, +) -> Result< + ( + LmdbEnvironmentManager, + SecondaryIndexDatabase, + LmdbCounter, + IndexDefinition, + ), + CacheError, +> { + let (env, database, next_operation_id, _, index_definition) = + open_env(name.clone(), common_options, write_options)?; + let index_definition = index_definition.ok_or(CacheError::IndexDefinitionNotFound(name))?; + set_comparator(&env, &index_definition, database)?; + Ok((env, database, next_operation_id, index_definition)) +} + +fn set_comparator( + env: &LmdbEnvironmentManager, + index_definition: &IndexDefinition, + database: SecondaryIndexDatabase, +) -> Result<(), CacheError> { + if let IndexDefinition::SortedInverted(fields) = index_definition { + comparator::set_sorted_inverted_comparator(&env.begin_txn()?, database.database(), fields)?; + } + Ok(()) +} diff --git a/dozer-cache/src/cache/lmdb/cache/secondary_index_database.rs b/dozer-cache/src/cache/lmdb/cache/secondary_index_database.rs deleted file mode 100644 index 60ceb215d2..0000000000 --- a/dozer-cache/src/cache/lmdb/cache/secondary_index_database.rs +++ /dev/null @@ -1,50 +0,0 @@ -use dozer_storage::{ - errors::StorageError, - lmdb::Transaction, - lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction}, - LmdbMultimap, -}; -use dozer_types::types::IndexDefinition; - -use crate::{cache::lmdb::comparator, errors::CacheError}; - -pub fn new_secondary_index_database_from_env( - env: &mut LmdbEnvironmentManager, - index: usize, - index_definition: &IndexDefinition, - create_if_not_exist: bool, -) -> Result, u64>, CacheError> { - let name = database_name(index); - - let result = LmdbMultimap::new_from_env(env, Some(&name), create_if_not_exist)?; - - let txn = env.begin_ro_txn()?; - - if let IndexDefinition::SortedInverted(fields) = index_definition { - comparator::set_sorted_inverted_comparator(&txn, result.database(), fields)?; - } - - txn.commit().map_err(StorageError::Lmdb)?; - - Ok(result) -} - -pub fn new_secondary_index_database_from_txn( - txn: &mut LmdbExclusiveTransaction, - index: usize, - index_definition: &IndexDefinition, - create_if_not_exist: bool, -) -> Result, u64>, CacheError> { - let name = database_name(index); - let result = LmdbMultimap::new_from_txn(txn, Some(&name), create_if_not_exist)?; - - if let IndexDefinition::SortedInverted(fields) = index_definition { - comparator::set_sorted_inverted_comparator(txn.txn(), result.database(), fields)?; - } - - Ok(result) -} - -fn database_name(index: usize) -> String { - format!("index_#{index}") -} diff --git a/dozer-cache/src/cache/lmdb/cache_manager.rs b/dozer-cache/src/cache/lmdb/cache_manager.rs index f36a9c4053..d0fbcc2d3c 100644 --- a/dozer-cache/src/cache/lmdb/cache_manager.rs +++ b/dozer-cache/src/cache/lmdb/cache_manager.rs @@ -3,7 +3,9 @@ use std::path::PathBuf; use dozer_storage::{ errors::StorageError, lmdb::{Database, DatabaseFlags}, - lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction, SharedTransaction}, + lmdb_storage::{ + CreateDatabase, LmdbEnvironmentManager, LmdbExclusiveTransaction, SharedTransaction, + }, }; use dozer_types::types::{IndexDefinition, Schema}; use tempdir::TempDir; @@ -60,7 +62,7 @@ impl LmdbCacheManager { pub fn new(options: CacheManagerOptions) -> Result { let (temp_dir, base_path) = match &options.path { Some(path) => { - 
std::fs::create_dir_all(path)?; + std::fs::create_dir_all(path).map_err(|e| CacheError::Io(path.clone(), e))?; (None, path.clone()) } None => { @@ -97,7 +99,7 @@ impl CacheManager for LmdbCacheManager { let cache: Option> = if LmdbEnvironmentManager::exists(&self.base_path, real_name) { let cache = LmdbRwCache::open( - self.cache_common_options(real_name.to_string()), + &self.cache_common_options(real_name.to_string()), self.cache_write_options(), )?; Some(Box::new(cache)) @@ -114,7 +116,7 @@ impl CacheManager for LmdbCacheManager { let real_name = self.resolve_alias(name, &txn)?.unwrap_or(name); let cache: Option> = if LmdbEnvironmentManager::exists(&self.base_path, real_name) { - let cache = LmdbRoCache::new(self.cache_common_options(real_name.to_string()))?; + let cache = LmdbRoCache::new(&self.cache_common_options(real_name.to_string()))?; Some(Box::new(cache)) } else { None @@ -129,9 +131,8 @@ impl CacheManager for LmdbCacheManager { ) -> Result, CacheError> { let name = self.generate_unique_name(); let cache = LmdbRwCache::create( - schema, - indexes, - self.cache_common_options(name), + &(schema, indexes), + &self.cache_common_options(name), self.cache_write_options(), )?; Ok(Box::new(cache)) diff --git a/dozer-cache/src/cache/lmdb/comparator.rs b/dozer-cache/src/cache/lmdb/comparator.rs index 694a22a541..fbfd03510f 100644 --- a/dozer-cache/src/cache/lmdb/comparator.rs +++ b/dozer-cache/src/cache/lmdb/comparator.rs @@ -48,7 +48,9 @@ mod tests { use std::cmp::Ordering::{self, Equal, Greater, Less}; use dozer_storage::{ - lmdb::DatabaseFlags, lmdb_storage::LmdbEnvironmentManager, lmdb_sys::mdb_cmp, + lmdb::DatabaseFlags, + lmdb_storage::{CreateDatabase, LmdbEnvironmentManager}, + lmdb_sys::mdb_cmp, }; use dozer_types::{ chrono::{DateTime, NaiveDate, TimeZone, Utc}, @@ -57,10 +59,7 @@ mod tests { types::Field, }; - use crate::cache::{ - index::get_secondary_index, - lmdb::utils::{self, CacheOptions}, - }; + use crate::cache::{index::get_secondary_index, lmdb::utils}; use super::*; @@ -142,8 +141,9 @@ mod tests { } fn setup(num_fields: usize) -> (LmdbEnvironmentManager, Database) { - let options = CacheOptions::default(); - let mut env = utils::init_env(&options).unwrap().0; + let mut env = utils::init_env(&Default::default(), Some(Default::default())) + .unwrap() + .0; let db = env .create_database(Some("test"), Some(DatabaseFlags::DUP_SORT)) .unwrap(); diff --git a/dozer-cache/src/cache/lmdb/indexer.rs b/dozer-cache/src/cache/lmdb/indexer.rs index a4dc12a77a..c2e544c414 100644 --- a/dozer-cache/src/cache/lmdb/indexer.rs +++ b/dozer-cache/src/cache/lmdb/indexer.rs @@ -1,10 +1,7 @@ use crate::errors::{CacheError, IndexError}; use dozer_storage::lmdb::RwTransaction; -use dozer_types::{ - tracing, - types::{Field, IndexDefinition, Record}, -}; +use dozer_types::types::{Field, IndexDefinition, Record}; use dozer_storage::LmdbMultimap; @@ -13,117 +10,92 @@ use unicode_segmentation::UnicodeSegmentation; use crate::cache::index::{self, get_full_text_secondary_index}; -pub struct Indexer<'a> { - pub secondary_indexes: &'a [LmdbMultimap, u64>], -} -impl<'a> Indexer<'a> { - pub fn build_indexes( - &self, - txn: &mut RwTransaction, - record: &Record, - secondary_indexes: &[IndexDefinition], - id: u64, - ) -> Result<(), CacheError> { - let span = tracing::span!(tracing::Level::TRACE, "building indexes", "{}", id); - let _enter = span.enter(); - - debug_assert!(secondary_indexes.len() == self.secondary_indexes.len()); - - if secondary_indexes.is_empty() { - return 
Err(CacheError::Index(IndexError::MissingSecondaryIndexes)); +pub fn build_index( + txn: &mut RwTransaction, + database: LmdbMultimap, u64>, + record: &Record, + index_definition: &IndexDefinition, + operation_id: u64, +) -> Result<(), CacheError> { + match index_definition { + IndexDefinition::SortedInverted(fields) => { + let secondary_key = build_index_sorted_inverted(fields, &record.values); + // Ignore existing pair. + database.insert(txn, &secondary_key, &operation_id)?; } - for (index, db) in secondary_indexes.iter().zip(self.secondary_indexes) { - match index { - IndexDefinition::SortedInverted(fields) => { - let secondary_key = Self::_build_index_sorted_inverted(fields, &record.values); - // Ignore existing pair. - db.insert(txn, &secondary_key, &id)?; - } - IndexDefinition::FullText(field_index) => { - for secondary_key in - Self::_build_indices_full_text(*field_index, &record.values)? - { - // Ignore existing pair. - db.insert(txn, &secondary_key, &id)?; - } - } + IndexDefinition::FullText(field_index) => { + for secondary_key in build_indices_full_text(*field_index, &record.values)? { + // Ignore existing pair. + database.insert(txn, &secondary_key, &operation_id)?; } } - Ok(()) } + Ok(()) +} - pub fn delete_indexes( - &self, - txn: &mut RwTransaction, - record: &Record, - secondary_indexes: &[IndexDefinition], - id: u64, - ) -> Result<(), CacheError> { - for (index, db) in secondary_indexes.iter().zip(self.secondary_indexes) { - match index { - IndexDefinition::SortedInverted(fields) => { - let secondary_key = Self::_build_index_sorted_inverted(fields, &record.values); - // Ignore if not found. - db.remove(txn, &secondary_key, &id)?; - } - IndexDefinition::FullText(field_index) => { - for secondary_key in - Self::_build_indices_full_text(*field_index, &record.values)? - { - // Ignore if not found. - db.remove(txn, &secondary_key, &id)?; - } - } +pub fn delete_index( + txn: &mut RwTransaction, + database: LmdbMultimap, u64>, + record: &Record, + index_definition: &IndexDefinition, + operation_id: u64, +) -> Result<(), CacheError> { + match index_definition { + IndexDefinition::SortedInverted(fields) => { + let secondary_key = build_index_sorted_inverted(fields, &record.values); + // Ignore if not found. + database.remove(txn, &secondary_key, &operation_id)?; + } + IndexDefinition::FullText(field_index) => { + for secondary_key in build_indices_full_text(*field_index, &record.values)? { + // Ignore if not found. + database.remove(txn, &secondary_key, &operation_id)?; } } - - Ok(()) } + Ok(()) +} - fn _build_index_sorted_inverted(fields: &[usize], values: &[Field]) -> Vec { - let values = fields - .iter() - .copied() - .filter_map(|index| (values.get(index))) - .collect::>(); - // `values.len() == 1` criteria must be kept the same with `comparator.rs`. - index::get_secondary_index(&values, values.len() == 1) - } +fn build_index_sorted_inverted(fields: &[usize], values: &[Field]) -> Vec { + let values = fields + .iter() + .copied() + .filter_map(|index| (values.get(index))) + .collect::>(); + // `values.len() == 1` criteria must be kept the same with `comparator.rs`. 
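+    // (Single-field keys use a different encoding, and the custom comparator
+    // installed in `comparator.rs` is what makes their raw-byte order follow
+    // the field order, hence the shared criterion.)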
+ index::get_secondary_index(&values, values.len() == 1) +} - fn _build_indices_full_text( - field_index: usize, - values: &[Field], - ) -> Result>, CacheError> { - let Some(field) = values.get(field_index) else { +fn build_indices_full_text( + field_index: usize, + values: &[Field], +) -> Result>, CacheError> { + let Some(field) = values.get(field_index) else { return Err(CacheError::Index(IndexError::FieldIndexOutOfRange)); }; - let string = match field { - Field::String(string) => string, - Field::Text(string) => string, - Field::Null => "", - _ => { - return Err(CacheError::Index(IndexError::FieldNotCompatibleIndex( - field_index, - ))) - } - }; + let string = match field { + Field::String(string) => string, + Field::Text(string) => string, + Field::Null => "", + _ => { + return Err(CacheError::Index(IndexError::FieldNotCompatibleIndex( + field_index, + ))) + } + }; - Ok(string - .unicode_words() - .map(get_full_text_secondary_index) - .unique() - .collect()) - } + Ok(string + .unicode_words() + .map(get_full_text_secondary_index) + .unique() + .collect()) } #[cfg(test)] mod tests { use crate::cache::{ - lmdb::{ - cache::LmdbRwCache, - tests::utils::{self as lmdb_utils, create_cache}, - }, + lmdb::tests::utils::{self as lmdb_utils, create_cache}, test_utils, RwCache, }; @@ -132,7 +104,6 @@ mod tests { #[test] fn test_secondary_indexes() { let (cache, schema, secondary_indexes) = create_cache(test_utils::schema_1); - let (txn, secondary_index_databases) = cache.get_txn_and_secondary_indexes(); let items = vec![ (1, Some("a".to_string()), Some(521)), @@ -145,34 +116,23 @@ mod tests { lmdb_utils::insert_rec_1(&cache, &schema, val); } - { - let txn = txn.read(); - // No of index dbs - let index_counts = lmdb_utils::get_index_counts(txn.txn(), secondary_index_databases); - - let index_count: usize = index_counts.iter().sum(); - let expected_count = secondary_indexes.len(); - // 3 columns, 1 compound, 1 descending - assert_eq!( - index_counts.len(), - expected_count, - "Must create db for each index" - ); - - assert_eq!( - index_count, - items.len() * expected_count, - "Must index each field" - ); - } + // No of index dbs + let index_counts = lmdb_utils::get_index_counts(&cache); + let expected_count = secondary_indexes.len(); + assert_eq!(index_counts.len(), expected_count,); + + // 3 columns, 1 compound, 1 descending + assert_eq!( + index_counts.iter().sum::(), + items.len() * expected_count, + ); for a in [1i64, 2, 3, 4] { cache.delete(&Field::Int(a).encode()).unwrap(); } - let txn = txn.read(); assert_eq!( - lmdb_utils::get_index_counts(txn.txn(), secondary_index_databases) + lmdb_utils::get_index_counts(&cache) .into_iter() .sum::(), 0, @@ -184,11 +144,8 @@ mod tests { fn test_build_indices_full_text() { let field_index = 0; assert_eq!( - Indexer::_build_indices_full_text( - field_index, - &[Field::String("today is a good day".into())] - ) - .unwrap(), + build_indices_full_text(field_index, &[Field::String("today is a good day".into())]) + .unwrap(), vec![ get_full_text_secondary_index("today"), get_full_text_secondary_index("is"), @@ -201,14 +158,7 @@ mod tests { #[test] fn test_full_text_secondary_index_with_duplicated_words() { - let (schema, secondary_indexes) = test_utils::schema_full_text(); - let cache = LmdbRwCache::create( - schema.clone(), - secondary_indexes, - Default::default(), - Default::default(), - ) - .unwrap(); + let (cache, schema, _) = create_cache(test_utils::schema_full_text); let items = vec![( Some("another test".to_string()), @@ -224,10 +174,8 @@ mod tests { 
cache.delete(&Field::String(a).encode()).unwrap(); } - let (txn, secondary_index_databases) = cache.get_txn_and_secondary_indexes(); - let txn = txn.read(); assert_eq!( - lmdb_utils::get_index_counts(txn.txn(), secondary_index_databases) + lmdb_utils::get_index_counts(&cache) .into_iter() .sum::(), 0, diff --git a/dozer-cache/src/cache/lmdb/tests/basic.rs b/dozer-cache/src/cache/lmdb/tests/basic.rs index 0cbecab3ff..d905babdf3 100644 --- a/dozer-cache/src/cache/lmdb/tests/basic.rs +++ b/dozer-cache/src/cache/lmdb/tests/basic.rs @@ -31,7 +31,7 @@ fn query_and_test(cache: &dyn RwCache, inserted_record: &Record, exp: &QueryExpr fn get_schema() { let (cache, schema) = _setup(); - let get_schema = &cache.get_schema().unwrap().0; + let get_schema = &cache.get_schema().0; assert_eq!(get_schema, &schema, "must be equal"); } diff --git a/dozer-cache/src/cache/lmdb/tests/read_write.rs b/dozer-cache/src/cache/lmdb/tests/read_write.rs index 35d6a4b58f..d189c9dd0d 100644 --- a/dozer-cache/src/cache/lmdb/tests/read_write.rs +++ b/dozer-cache/src/cache/lmdb/tests/read_write.rs @@ -11,11 +11,10 @@ fn read_and_write() { // write and read from cache from two different threads. - let (schema, secondary_indexes) = test_utils::schema_1(); + let schema = test_utils::schema_1(); let cache_writer = LmdbRwCache::create( - schema.clone(), - secondary_indexes, - CacheCommonOptions { + &schema, + &CacheCommonOptions { max_readers: 1, max_db_size: 100, path: Some(path.clone()), @@ -35,7 +34,7 @@ fn read_and_write() { ]; for val in items.clone() { - lmdb_utils::insert_rec_1(&cache_writer, &schema, val.clone()); + lmdb_utils::insert_rec_1(&cache_writer, &schema.0, val.clone()); } cache_writer.commit().unwrap(); @@ -43,7 +42,7 @@ fn read_and_write() { path: Some(path), ..Default::default() }; - let cache_reader = LmdbRoCache::new(read_options).unwrap(); + let cache_reader = LmdbRoCache::new(&read_options).unwrap(); for (a, b, c) in items { let rec = cache_reader.get(&Field::Int(a).encode()).unwrap(); let values = vec![ diff --git a/dozer-cache/src/cache/lmdb/tests/utils.rs b/dozer-cache/src/cache/lmdb/tests/utils.rs index 0c6e5f87e0..33a8a89227 100644 --- a/dozer-cache/src/cache/lmdb/tests/utils.rs +++ b/dozer-cache/src/cache/lmdb/tests/utils.rs @@ -1,20 +1,16 @@ -use dozer_storage::{lmdb::Transaction, LmdbMultimap}; -use dozer_types::types::{Field, IndexDefinition, Record, Schema}; +use dozer_types::types::{Field, IndexDefinition, Record, Schema, SchemaWithIndex}; -use crate::cache::{lmdb::cache::LmdbRwCache, RwCache}; +use crate::cache::{ + lmdb::cache::{LmdbCache, LmdbRwCache, SecondaryEnvironment}, + RoCache, RwCache, +}; pub fn create_cache( - schema_gen: impl FnOnce() -> (Schema, Vec), + schema_gen: impl FnOnce() -> SchemaWithIndex, ) -> (LmdbRwCache, Schema, Vec) { - let (schema, secondary_indexes) = schema_gen(); - let cache = LmdbRwCache::create( - schema.clone(), - secondary_indexes.clone(), - Default::default(), - Default::default(), - ) - .unwrap(); - (cache, schema, secondary_indexes) + let schema = schema_gen(); + let cache = LmdbRwCache::create(&schema, &Default::default(), Default::default()).unwrap(); + (cache, schema.0, schema.1) } pub fn insert_rec_1( @@ -50,13 +46,8 @@ pub fn insert_full_text( cache.insert(&mut record).unwrap(); } -pub fn get_index_counts( - txn: &T, - secondary_index_databases: &[LmdbMultimap, u64>], -) -> Vec { - let mut items = Vec::new(); - for db in secondary_index_databases { - items.push(db.count_data(txn).unwrap()); - } - items +pub fn get_index_counts(cache: &C) -> Vec { + 
(0..cache.get_schema().1.len()) + .map(|index| cache.secondary_env(index).count_data().unwrap()) + .collect() } diff --git a/dozer-cache/src/cache/lmdb/utils.rs b/dozer-cache/src/cache/lmdb/utils.rs index 2a0535e439..5fd473c15a 100644 --- a/dozer-cache/src/cache/lmdb/utils.rs +++ b/dozer-cache/src/cache/lmdb/utils.rs @@ -9,88 +9,79 @@ use tempdir::TempDir; use super::cache::{CacheCommonOptions, CacheWriteOptions}; -#[derive(Clone, Debug, Default)] -pub struct CacheOptions { - pub common: CacheCommonOptions, - pub kind: CacheOptionsKind, -} - -#[derive(Clone, Debug, Default)] -pub struct CacheReadOptions {} - -#[derive(Clone, Debug)] -pub enum CacheOptionsKind { - // Write Options - Write(CacheWriteOptions), - - // Read Options - ReadOnly(CacheReadOptions), -} - -impl Default for CacheOptionsKind { - fn default() -> Self { - Self::Write(CacheWriteOptions::default()) +pub fn init_env( + common_options: &CacheCommonOptions, + write_options: Option, +) -> Result<(LmdbEnvironmentManager, String), CacheError> { + if let Some(write_options) = write_options { + create_env(common_options, write_options) + } else { + let (env, name) = open_env(common_options)?; + Ok((env, name.to_string())) } } -pub fn init_env(options: &CacheOptions) -> Result<(LmdbEnvironmentManager, String), CacheError> { - match &options.kind { - CacheOptionsKind::Write(write_options) => { - let (base_path, name, _temp_dir) = match &options.common.path { - None => { - let base_path = TempDir::new("dozer")?; - ( - base_path.path().to_path_buf(), - "dozer-cache", - Some(base_path), - ) - } - Some((base_path, name)) => { - fs::create_dir_all(base_path)?; - (base_path.clone(), name.deref(), None) - } - }; - - let options = LmdbEnvironmentOptions::new( - options.common.max_db_size, - options.common.max_readers, - write_options.max_size, - EnvironmentFlags::empty(), - ); - - Ok(( - LmdbEnvironmentManager::create(&base_path, name, options)?, - name.to_string(), - )) +fn create_env( + common_options: &CacheCommonOptions, + write_options: CacheWriteOptions, +) -> Result<(LmdbEnvironmentManager, String), CacheError> { + let (base_path, name, _temp_dir) = match &common_options.path { + None => { + let base_path = + TempDir::new("dozer").map_err(|e| CacheError::Io("tempdir".into(), e))?; + ( + base_path.path().to_path_buf(), + "dozer-cache", + Some(base_path), + ) } - CacheOptionsKind::ReadOnly(_) => { - let (base_path, name) = options - .common - .path - .as_ref() - .ok_or(CacheError::PathNotInitialized)?; - - let env_options = LmdbEnvironmentOptions { - max_dbs: options.common.max_db_size, - max_readers: options.common.max_readers, - flags: EnvironmentFlags::READ_ONLY, - ..Default::default() - }; - - Ok(( - LmdbEnvironmentManager::create(base_path, name, env_options)?, - name.to_string(), - )) + Some((base_path, name)) => { + fs::create_dir_all(base_path).map_err(|e| CacheError::Io(base_path.clone(), e))?; + (base_path.clone(), name.deref(), None) } - } + }; + + let options = LmdbEnvironmentOptions::new( + common_options.max_db_size, + common_options.max_readers, + write_options.max_size, + EnvironmentFlags::empty(), + ); + + Ok(( + LmdbEnvironmentManager::create(&base_path, name, options)?, + name.to_string(), + )) +} + +fn open_env(options: &CacheCommonOptions) -> Result<(LmdbEnvironmentManager, &str), CacheError> { + let (base_path, name) = options + .path + .as_ref() + .ok_or(CacheError::PathNotInitialized)?; + + let env_options = LmdbEnvironmentOptions { + max_dbs: options.max_db_size, + max_readers: options.max_readers, + flags: 
EnvironmentFlags::READ_ONLY, + ..Default::default() + }; + + Ok(( + LmdbEnvironmentManager::create(base_path, name, env_options)?, + name, + )) } #[cfg(test)] mod tests { - use dozer_storage::lmdb::{Cursor, DatabaseFlags, RoCursor, Transaction, WriteFlags}; + use dozer_storage::{ + lmdb::{Cursor, DatabaseFlags, RoCursor, Transaction, WriteFlags}, + lmdb_storage::CreateDatabase, + }; use dozer_types::types::Field; - use crate::cache::lmdb::utils::{init_env, CacheOptions}; + use super::*; fn cursor_dump(mut cursor: RoCursor) -> Vec<(&[u8], &[u8])> { cursor @@ -102,8 +93,9 @@ mod tests { #[test] fn duplicate_test_nested() { - let options = CacheOptions::default(); - let mut env = init_env(&options).unwrap().0; + let mut env = create_env(&Default::default(), Default::default()) + .unwrap() + .0; let db = env .create_database( diff --git a/dozer-cache/src/cache/mod.rs b/dozer-cache/src/cache/mod.rs index 6256c2ede5..7113f14436 100644 --- a/dozer-cache/src/cache/mod.rs +++ b/dozer-cache/src/cache/mod.rs @@ -5,7 +5,7 @@ use self::expression::QueryExpression; use crate::errors::CacheError; use dozer_types::{ serde::{Deserialize, Serialize}, - types::{IndexDefinition, Record, Schema}, + types::{IndexDefinition, Record, Schema, SchemaWithIndex}, }; pub use lmdb::cache_manager::{CacheManagerOptions, LmdbCacheManager}; pub mod expression; @@ -59,7 +59,7 @@ pub trait RoCache: Send + Sync + Debug { fn name(&self) -> &str; // Schema Operations - fn get_schema(&self) -> Result<&(Schema, Vec), CacheError>; + fn get_schema(&self) -> &SchemaWithIndex; // Record Operations fn get(&self, key: &[u8]) -> Result; diff --git a/dozer-cache/src/cache/plan/mod.rs b/dozer-cache/src/cache/plan/mod.rs index d77bfd49a2..2d178d29d0 100644 --- a/dozer-cache/src/cache/plan/mod.rs +++ b/dozer-cache/src/cache/plan/mod.rs @@ -17,7 +17,6 @@ pub enum Plan { #[derive(Clone, Debug, PartialEq, Eq)] pub struct IndexScan { pub index_id: usize, - pub is_single_field_sorted_inverted: bool, pub kind: IndexScanKind, } diff --git a/dozer-cache/src/cache/plan/planner.rs b/dozer-cache/src/cache/plan/planner.rs index 8f0ee26a6f..890e230b3f 100644 --- a/dozer-cache/src/cache/plan/planner.rs +++ b/dozer-cache/src/cache/plan/planner.rs @@ -1,4 +1,4 @@ -use crate::cache::expression::{FilterExpression, Operator, QueryExpression, SortDirection}; +use crate::cache::expression::{FilterExpression, Operator, SortDirection, SortOptions}; use crate::errors::PlanError; use dozer_types::json_value_to_field; use dozer_types::types::{Field, FieldDefinition, Schema}; @@ -11,18 +11,21 @@ use super::{IndexFilter, IndexScanKind}; pub struct QueryPlanner<'a> { schema: &'a Schema, secondary_indexes: &'a [IndexDefinition], - query: &'a QueryExpression, + filter: Option<&'a FilterExpression>, + order_by: &'a SortOptions, } impl<'a> QueryPlanner<'a> { pub fn new( schema: &'a Schema, secondary_indexes: &'a [IndexDefinition], - query: &'a QueryExpression, + filter: Option<&'a FilterExpression>, + order_by: &'a SortOptions, ) -> Self { Self { schema, secondary_indexes, - query, + filter, + order_by, } } @@ -30,14 +33,14 @@ impl<'a> QueryPlanner<'a> { // Collect all the filters. // TODO: Handle filters like And([a > 0, a < 10]). let mut filters = vec![]; - if let Some(expression) = &self.query.filter { + if let Some(expression) = &self.filter { collect_filters(self.schema, expression, &mut filters)?; } // Filter the sort options. // TODO: Handle duplicate fields. 
diff --git a/dozer-cache/src/cache/mod.rs b/dozer-cache/src/cache/mod.rs
index 6256c2ede5..7113f14436 100644
--- a/dozer-cache/src/cache/mod.rs
+++ b/dozer-cache/src/cache/mod.rs
@@ -5,7 +5,7 @@ use self::expression::QueryExpression;
 use crate::errors::CacheError;
 use dozer_types::{
     serde::{Deserialize, Serialize},
-    types::{IndexDefinition, Record, Schema},
+    types::{IndexDefinition, Record, Schema, SchemaWithIndex},
 };
 pub use lmdb::cache_manager::{CacheManagerOptions, LmdbCacheManager};
 pub mod expression;
@@ -59,7 +59,7 @@ pub trait RoCache: Send + Sync + Debug {
     fn name(&self) -> &str;
 
     // Schema Operations
-    fn get_schema(&self) -> Result<&(Schema, Vec<IndexDefinition>), CacheError>;
+    fn get_schema(&self) -> &SchemaWithIndex;
 
     // Record Operations
     fn get(&self, key: &[u8]) -> Result<RecordWithId, CacheError>;
diff --git a/dozer-cache/src/cache/plan/mod.rs b/dozer-cache/src/cache/plan/mod.rs
index d77bfd49a2..2d178d29d0 100644
--- a/dozer-cache/src/cache/plan/mod.rs
+++ b/dozer-cache/src/cache/plan/mod.rs
@@ -17,7 +17,6 @@ pub enum Plan {
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct IndexScan {
     pub index_id: usize,
-    pub is_single_field_sorted_inverted: bool,
     pub kind: IndexScanKind,
 }
 
diff --git a/dozer-cache/src/cache/plan/planner.rs b/dozer-cache/src/cache/plan/planner.rs
index 8f0ee26a6f..890e230b3f 100644
--- a/dozer-cache/src/cache/plan/planner.rs
+++ b/dozer-cache/src/cache/plan/planner.rs
@@ -1,4 +1,4 @@
-use crate::cache::expression::{FilterExpression, Operator, QueryExpression, SortDirection};
+use crate::cache::expression::{FilterExpression, Operator, SortDirection, SortOptions};
 use crate::errors::PlanError;
 use dozer_types::json_value_to_field;
 use dozer_types::types::{Field, FieldDefinition, Schema};
@@ -11,18 +11,21 @@ use super::{IndexFilter, IndexScanKind};
 pub struct QueryPlanner<'a> {
     schema: &'a Schema,
     secondary_indexes: &'a [IndexDefinition],
-    query: &'a QueryExpression,
+    filter: Option<&'a FilterExpression>,
+    order_by: &'a SortOptions,
 }
 
 impl<'a> QueryPlanner<'a> {
     pub fn new(
         schema: &'a Schema,
         secondary_indexes: &'a [IndexDefinition],
-        query: &'a QueryExpression,
+        filter: Option<&'a FilterExpression>,
+        order_by: &'a SortOptions,
     ) -> Self {
         Self {
             schema,
             secondary_indexes,
-            query,
+            filter,
+            order_by,
         }
     }
 
@@ -30,14 +33,14 @@ impl<'a> QueryPlanner<'a> {
         // Collect all the filters.
         // TODO: Handle filters like And([a > 0, a < 10]).
         let mut filters = vec![];
-        if let Some(expression) = &self.query.filter {
+        if let Some(expression) = &self.filter {
             collect_filters(self.schema, expression, &mut filters)?;
         }
 
         // Filter the sort options.
         // TODO: Handle duplicate fields.
         let mut order_by = vec![];
-        for order in &self.query.order_by.0 {
+        for order in &self.order_by.0 {
             // Find the field index.
             let (field_index, _, _) =
                 get_field_index_and_type(&order.field_name, &self.schema.fields)
@@ -235,7 +238,6 @@ fn all_indexes_are_present(
             scans.push(IndexScan {
                 index_id: idx,
                 kind: index_scan_kind,
-                is_single_field_sorted_inverted: is_single_field_sorted_inverted(&indexes[idx]),
             });
         }
         None => return None,
@@ -244,14 +246,6 @@ fn all_indexes_are_present(
     Some(scans)
 }
 
-fn is_single_field_sorted_inverted(index: &IndexDefinition) -> bool {
-    match index {
-        // `fields.len() == 1` criteria must be kept the same with `comparator.rs`.
-        IndexDefinition::SortedInverted(fields) => fields.len() == 1,
-        _ => false,
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use crate::cache::plan::SortedInvertedRangeQuery;
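Since `QueryPlanner::new` now takes the filter and sort options directly, callers no longer need to assemble a full `QueryExpression` just to plan. A sketch under that signature (crate-internal types; the filter values are illustrative):

    // Plan a filtered, unsorted query; Plan owns its scans, so nothing borrows past this call.
    fn plan_filter(schema: &Schema, indexes: &[IndexDefinition]) -> Result<Plan, PlanError> {
        let filter = FilterExpression::Simple(
            "foo".to_string(),
            Operator::EQ,
            Value::from("bar".to_string()),
        );
        QueryPlanner::new(schema, indexes, Some(&filter), &SortOptions::default()).plan()
    }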
diff --git a/dozer-cache/src/cache/plan/tests.rs b/dozer-cache/src/cache/plan/tests.rs
index f8f4ddcf4e..68176e6404 100644
--- a/dozer-cache/src/cache/plan/tests.rs
+++ b/dozer-cache/src/cache/plan/tests.rs
@@ -1,10 +1,8 @@
 use super::{Plan, QueryPlanner};
 use crate::cache::{
-    expression::{
-        self, FilterExpression, Operator, QueryExpression, Skip, SortDirection, SortOption,
-    },
+    expression::{self, FilterExpression, Operator, SortDirection, SortOption, SortOptions},
     plan::{IndexScanKind, SortedInvertedRangeQuery},
-    test_utils::{self, query_from_filter},
+    test_utils,
 };
 use dozer_types::{serde_json::Value, types::Field};
 
@@ -13,13 +11,20 @@ use dozer_types::{serde_json::Value, types::Field};
 fn test_generate_plan_simple() {
     let (schema, secondary_indexes) = test_utils::schema_0();
 
-    let query = query_from_filter(FilterExpression::Simple(
+    let filter = FilterExpression::Simple(
         "foo".to_string(),
         expression::Operator::EQ,
         Value::from("bar".to_string()),
-    ));
-    let planner = QueryPlanner::new(&schema, &secondary_indexes, &query);
-    if let Plan::IndexScans(index_scans) = planner.plan().unwrap() {
+    );
+    let plan = QueryPlanner::new(
+        &schema,
+        &secondary_indexes,
+        Some(&filter),
+        &Default::default(),
+    )
+    .plan()
+    .unwrap();
+    if let Plan::IndexScans(index_scans) = plan {
         assert_eq!(index_scans.len(), 1);
         assert_eq!(index_scans[0].index_id, 0);
         match &index_scans[0].kind {
@@ -50,10 +55,16 @@ fn test_generate_plan_and() {
             Value::from("test".to_string()),
         ),
     ]);
-    let query = query_from_filter(filter);
-    let planner = QueryPlanner::new(&schema, &secondary_indexes, &query);
+    let plan = QueryPlanner::new(
+        &schema,
+        &secondary_indexes,
+        Some(&filter),
+        &Default::default(),
+    )
+    .plan()
+    .unwrap();
     // Pick the 3rd index
-    if let Plan::IndexScans(index_scans) = planner.plan().unwrap() {
+    if let Plan::IndexScans(index_scans) = plan {
         assert_eq!(index_scans.len(), 1);
         assert_eq!(index_scans[0].index_id, 3);
         match &index_scans[0].kind {
@@ -77,16 +88,11 @@ fn test_generate_plan_and() {
 fn test_generate_plan_range_query_and_order_by() {
     let (schema, secondary_indexes) = test_utils::schema_1();
     let filter = FilterExpression::Simple("c".into(), expression::Operator::GT, 1.into());
-    let query = QueryExpression::new(
-        Some(filter),
-        vec![SortOption {
-            field_name: "c".into(),
-            direction: SortDirection::Descending,
-        }],
-        Some(10),
-        Skip::Skip(0),
-    );
-    let planner = QueryPlanner::new(&schema, &secondary_indexes, &query);
+    let order_by = SortOptions(vec![SortOption {
+        field_name: "c".into(),
+        direction: SortDirection::Descending,
+    }]);
+    let planner = QueryPlanner::new(&schema, &secondary_indexes, Some(&filter), &order_by);
 
     if let Plan::IndexScans(index_scans) = planner.plan().unwrap() {
         assert_eq!(index_scans.len(), 1);
         assert_eq!(index_scans[0].index_id, 2);
@@ -116,11 +122,14 @@ fn test_generate_plan_range_query_and_order_by() {
 fn test_generate_plan_empty() {
     let (schema, secondary_indexes) = test_utils::schema_1();
 
-    let query = query_from_filter(FilterExpression::Simple(
-        "c".into(),
-        Operator::LT,
-        Value::Null,
-    ));
-    let planner = QueryPlanner::new(&schema, &secondary_indexes, &query);
-    assert!(matches!(planner.plan().unwrap(), Plan::ReturnEmpty));
+    let filter = FilterExpression::Simple("c".into(), Operator::LT, Value::Null);
+    let plan = QueryPlanner::new(
+        &schema,
+        &secondary_indexes,
+        Some(&filter),
+        &Default::default(),
+    )
+    .plan()
+    .unwrap();
+    assert!(matches!(plan, Plan::ReturnEmpty));
 }
diff --git a/dozer-cache/src/cache/test_utils.rs b/dozer-cache/src/cache/test_utils.rs
index 803bd10fb5..b0db04f682 100644
--- a/dozer-cache/src/cache/test_utils.rs
+++ b/dozer-cache/src/cache/test_utils.rs
@@ -1,10 +1,10 @@
 use dozer_types::types::{
-    FieldDefinition, IndexDefinition, Schema, SchemaIdentifier, SourceDefinition,
+    FieldDefinition, IndexDefinition, Schema, SchemaIdentifier, SchemaWithIndex, SourceDefinition,
 };
 
 use super::expression::{FilterExpression, QueryExpression, Skip};
 
-pub fn schema_0() -> (Schema, Vec<IndexDefinition>) {
+pub fn schema_0() -> SchemaWithIndex {
     (
         Schema {
             identifier: Some(SchemaIdentifier { id: 0, version: 1 }),
@@ -20,7 +20,7 @@ pub fn schema_0() -> (Schema, Vec<IndexDefinition>) {
     )
 }
 
-pub fn schema_1() -> (Schema, Vec<IndexDefinition>) {
+pub fn schema_1() -> SchemaWithIndex {
     (
         Schema {
             identifier: Some(SchemaIdentifier { id: 1, version: 1 }),
@@ -56,7 +56,7 @@ pub fn schema_1() -> (Schema, Vec<IndexDefinition>) {
     )
 }
 
-pub fn schema_full_text() -> (Schema, Vec<IndexDefinition>) {
+pub fn schema_full_text() -> SchemaWithIndex {
     (
         Schema {
             identifier: Some(SchemaIdentifier { id: 2, version: 1 }),
@@ -81,7 +81,7 @@ pub fn schema_full_text() -> (Schema, Vec<IndexDefinition>) {
 }
 
 // This is for testing appending only schema, which doesn't need a primary index, for example, eth logs.
-pub fn schema_empty_primary_index() -> (Schema, Vec<IndexDefinition>) {
+pub fn schema_empty_primary_index() -> SchemaWithIndex {
     (
         Schema {
             identifier: Some(SchemaIdentifier { id: 3, version: 1 }),
@@ -97,7 +97,7 @@ pub fn schema_empty_primary_index() -> (Schema, Vec<IndexDefinition>) {
     )
 }
 
-pub fn schema_multi_indices() -> (Schema, Vec<IndexDefinition>) {
+pub fn schema_multi_indices() -> SchemaWithIndex {
     (
         Schema {
             identifier: Some(SchemaIdentifier { id: 4, version: 1 }),
diff --git a/dozer-cache/src/errors.rs b/dozer-cache/src/errors.rs
index 18228b6158..8c497dc791 100644
--- a/dozer-cache/src/errors.rs
+++ b/dozer-cache/src/errors.rs
@@ -1,13 +1,15 @@
+use std::path::PathBuf;
+
 use dozer_types::thiserror;
 use dozer_types::thiserror::Error;
 
 use dozer_types::errors::types::{DeserializationError, SerializationError, TypeError};
-use dozer_types::types::SchemaIdentifier;
+use dozer_types::types::{IndexDefinition, SchemaWithIndex};
 
 #[derive(Error, Debug)]
 pub enum CacheError {
-    #[error("Io error: {0}")]
-    Io(#[from] std::io::Error),
+    #[error("Io error on {0:?}: {1}")]
+    Io(PathBuf, #[source] std::io::Error),
     #[error("Query error: {0}")]
     Query(#[from] QueryError),
     #[error("Index error: {0}")]
@@ -18,14 +20,24 @@ pub enum CacheError {
     Type(#[from] TypeError),
     #[error("Storage error: {0}")]
     Storage(#[from] dozer_storage::errors::StorageError),
-    #[error("Schema is not present")]
+    #[error("Schema not found")]
     SchemaNotFound,
-    #[error("Schema Identifier is duplicated: {0:?}")]
-    DuplicateSchemaIdentifier(SchemaIdentifier),
+    #[error("Schema for {name} mismatch: given {given:?}, stored {stored:?}")]
+    SchemaMismatch {
+        name: String,
+        given: Box<SchemaWithIndex>,
+        stored: Box<SchemaWithIndex>,
+    },
+    #[error("Index definition {0} not found")]
+    IndexDefinitionNotFound(String),
+    #[error("Index definition {name} mismatch: given {given:?}, stored {stored:?}")]
+    IndexDefinitionMismatch {
+        name: String,
+        given: IndexDefinition,
+        stored: IndexDefinition,
+    },
     #[error("Path not initialized for Cache Reader")]
     PathNotInitialized,
-    #[error("Secondary index database is not found")]
-    SecondaryIndexDatabaseNotFound,
     #[error("Primary key is not found")]
     PrimaryKeyNotFound,
     #[error("Primary key already exists")]
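The new mismatch variants carry both sides of the comparison, so open-time validation errors are self-describing. A hypothetical check built on them (`check_schema` is illustrative, not part of the patch):

    fn check_schema(
        name: &str,
        given: &SchemaWithIndex,
        stored: &SchemaWithIndex,
    ) -> Result<(), CacheError> {
        // Compare the schema the caller expects against what the cache has stored.
        if given != stored {
            return Err(CacheError::SchemaMismatch {
                name: name.to_string(),
                given: Box::new(given.clone()),
                stored: Box::new(stored.clone()),
            });
        }
        Ok(())
    }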
diff --git a/dozer-cache/src/reader.rs b/dozer-cache/src/reader.rs
index de48007667..f04f190265 100644
--- a/dozer-cache/src/reader.rs
+++ b/dozer-cache/src/reader.rs
@@ -4,7 +4,7 @@ use super::cache::expression::FilterExpression;
 use crate::errors::CacheError;
 use dozer_types::{
     serde,
-    types::{IndexDefinition, Record, Schema},
+    types::{Record, SchemaWithIndex},
 };
 use serde::{Deserialize, Serialize};
 
@@ -36,7 +36,7 @@ impl CacheReader {
         Ok(())
     }
 
-    pub fn get_schema(&self) -> Result<&(Schema, Vec<IndexDefinition>), CacheError> {
+    pub fn get_schema(&self) -> &SchemaWithIndex {
         self.cache.get_schema()
     }
 
diff --git a/dozer-core/src/dag_metadata.rs b/dozer-core/src/dag_metadata.rs
index 27b4d23f41..4b92f9b2a6 100644
--- a/dozer-core/src/dag_metadata.rs
+++ b/dozer-core/src/dag_metadata.rs
@@ -10,7 +10,8 @@ use dozer_storage::errors::StorageError;
 use dozer_storage::errors::StorageError::{DeserializationError, SerializationError};
 use dozer_storage::lmdb::{Database, DatabaseFlags};
 use dozer_storage::lmdb_storage::{
-    LmdbEnvironmentManager, LmdbEnvironmentOptions, LmdbExclusiveTransaction, SharedTransaction,
+    CreateDatabase, LmdbEnvironmentManager, LmdbEnvironmentOptions, LmdbExclusiveTransaction,
+    SharedTransaction,
 };
 use dozer_types::bincode;
 use dozer_types::log::debug;
diff --git a/dozer-core/src/executor/execution_dag.rs b/dozer-core/src/executor/execution_dag.rs
index cc30aff5ef..fe32a7c3b8 100644
--- a/dozer-core/src/executor/execution_dag.rs
+++ b/dozer-core/src/executor/execution_dag.rs
@@ -166,7 +166,6 @@ impl ExecutionDag {
         (senders, record_writers)
     }
 
-    #[allow(clippy::type_complexity)]
     pub fn collect_receivers(
         &mut self,
         node_index: daggy::NodeIndex,
diff --git a/dozer-core/src/record_store.rs b/dozer-core/src/record_store.rs
index 122236969d..fe858f6ee4 100644
--- a/dozer-core/src/record_store.rs
+++ b/dozer-core/src/record_store.rs
@@ -18,7 +18,6 @@ impl Debug for dyn RecordWriter {
     }
 }
 
-#[allow(clippy::type_complexity)]
 pub fn create_record_store(
     _output_port: PortHandle,
     output_port_type: OutputPortType,
diff --git a/dozer-orchestrator/src/pipeline/sinks.rs b/dozer-orchestrator/src/pipeline/sinks.rs
index ad103ed38a..24bc98612d 100644
--- a/dozer-orchestrator/src/pipeline/sinks.rs
+++ b/dozer-orchestrator/src/pipeline/sinks.rs
@@ -16,9 +16,8 @@ use dozer_types::log::{debug, info};
 use dozer_types::models::api_endpoint::{ApiEndpoint, ApiIndex};
 use dozer_types::models::api_security::ApiSecurity;
 use dozer_types::models::flags::Flags;
-
 use dozer_types::tracing::span;
-use dozer_types::types::FieldType;
+use dozer_types::types::{FieldType, SchemaWithIndex};
 use dozer_types::types::{IndexDefinition, Operation, Schema, SchemaIdentifier};
 use std::collections::HashMap;
 use std::path::PathBuf;
@@ -88,7 +87,7 @@ impl CacheSinkFactory {
     fn get_output_schema(
         &self,
         mut input_schemas: HashMap<PortHandle, Schema>,
-    ) -> Result<(Schema, Vec<IndexDefinition>), ExecutionError> {
+    ) -> Result<SchemaWithIndex, ExecutionError> {
         debug_assert!(input_schemas.len() == 1);
         let mut schema = input_schemas
             .remove(&DEFAULT_PORT_HANDLE)
@@ -359,11 +358,7 @@ impl Sink for CacheSink {
         let _enter = span.enter();
 
         let endpoint_name = &self.api_endpoint.name;
-        let schema = &self
-            .cache
-            .get_schema()
-            .map_err(|_| ExecutionError::SchemaNotInitialized)?
-            .0;
+        let schema = &self.cache.get_schema().0;
 
         match op {
             Operation::Delete { mut old } => {
diff --git a/dozer-storage/src/lib.rs b/dozer-storage/src/lib.rs
index 5aafab8451..9107bbd334 100644
--- a/dozer-storage/src/lib.rs
+++ b/dozer-storage/src/lib.rs
@@ -15,6 +15,10 @@ mod lmdb_set;
 pub use lmdb_set::LmdbSet;
 mod lmdb_counter;
 pub use lmdb_counter::LmdbCounter;
+mod lmdb_transaction;
+pub use lmdb_transaction::{BeginTransaction, ReadTransaction};
+mod lmdb_option;
+pub use lmdb_option::LmdbOption;
 
 #[cfg(test)]
 mod tests;
diff --git a/dozer-storage/src/lmdb_counter.rs b/dozer-storage/src/lmdb_counter.rs
index bf5e05b92c..1ab0762e6d 100644
--- a/dozer-storage/src/lmdb_counter.rs
+++ b/dozer-storage/src/lmdb_counter.rs
@@ -1,42 +1,28 @@
 use dozer_types::borrow::IntoOwned;
 use lmdb::{RwTransaction, Transaction};
 
-use crate::{
-    errors::StorageError,
-    lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction},
-    LmdbMap,
-};
+use crate::{errors::StorageError, lmdb_storage::CreateDatabase, LmdbOption};
 
-const COUNTER_KEY: u8 = 0;
-
-#[derive(Debug)]
-pub struct LmdbCounter(LmdbMap<u8, u64>);
+#[derive(Debug, Clone, Copy)]
+pub struct LmdbCounter(LmdbOption<u64>);
 
 impl LmdbCounter {
-    pub fn new_from_env(
-        env: &mut LmdbEnvironmentManager,
-        name: Option<&str>,
-        create_if_not_exist: bool,
-    ) -> Result<Self, StorageError> {
-        LmdbMap::new_from_env(env, name, create_if_not_exist).map(Self)
-    }
-
-    pub fn new_from_txn(
-        txn: &mut LmdbExclusiveTransaction,
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
         name: Option<&str>,
         create_if_not_exist: bool,
     ) -> Result<Self, StorageError> {
-        LmdbMap::new_from_txn(txn, name, create_if_not_exist).map(Self)
+        LmdbOption::new(c, name, create_if_not_exist).map(Self)
     }
 
     pub fn load(&self, txn: &impl Transaction) -> Result<u64, StorageError> {
         self.0
-            .get(txn, &COUNTER_KEY)
+            .load(txn)
            .map(|value| value.map_or(0, IntoOwned::into_owned))
     }
 
     pub fn store(&self, txn: &mut RwTransaction, value: u64) -> Result<(), StorageError> {
-        self.0.insert_overwrite(txn, &COUNTER_KEY, &value)
+        self.0.store(txn, &value)
     }
 
     pub fn fetch_add(&self, txn: &mut RwTransaction, value: u64) -> Result<u64, StorageError> {
@@ -64,7 +50,7 @@ mod tests {
             LmdbEnvironmentOptions::default(),
         )
         .unwrap();
-        let counter = LmdbCounter::new_from_env(&mut env, None, true).unwrap();
+        let counter = LmdbCounter::new(&mut env, None, true).unwrap();
 
         let txn = env.create_txn().unwrap();
         let mut txn = txn.write();
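`LmdbCounter` keeps its read/write API; only construction is unified behind the new `CreateDatabase` bound. A sketch following the shape of the test above (the database name is hypothetical, and fetch_add returning the pre-increment value is an assumption from the usual fetch_add contract):

    let counter = LmdbCounter::new(&mut env, Some("operations"), true)?;
    let txn = env.create_txn()?;
    let mut txn = txn.write();
    let id = counter.fetch_add(txn.txn_mut(), 1)?; // assumed: returns the previous value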
diff --git a/dozer-storage/src/lmdb_database/lmdb_val.rs b/dozer-storage/src/lmdb_database/lmdb_val.rs
index 5502fe19a7..77b075dd34 100644
--- a/dozer-storage/src/lmdb_database/lmdb_val.rs
+++ b/dozer-storage/src/lmdb_database/lmdb_val.rs
@@ -1,6 +1,6 @@
 use dozer_types::{
     borrow::{Borrow, Cow},
-    types::{IndexDefinition, Record, Schema},
+    types::{IndexDefinition, Record, SchemaWithIndex},
 };
 
 use crate::errors::StorageError;
@@ -215,33 +215,61 @@ unsafe impl LmdbKey for Record {
 
 unsafe impl LmdbVal for Record {}
 
-impl<'a> Encode<'a> for &'a (Schema, Vec<IndexDefinition>) {
+impl<'a> Encode<'a> for &'a IndexDefinition {
     fn encode(self) -> Result<Encoded<'a>, StorageError> {
         dozer_types::bincode::serialize(self)
             .map(Encoded::Vec)
             .map_err(|e| StorageError::SerializationError {
-                typ: "(Schema, Vec<IndexDefinition>)",
+                typ: "IndexDefinition",
                 reason: Box::new(e),
             })
     }
 }
 
-impl BorrowEncode for (Schema, Vec<IndexDefinition>) {
-    type Encode<'a> = &'a (Schema, Vec<IndexDefinition>);
+impl BorrowEncode for IndexDefinition {
+    type Encode<'a> = &'a IndexDefinition;
 }
 
-impl Decode for (Schema, Vec<IndexDefinition>) {
+impl Decode for IndexDefinition {
     fn decode(bytes: &[u8]) -> Result<Cow<Self>, StorageError> {
         dozer_types::bincode::deserialize(bytes)
             .map(Cow::Owned)
             .map_err(|e| StorageError::DeserializationError {
-                typ: "(Schema, Vec<IndexDefinition>)",
+                typ: "IndexDefinition",
                 reason: Box::new(e),
             })
     }
 }
 
-unsafe impl LmdbVal for (Schema, Vec<IndexDefinition>) {}
+unsafe impl LmdbVal for IndexDefinition {}
+
+impl<'a> Encode<'a> for &'a SchemaWithIndex {
+    fn encode(self) -> Result<Encoded<'a>, StorageError> {
+        dozer_types::bincode::serialize(self)
+            .map(Encoded::Vec)
+            .map_err(|e| StorageError::SerializationError {
+                typ: "SchemaWithIndex",
+                reason: Box::new(e),
+            })
+    }
+}
+
+impl BorrowEncode for SchemaWithIndex {
+    type Encode<'a> = &'a SchemaWithIndex;
+}
+
+impl Decode for SchemaWithIndex {
+    fn decode(bytes: &[u8]) -> Result<Cow<Self>, StorageError> {
+        dozer_types::bincode::deserialize(bytes)
+            .map(Cow::Owned)
+            .map_err(|e| StorageError::DeserializationError {
+                typ: "SchemaWithIndex",
+                reason: Box::new(e),
+            })
+    }
+}
+
+unsafe impl LmdbVal for SchemaWithIndex {}
 
 #[cfg(test)]
 mod tests {
diff --git a/dozer-storage/src/lmdb_database/raw_iterator.rs b/dozer-storage/src/lmdb_database/raw_iterator.rs
index 70235c1509..dcb4846681 100644
--- a/dozer-storage/src/lmdb_database/raw_iterator.rs
+++ b/dozer-storage/src/lmdb_database/raw_iterator.rs
@@ -155,7 +155,9 @@ mod tests {
     use lmdb::{Database, DatabaseFlags, Transaction, WriteFlags};
     use tempdir::TempDir;
 
-    use crate::lmdb_storage::{LmdbEnvironmentManager, LmdbEnvironmentOptions, SharedTransaction};
+    use crate::lmdb_storage::{
+        CreateDatabase, LmdbEnvironmentManager, LmdbEnvironmentOptions, SharedTransaction,
+    };
 
     use super::*;
 
diff --git a/dozer-storage/src/lmdb_map.rs b/dozer-storage/src/lmdb_map.rs
index 6dfe01eb97..82bf71a328 100644
--- a/dozer-storage/src/lmdb_map.rs
+++ b/dozer-storage/src/lmdb_map.rs
@@ -4,9 +4,8 @@ use dozer_types::borrow::Cow;
 use lmdb::{Database, DatabaseFlags, RoCursor, RwTransaction, Transaction, WriteFlags};
 
 use crate::{
-    errors::StorageError,
-    lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction},
-    Encode, Iterator, KeyIterator, LmdbKey, LmdbKeyType, LmdbVal, ValueIterator,
+    errors::StorageError, lmdb_storage::CreateDatabase, Encode, Iterator, KeyIterator, LmdbKey,
+    LmdbKeyType, LmdbVal, ValueIterator,
 };
 
 #[derive(Debug)]
@@ -33,8 +32,8 @@ unsafe impl<K, V> Send for LmdbMap<K, V> {}
 unsafe impl<K, V> Sync for LmdbMap<K, V> {}
 
 impl<K: LmdbKey, V: LmdbVal> LmdbMap<K, V> {
-    pub fn new_from_env(
-        env: &mut LmdbEnvironmentManager,
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
         name: Option<&str>,
         create_if_not_exist: bool,
     ) -> Result<Self, StorageError> {
@@ -44,27 +43,7 @@ impl<K: LmdbKey, V: LmdbVal> LmdbMap<K, V> {
             None
         };
 
-        let db = env.create_database(name, create_flags)?;
-
-        Ok(Self {
-            db,
-            _key: std::marker::PhantomData,
-            _value: std::marker::PhantomData,
-        })
-    }
-
-    pub fn new_from_txn(
-        txn: &mut LmdbExclusiveTransaction,
-        name: Option<&str>,
-        create_if_not_exist: bool,
-    ) -> Result<Self, StorageError> {
-        let create_flags = if create_if_not_exist {
-            Some(database_key_flag::<K>())
-        } else {
-            None
-        };
-
-        let db = txn.create_database(name, create_flags)?;
+        let db = c.create_database(name, create_flags)?;
 
         Ok(Self {
             db,
@@ -221,16 +200,17 @@ mod tests {
     #[test]
     fn test_lmdb_map() {
         let temp_dir = TempDir::new("test_lmdb_map").unwrap();
-        let env = LmdbEnvironmentManager::create(
+        let mut env = LmdbEnvironmentManager::create(
             temp_dir.path(),
             "env",
             LmdbEnvironmentOptions::default(),
         )
         .unwrap();
 
+        let map = LmdbMap::<Vec<u8>, Vec<u8>>::new(&mut env, None, true).unwrap();
+
         let txn = env.create_txn().unwrap();
         let mut txn = txn.write();
 
-        let map = LmdbMap::<Vec<u8>, Vec<u8>>::new_from_txn(&mut txn, None, true).unwrap();
         assert_eq!(map.count(txn.txn()).unwrap(), 0);
 
         assert!(map
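Note the reordering in the test: databases are now created from the environment manager before the shared transaction is opened, rather than from inside the exclusive transaction. The pattern, sketched (path value assumed):

    let mut env = LmdbEnvironmentManager::create(path, "env", LmdbEnvironmentOptions::default())?;
    let map = LmdbMap::<Vec<u8>, Vec<u8>>::new(&mut env, None, true)?; // borrow env mutably first
    let txn = env.create_txn()?; // then hand the environment over to the transaction
    let mut txn = txn.write();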
diff --git a/dozer-storage/src/lmdb_multimap.rs b/dozer-storage/src/lmdb_multimap.rs
index 81014a1d55..3331beec00 100644
--- a/dozer-storage/src/lmdb_multimap.rs
+++ b/dozer-storage/src/lmdb_multimap.rs
@@ -5,7 +5,7 @@ use lmdb::{Database, DatabaseFlags, RoCursor, RwTransaction, Transaction, WriteF
 use crate::{
     errors::StorageError,
     lmdb_map::{database_key_flag, lmdb_stat},
-    lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction},
+    lmdb_storage::CreateDatabase,
     Encode, Iterator, LmdbKey, LmdbKeyType,
 };
 
@@ -33,8 +33,8 @@ unsafe impl<K, V> Send for LmdbMultimap<K, V> {}
 unsafe impl<K, V> Sync for LmdbMultimap<K, V> {}
 
 impl<K: LmdbKey, V: LmdbKey> LmdbMultimap<K, V> {
-    pub fn new_from_env(
-        env: &mut LmdbEnvironmentManager,
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
         name: Option<&str>,
         create_if_not_exist: bool,
     ) -> Result<Self, StorageError> {
@@ -44,27 +44,7 @@ impl<K: LmdbKey, V: LmdbKey> LmdbMultimap<K, V> {
             None
         };
 
-        let db = env.create_database(name, create_flags)?;
-
-        Ok(Self {
-            db,
-            _key: std::marker::PhantomData,
-            _value: std::marker::PhantomData,
-        })
-    }
-
-    pub fn new_from_txn(
-        txn: &mut LmdbExclusiveTransaction,
-        name: Option<&str>,
-        create_if_not_exist: bool,
-    ) -> Result<Self, StorageError> {
-        let create_flags = if create_if_not_exist {
-            Some(database_flag::<K, V>())
-        } else {
-            None
-        };
-
-        let db = txn.create_database(name, create_flags)?;
+        let db = c.create_database(name, create_flags)?;
 
         Ok(Self {
             db,
@@ -158,16 +138,17 @@ mod tests {
     #[test]
     fn test_lmdb_multimap() {
         let temp_dir = TempDir::new("test_lmdb_map").unwrap();
-        let env = LmdbEnvironmentManager::create(
+        let mut env = LmdbEnvironmentManager::create(
             temp_dir.path(),
             "env",
             LmdbEnvironmentOptions::default(),
         )
         .unwrap();
 
+        let map = LmdbMultimap::<u64, u64>::new(&mut env, None, true).unwrap();
+
         let txn = env.create_txn().unwrap();
         let mut txn = txn.write();
 
-        let map = LmdbMultimap::<u64, u64>::new_from_txn(&mut txn, None, true).unwrap();
         assert!(map.insert(txn.txn_mut(), &1u64, &2u64).unwrap());
         assert!(!map.insert(txn.txn_mut(), &1u64, &2u64).unwrap());
         assert!(map.insert(txn.txn_mut(), &1u64, &3u64).unwrap());
diff --git a/dozer-storage/src/lmdb_option.rs b/dozer-storage/src/lmdb_option.rs
new file mode 100644
index 0000000000..c6ae2d8bec
--- /dev/null
+++ b/dozer-storage/src/lmdb_option.rs
@@ -0,0 +1,39 @@
+use dozer_types::borrow::Cow;
+use lmdb::{RwTransaction, Transaction};
+
+use crate::{errors::StorageError, lmdb_storage::CreateDatabase, LmdbMap, LmdbVal};
+
+#[derive(Debug)]
+pub struct LmdbOption<V>(LmdbMap<u8, V>);
+
+impl<V> Clone for LmdbOption<V> {
+    fn clone(&self) -> Self {
+        Self(self.0)
+    }
+}
+
+impl<V> Copy for LmdbOption<V> {}
+
+impl<V: LmdbVal> LmdbOption<V> {
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
+        name: Option<&str>,
+        create_if_not_exist: bool,
+    ) -> Result<Self, StorageError> {
+        let map = LmdbMap::new(c, name, create_if_not_exist)?;
+        Ok(Self(map))
+    }
+
+    pub fn load<'txn, T: Transaction>(
+        &self,
+        txn: &'txn T,
+    ) -> Result<Option<Cow<'txn, V>>, StorageError> {
+        self.0.get(txn, &KEY)
+    }
+
+    pub fn store(&self, txn: &mut RwTransaction, value: V::Encode<'_>) -> Result<(), StorageError> {
+        self.0.insert_overwrite(txn, &KEY, value)
+    }
+}
+
+const KEY: u8 = 0;
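`LmdbOption` is a single-slot map keyed by the constant `KEY` above, and `LmdbCounter` is now just `LmdbOption<u64>` plus arithmetic. A usage sketch following the counter test's shape (database name hypothetical; `IntoOwned` from dozer_types::borrow assumed in scope):

    let slot = LmdbOption::<u64>::new(&mut env, Some("state"), true)?;
    let txn = env.create_txn()?;
    let mut txn = txn.write();
    slot.store(txn.txn_mut(), &42)?;
    let value = slot.load(txn.txn())?.map(IntoOwned::into_owned); // Some(42)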
diff --git a/dozer-storage/src/lmdb_set.rs b/dozer-storage/src/lmdb_set.rs
index 95ff711c6d..e6aaae828b 100644
--- a/dozer-storage/src/lmdb_set.rs
+++ b/dozer-storage/src/lmdb_set.rs
@@ -1,10 +1,6 @@
 use lmdb::{RoCursor, RwTransaction, Transaction};
 
-use crate::{
-    errors::StorageError,
-    lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction},
-    KeyIterator, LmdbKey, LmdbMap,
-};
+use crate::{errors::StorageError, lmdb_storage::CreateDatabase, KeyIterator, LmdbKey, LmdbMap};
 
 #[derive(Debug)]
 pub struct LmdbSet<K>(LmdbMap<K, Vec<u8>>);
@@ -18,20 +14,12 @@ impl<K> Clone for LmdbSet<K> {
 impl<K> Copy for LmdbSet<K> {}
 
 impl<K: LmdbKey> LmdbSet<K> {
-    pub fn new_from_env(
-        env: &mut LmdbEnvironmentManager,
+    pub fn new<C: CreateDatabase>(
+        c: &mut C,
         name: Option<&str>,
         create_if_not_exist: bool,
     ) -> Result<Self, StorageError> {
-        LmdbMap::new_from_env(env, name, create_if_not_exist).map(Self)
-    }
-
-    pub fn new_from_txn(
-        txn: &mut LmdbExclusiveTransaction,
-        name: Option<&str>,
-        create_if_not_exist: bool,
-    ) -> Result<Self, StorageError> {
-        LmdbMap::new_from_txn(txn, name, create_if_not_exist).map(Self)
+        LmdbMap::new(c, name, create_if_not_exist).map(Self)
     }
 
     pub fn count<T: Transaction>(&self, txn: &T) -> Result<usize, StorageError> {
@@ -108,7 +96,7 @@ mod tests {
             LmdbEnvironmentOptions::default(),
         )
         .unwrap();
-        let set = LmdbSet::<u64>::new_from_env(&mut env, Some("test"), true).unwrap();
+        let set = LmdbSet::<u64>::new(&mut env, Some("test"), true).unwrap();
 
         let txn = env.create_txn().unwrap();
         let mut txn = txn.write();
diff --git a/dozer-storage/src/lmdb_storage.rs b/dozer-storage/src/lmdb_storage.rs
index 1a2031fe32..68f9f4ab74 100644
--- a/dozer-storage/src/lmdb_storage.rs
+++ b/dozer-storage/src/lmdb_storage.rs
@@ -47,6 +47,14 @@ impl Default for LmdbEnvironmentOptions {
     }
 }
 
+pub trait CreateDatabase {
+    fn create_database(
+        &mut self,
+        name: Option<&str>,
+        create_flags: Option<DatabaseFlags>,
+    ) -> Result<Database, StorageError>;
+}
+
 #[derive(Debug)]
 /// This is a safe wrapper around `lmdb::Environment` that is opened with `NO_TLS` and `NO_LOCK`.
 ///
@@ -55,6 +63,21 @@ pub struct LmdbEnvironmentManager {
     inner: Environment,
 }
 
+impl CreateDatabase for LmdbEnvironmentManager {
+    /// Opens a database, creating it if it doesn't exist and `create_flags` is `Some`.
+    fn create_database(
+        &mut self,
+        name: Option<&str>,
+        create_flags: Option<DatabaseFlags>,
+    ) -> Result<Database, StorageError> {
+        if let Some(flags) = create_flags {
+            Ok(self.inner.create_db(name, flags)?)
+        } else {
+            Ok(self.inner.open_db(name)?)
+        }
+    }
+}
+
 impl LmdbEnvironmentManager {
     pub fn exists(path: &Path, name: &str) -> bool {
         let full_path = path.join(Path::new(name));
@@ -94,18 +117,6 @@ impl LmdbEnvironmentManager {
         ))))
     }
 
-    pub fn create_database(
-        &mut self,
-        name: Option<&str>,
-        create_flags: Option<DatabaseFlags>,
-    ) -> Result<Database, StorageError> {
-        if let Some(flags) = create_flags {
-            Ok(self.inner.create_db(name, flags)?)
-        } else {
-            Ok(self.inner.open_db(name)?)
-        }
-    }
-
     pub fn begin_ro_txn(&self) -> Result<RoTransaction, StorageError> {
         Ok(self.inner.begin_ro_txn()?)
     }
@@ -149,6 +160,26 @@ pub struct LmdbExclusiveTransaction {
 const PANIC_MESSAGE: &str =
     "LmdbExclusiveTransaction cannot be used after `commit_and_renew` fails.";
 
+impl CreateDatabase for LmdbExclusiveTransaction {
+    /// If this method fails, subsequent calls on `self` will panic.
+    fn create_database(
+        &mut self,
+        name: Option<&str>,
+        create_flags: Option<DatabaseFlags>,
+    ) -> Result<Database, StorageError> {
+        // SAFETY: This transaction is exclusive and commits immediately.
+        let db = unsafe {
+            if let Some(flags) = create_flags {
+                self.txn_mut().create_db(name, flags)?
+            } else {
+                self.txn_mut().open_db(name)?
+            }
+        };
+        self.commit_and_renew()?;
+        Ok(db)
+    }
+}
+
 impl LmdbExclusiveTransaction {
     pub fn new(env: Environment) -> Result<Self, StorageError> {
         let inner = env.begin_rw_txn()?;
@@ -175,25 +206,6 @@ impl LmdbExclusiveTransaction {
         Ok(())
     }
 
-    /// Opens a database, creating it if it doesn't exist and `create_flags` is `Some`.
-    /// If this method fails, following calls to `self` will panic.
-    pub fn create_database(
-        &mut self,
-        name: Option<&str>,
-        create_flags: Option<DatabaseFlags>,
-    ) -> Result<Database, StorageError> {
-        // SAFETY: This transaction is exclusive and commits immediately.
-        let db = unsafe {
-            if let Some(flags) = create_flags {
-                self.txn_mut().create_db(name, flags)?
-            } else {
-                self.txn_mut().open_db(name)?
-            }
-        };
-        self.commit_and_renew()?;
-        Ok(db)
-    }
-
     pub fn txn(&self) -> &RwTransaction {
         self.inner.as_ref().expect(PANIC_MESSAGE)
     }
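With `CreateDatabase` implemented for both `LmdbEnvironmentManager` and `LmdbExclusiveTransaction`, setup code can be written once and reused in either setting. A hypothetical helper (not part of the patch; database names are illustrative):

    fn open_stores<C: CreateDatabase>(
        c: &mut C,
    ) -> Result<(LmdbMap<u64, u64>, LmdbSet<u64>), StorageError> {
        // Works whether `c` is an environment manager or an exclusive transaction.
        let map = LmdbMap::new(c, Some("map"), true)?;
        let set = LmdbSet::new(c, Some("set"), true)?;
        Ok((map, set))
    }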
diff --git a/dozer-storage/src/lmdb_transaction.rs b/dozer-storage/src/lmdb_transaction.rs
new file mode 100644
index 0000000000..5cc333a6e3
--- /dev/null
+++ b/dozer-storage/src/lmdb_transaction.rs
@@ -0,0 +1,41 @@
+use dozer_types::parking_lot::RwLockReadGuard;
+use lmdb::{RoTransaction, Transaction};
+
+use crate::{
+    errors::StorageError,
+    lmdb_storage::{LmdbEnvironmentManager, LmdbExclusiveTransaction, SharedTransaction},
+};
+
+pub struct ReadTransaction<'a>(RwLockReadGuard<'a, LmdbExclusiveTransaction>);
+
+impl<'a> Transaction for ReadTransaction<'a> {
+    fn txn(&self) -> *mut lmdb_sys::MDB_txn {
+        self.0.txn().txn()
+    }
+}
+
+/// This trait abstracts the behavior of locking a `SharedTransaction` for reading
+/// and beginning a `RoTransaction` from `LmdbEnvironmentManager`.
+pub trait BeginTransaction {
+    type Transaction<'a>: Transaction
+    where
+        Self: 'a;
+
+    fn begin_txn(&self) -> Result<Self::Transaction<'_>, StorageError>;
+}
+
+impl BeginTransaction for SharedTransaction {
+    type Transaction<'a> = ReadTransaction<'a> where Self: 'a;
+
+    fn begin_txn(&self) -> Result<Self::Transaction<'_>, StorageError> {
+        Ok(ReadTransaction(self.read()))
+    }
+}
+
+impl BeginTransaction for LmdbEnvironmentManager {
+    type Transaction<'a> = RoTransaction<'a> where Self: 'a;
+
+    fn begin_txn(&self) -> Result<Self::Transaction<'_>, StorageError> {
+        self.begin_ro_txn()
+    }
+}
diff --git a/dozer-types/src/borrow/mod.rs b/dozer-types/src/borrow/mod.rs
index b86c5d72f5..bbcaf90e1a 100644
--- a/dozer-types/src/borrow/mod.rs
+++ b/dozer-types/src/borrow/mod.rs
@@ -1,4 +1,4 @@
-use crate::types::{IndexDefinition, Record, Schema};
+use crate::types::{IndexDefinition, Record, SchemaWithIndex};
 
 pub trait IntoOwned<Owned> {
     fn into_owned(self) -> Owned;
@@ -75,7 +75,7 @@ macro_rules! impl_borrow_for_clone_type {
     };
 }
 
-impl_borrow_for_clone_type!(u8, u32, u64, Record, (Schema, Vec<IndexDefinition>));
+impl_borrow_for_clone_type!(u8, u32, u64, Record, IndexDefinition, SchemaWithIndex);
 
 pub enum Cow<'a, B: Borrow + 'a> {
     Borrowed(B::Borrowed<'a>),
diff --git a/dozer-types/src/types/mod.rs b/dozer-types/src/types/mod.rs
index fea2fdcfd3..c347b4572a 100644
--- a/dozer-types/src/types/mod.rs
+++ b/dozer-types/src/types/mod.rs
@@ -164,6 +164,8 @@ pub enum IndexDefinition {
     FullText(usize),
 }
 
+pub type SchemaWithIndex = (Schema, Vec<IndexDefinition>);
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
 pub struct Record {
     /// Schema implemented by this Record