From 9fa344a721a80119c5bae5f90cee39910898a70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 19 Feb 2024 08:39:33 +0100 Subject: [PATCH] Improve the cookbook by talking about prefix codecs --- heed/src/cookbook.rs | 151 +++++++++++++++++++++++++++++++++++-------- lmdb-master-sys/lmdb | 2 +- 2 files changed, 124 insertions(+), 29 deletions(-) diff --git a/heed/src/cookbook.rs b/heed/src/cookbook.rs index 40e61c19..6eb3554a 100644 --- a/heed/src/cookbook.rs +++ b/heed/src/cookbook.rs @@ -1,53 +1,148 @@ //! A cookbook of examples on how to use heed. //! -//! # Implement a custom codec with `BytesEncode`/`BytesDecode` +//! # Create custom and prefix codecs //! //! With heed you can store any kind of data and serialize it the way you want. -//! To do so you'll need to create a codec by usin the [`BytesEncode`] and [`BytesDecode`] traits. +//! To do so you'll need to create a codec by using the [`BytesEncode`] and [`BytesDecode`] traits. +//! +//! Now imagine that your data is lexicographically well ordered. You can now leverage +//! the use of prefix codecs. Those are classic codecs but are only used to encode key prefixes. +//! +//! In this example we will store logs associated to a timestamp. By encoding the timestamp +//! in big endian we can create a prefix codec that restricts a subset of the data. It is recommended +//! to create codecs to encode prefixes when possible instead of using a slice of bytes. //! //! ``` //! use std::borrow::Cow; -//! use heed::{BoxedError, BytesEncode, BytesDecode}; +//! use std::error::Error; +//! use std::fs; +//! use std::path::Path; +//! +//! use heed::types::*; +//! use heed::{BoxedError, BytesDecode, BytesEncode, Database, EnvOpenOptions}; +//! +//! #[derive(Debug, PartialEq, Eq)] +//! pub enum Level { +//! Debug, +//! Warn, +//! Error, +//! } //! -//! pub enum MyCounter<'a> { -//! One, -//! Two, -//! WhatIsThat(&'a [u8]), +//! #[derive(Debug, PartialEq, Eq)] +//! pub struct LogKey { +//! 
timestamp: u32, +//! level: Level, //! } //! -//! pub struct MyCounterCodec; +//! pub struct LogKeyCodec; //! -//! impl<'a> BytesEncode<'a> for MyCounterCodec { -//! type EItem = MyCounter<'a>; +//! impl<'a> BytesEncode<'a> for LogKeyCodec { +//! type EItem = LogKey; +//! +//! /// Encodes the u32 timestamp in big endian followed by the log level with a single byte. +//! fn bytes_encode(log: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { +//! let (timestamp_bytes, level_byte) = match log { +//! LogKey { timestamp, level: Level::Debug } => (timestamp.to_be_bytes(), 0), +//! LogKey { timestamp, level: Level::Warn } => (timestamp.to_be_bytes(), 1), +//! LogKey { timestamp, level: Level::Error } => (timestamp.to_be_bytes(), 2), +//! }; //! -//! fn bytes_encode(my_counter: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { //! let mut output = Vec::new(); +//! output.extend_from_slice(&timestamp_bytes); +//! output.push(level_byte); +//! Ok(Cow::Owned(output)) +//! } +//! } //! -//! match my_counter { -//! MyCounter::One => output.push(1), -//! MyCounter::Two => output.push(2), -//! MyCounter::WhatIsThat(bytes) => { -//! output.push(u8::MAX); -//! output.extend_from_slice(bytes); -//! }, -//! } +//! impl<'a> BytesDecode<'a> for LogKeyCodec { +//! type DItem = LogKey; //! -//! Ok(Cow::Owned(output)) +//! fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { +//! use std::mem::size_of; +//! +//! let timestamp = match bytes.get(..size_of::<u32>()) { +//! Some(bytes) => bytes.try_into().map(u32::from_be_bytes).unwrap(), +//! None => return Err("invalid log key: cannot extract timestamp".into()), +//! }; +//! +//! let level = match bytes.get(size_of::<u32>()) { +//! Some(&0) => Level::Debug, +//! Some(&1) => Level::Warn, +//! Some(&2) => Level::Error, +//! Some(_) => return Err("invalid log key: invalid log level".into()), +//! None => return Err("invalid log key: cannot extract log level".into()), +//! }; +//! +//! Ok(LogKey { timestamp, level }) //! } //! } //! -//! impl<'a> BytesDecode<'a> for MyCounterCodec { -//! 
type DItem = MyCounter<'a>; +//! /// Encodes the high part of a timestamp. As it is located +//! /// at the start of the key it can be used to only return +//! /// the logs that appeared during a, rather long, period. +//! pub struct LogAtHalfTimestampCodec; +//! +//! impl<'a> BytesEncode<'a> for LogAtHalfTimestampCodec { +//! type EItem = u32; +//! +//! /// This method encodes only the prefix of the keys in this particular case, the timestamp. +//! fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { +//! Ok(Cow::Owned(half_timestamp.to_be_bytes()[..2].to_vec())) +//! } +//! } +//! +//! impl<'a> BytesDecode<'a> for LogAtHalfTimestampCodec { +//! type DItem = LogKey; //! //! fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { -//! match bytes[0] { -//! 1 => Ok(MyCounter::One), -//! 2 => Ok(MyCounter::One), -//! u8::MAX => Ok(MyCounter::WhatIsThat(&bytes[1..])), -//! _ => Err("invalid input".into()), -//! } +//! LogKeyCodec::bytes_decode(bytes) //! } //! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let path = Path::new("target").join("heed.mdb"); +//! +//! fs::create_dir_all(&path)?; +//! +//! let env = EnvOpenOptions::new() +//! .map_size(10 * 1024 * 1024) // 10MB +//! .max_dbs(3000) +//! .open(path)?; +//! +//! let mut wtxn = env.write_txn()?; +//! let db: Database<LogKeyCodec, Str> = env.create_database(&mut wtxn, None)?; +//! +//! db.put( +//! &mut wtxn, +//! &LogKey { timestamp: 1608326232, level: Level::Debug }, +//! "this is a very old log", +//! )?; +//! db.put( +//! &mut wtxn, +//! &LogKey { timestamp: 1708326232, level: Level::Debug }, +//! "fibonacci was executed in 21ms", +//! )?; +//! db.put(&mut wtxn, &LogKey { timestamp: 1708326242, level: Level::Error }, "fibonacci crashed")?; +//! db.put( +//! &mut wtxn, +//! &LogKey { timestamp: 1708326272, level: Level::Warn }, +//! "fibonacci is running since 12s", +//! )?; +//! +//! // We change the way we want to read our database by changing the key codec. +//! 
// In this example we can prefix search only for the logs between a period of time +//! // (the two high bytes of the u32 timestamp). +//! let iter = db.remap_key_type::<LogAtHalfTimestampCodec>().prefix_iter(&wtxn, &1708326232)?; +//! +//! // As we filtered the log for a specific +//! // period of time we must not see the very old log. +//! for result in iter { +//! let (LogKey { timestamp: _, level: _ }, content) = result?; +//! assert_ne!(content, "this is a very old log"); +//! } +//! +//! Ok(()) +//! } //! ``` //! //! diff --git a/lmdb-master-sys/lmdb b/lmdb-master-sys/lmdb index b8e54b4c..3947014a 160000 --- a/lmdb-master-sys/lmdb +++ b/lmdb-master-sys/lmdb @@ -1 +1 @@ -Subproject commit b8e54b4c31378932b69f1298972de54a565185b1 +Subproject commit 3947014aed7ffe39a79991fa7fb5b234da47ad1a