Skip to content

Commit

Permalink
Add interner lifetime
Browse files Browse the repository at this point in the history
This commit adds 'i lifetime that can be used by backends to return &'i
str when it's known that the backing string storage won't be moved.

This is the case for fixed capacity pool-like backends and backends
which avoid backing buffer moves via double indirection.

Signed-off-by: Tin Švagelj <[email protected]>
  • Loading branch information
Caellian committed Nov 19, 2024
1 parent 07c468f commit 598d4a2
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 139 deletions.
44 changes: 24 additions & 20 deletions src/backend/bucket/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod fixed_str;
mod interned_str;

use self::{fixed_str::FixedString, interned_str::InternedStr};
use super::Backend;
use super::{Backend, PhantomBackend};
use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol};
use alloc::{string::String, vec::Vec};
use core::{iter::Enumerate, marker::PhantomData, slice};
Expand Down Expand Up @@ -43,28 +43,28 @@ use core::{iter::Enumerate, marker::PhantomData, slice};
/// | Contiguous | **yes** |
/// | Iteration | **best** |
#[derive(Debug)]
pub struct BucketBackend<S = DefaultSymbol> {
pub struct BucketBackend<'i, S: Symbol = DefaultSymbol> {
spans: Vec<InternedStr>,
head: FixedString,
full: Vec<String>,
marker: PhantomData<fn() -> S>,
marker: PhantomBackend<'i, Self>,
}

/// # Safety
///
/// The bucket backend requires a manual [`Send`] impl because it is self
/// referential. When cloning a bucket backend a deep clone is performed and
/// all references to itself are updated for the clone.
unsafe impl<S> Send for BucketBackend<S> where S: Symbol {}
unsafe impl<'i, S> Send for BucketBackend<'i, S> where S: Symbol {}

/// # Safety
///
/// The bucket backend requires a manual [`Sync`] impl because it is self
/// referential. Those references won't escape its own scope and also
/// the bucket backend has no interior mutability.
unsafe impl<S> Sync for BucketBackend<S> where S: Symbol {}
unsafe impl<'i, S> Sync for BucketBackend<'i, S> where S: Symbol {}

impl<S> Default for BucketBackend<S> {
impl<'i, S: Symbol> Default for BucketBackend<'i, S> {
#[cfg_attr(feature = "inline-more", inline)]
fn default() -> Self {
Self {
Expand All @@ -76,10 +76,14 @@ impl<S> Default for BucketBackend<S> {
}
}

impl<S> Backend for BucketBackend<S>
impl<'i, S> Backend<'i> for BucketBackend<'i, S>
where
S: Symbol,
{
type Access<'local> = &'local str
where
Self: 'local,
'i: 'local;
type Symbol = S;
type Iter<'a>
= Iter<'a, S>
Expand Down Expand Up @@ -136,7 +140,7 @@ where
}
}

impl<S> BucketBackend<S>
impl<'i, S> BucketBackend<'i, S>
where
S: Symbol,
{
Expand Down Expand Up @@ -167,7 +171,7 @@ where
}
}

impl<S> Clone for BucketBackend<S> {
impl<'i, S: Symbol> Clone for BucketBackend<'i, S> {
fn clone(&self) -> Self {
// For performance reasons we copy all cloned strings into a single cloned
// head string leaving the cloned `full` empty.
Expand All @@ -191,9 +195,9 @@ impl<S> Clone for BucketBackend<S> {
}
}

impl<S> Eq for BucketBackend<S> where S: Symbol {}
impl<'i, S> Eq for BucketBackend<'i, S> where S: Symbol {}

impl<S> PartialEq for BucketBackend<S>
impl<'i, S> PartialEq for BucketBackend<'i, S>
where
S: Symbol,
{
Expand All @@ -203,39 +207,39 @@ where
}
}

impl<'a, S> IntoIterator for &'a BucketBackend<S>
impl<'i, 'l, S> IntoIterator for &'l BucketBackend<'i, S>
where
S: Symbol,
{
type Item = (S, &'a str);
type IntoIter = Iter<'a, S>;
type Item = (S, &'l str);
type IntoIter = Iter<'l, S>;

#[cfg_attr(feature = "inline-more", inline)]
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

pub struct Iter<'a, S> {
iter: Enumerate<slice::Iter<'a, InternedStr>>,
pub struct Iter<'l, S> {
iter: Enumerate<slice::Iter<'l, InternedStr>>,
symbol_marker: PhantomData<fn() -> S>,
}

impl<'a, S> Iter<'a, S> {
impl<'i, 'l, S: Symbol> Iter<'l, S> {
#[cfg_attr(feature = "inline-more", inline)]
pub fn new(backend: &'a BucketBackend<S>) -> Self {
pub fn new(backend: &'l BucketBackend<'i, S>) -> Self {
Self {
iter: backend.spans.iter().enumerate(),
symbol_marker: Default::default(),
}
}
}

impl<'a, S> Iterator for Iter<'a, S>
impl<'l, S> Iterator for Iter<'l, S>
where
S: Symbol,
{
type Item = (S, &'a str);
type Item = (S, &'l str);

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
Expand Down
50 changes: 27 additions & 23 deletions src/backend/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#![cfg(feature = "backends")]

use super::Backend;
use super::{Backend, PhantomBackend};
use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol};
use alloc::vec::Vec;
use core::{marker::PhantomData, mem, str};
use core::{mem, str};

/// An interner backend that appends all interned string information in a single buffer.
///
Expand Down Expand Up @@ -34,13 +34,13 @@ use core::{marker::PhantomData, mem, str};
/// | Contiguous | **no** |
/// | Iteration | **bad** |
#[derive(Debug)]
pub struct BufferBackend<S = DefaultSymbol> {
pub struct BufferBackend<'i, S: Symbol = DefaultSymbol> {
len_strings: usize,
buffer: Vec<u8>,
marker: PhantomData<fn() -> S>,
marker: PhantomBackend<'i, Self>,
}

impl<S> PartialEq for BufferBackend<S>
impl<'i, S> PartialEq for BufferBackend<'i, S>
where
S: Symbol,
{
Expand All @@ -49,9 +49,9 @@ where
}
}

impl<S> Eq for BufferBackend<S> where S: Symbol {}
impl<'i, S> Eq for BufferBackend<'i, S> where S: Symbol {}

impl<S> Clone for BufferBackend<S> {
impl<'i, S: Symbol> Clone for BufferBackend<'i, S> {
fn clone(&self) -> Self {
Self {
len_strings: self.len_strings,
Expand All @@ -61,7 +61,7 @@ impl<S> Clone for BufferBackend<S> {
}
}

impl<S> Default for BufferBackend<S> {
impl<'i, S: Symbol> Default for BufferBackend<'i, S> {
#[cfg_attr(feature = "inline-more", inline)]
fn default() -> Self {
Self {
Expand All @@ -72,7 +72,7 @@ impl<S> Default for BufferBackend<S> {
}
}

impl<S> BufferBackend<S>
impl<'i, S> BufferBackend<'i, S>
where
S: Symbol,
{
Expand Down Expand Up @@ -147,15 +147,19 @@ where
}
}

impl<S> Backend for BufferBackend<S>
impl<'i, S> Backend<'i> for BufferBackend<'i, S>
where
S: Symbol,
{
type Access<'l> = &'l str
where
Self: 'l;
type Symbol = S;
type Iter<'a>
= Iter<'a, S>
type Iter<'l>
= Iter<'i, 'l, S>
where
Self: 'a;
'i: 'l,
Self: 'l;

#[cfg_attr(feature = "inline-more", inline)]
fn with_capacity(capacity: usize) -> Self {
Expand Down Expand Up @@ -307,28 +311,28 @@ fn decode_var_usize_cold(buffer: &[u8]) -> Option<(usize, usize)> {
Some((result, i + 1))
}

impl<'a, S> IntoIterator for &'a BufferBackend<S>
impl<'i, 'l, S> IntoIterator for &'l BufferBackend<'i, S>
where
S: Symbol,
{
type Item = (S, &'a str);
type IntoIter = Iter<'a, S>;
type Item = (S, &'l str);
type IntoIter = Iter<'i, 'l, S>;

#[cfg_attr(feature = "inline-more", inline)]
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

pub struct Iter<'a, S> {
backend: &'a BufferBackend<S>,
pub struct Iter<'i, 'l, S: Symbol> {
backend: &'l BufferBackend<'i, S>,
remaining: usize,
next: usize,
}

impl<'a, S> Iter<'a, S> {
impl<'i, 'l, S: Symbol> Iter<'i, 'l, S> {
#[cfg_attr(feature = "inline-more", inline)]
pub fn new(backend: &'a BufferBackend<S>) -> Self {
pub fn new(backend: &'l BufferBackend<'i, S>) -> Self {
Self {
backend,
remaining: backend.len_strings,
Expand All @@ -337,11 +341,11 @@ impl<'a, S> Iter<'a, S> {
}
}

impl<'a, S> Iterator for Iter<'a, S>
impl<'i, 'l, S> Iterator for Iter<'i, 'l, S>
where
S: Symbol,
{
type Item = (S, &'a str);
type Item = (S, &'l str);

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
Expand All @@ -365,7 +369,7 @@ where
}
}

impl<S> ExactSizeIterator for Iter<'_, S>
impl<'i, S> ExactSizeIterator for Iter<'i, '_, S>
where
S: Symbol,
{
Expand Down
34 changes: 28 additions & 6 deletions src/backend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,43 @@ use crate::Symbol;

/// The default backend recommended for general use.
#[cfg(feature = "backends")]
pub type DefaultBackend = StringBackend<crate::DefaultSymbol>;
pub type DefaultBackend<'i> = StringBackend<'i, crate::DefaultSymbol>;

/// Zero-sized [`PhantomData`][core::marker::PhantomData] marker that describes how a
/// [`Backend`] implementor uses lifetime `'i` and [`B::Symbol`][Backend::Symbol].
///
/// - `'i` is invariant: it appears in both the argument and the return
///   position of a function pointer.
/// - `B::Symbol` is covariant: it only appears in return position.
///
/// Only function pointers are used, so the marker is always `Send + Sync` and
/// does not take those auto traits away from the backend that embeds it. The
/// type is named through `core` (not `std`) to match the `core`/`alloc` paths
/// used by the backends.
#[allow(type_alias_bounds)] // included for clarity
type PhantomBackend<'i, B: Backend<'i>> = core::marker::PhantomData<(
    // invariant in 'i; unlike `Cell<&'i ()>` a function pointer is still `Sync`
    fn(&'i ()) -> &'i (),
    // covariant in Symbol
    fn() -> <B as Backend<'i>>::Symbol,
)>;

/// Types implementing this trait may act as backends for the string interner.
///
/// The job of a backend is to actually store, manage and organize the interned
/// strings. Different backends have different trade-offs. Users should pick
/// their backend with their personal use-case in mind.
pub trait Backend: Default {
pub trait Backend<'i>: Default {
/// The symbol used by the string interner backend.
type Symbol: Symbol;

/// Describes the type (and lifetime) of resolved strings.
///
/// If interned strings can move between insertions, this type will be
/// `&'l str`, indicating that a resolved `str` is only valid while the
/// backend isn't mutably accessed.
///
/// If interned strings can't move, this type is `&'i str`, indicating
/// that resolved `str`s are valid for as long as the interner exists.
type Access<'l>: AsRef<str>
where
Self: 'l,
'i: 'l;

/// The iterator over the symbols and their strings.
type Iter<'a>: Iterator<Item = (Self::Symbol, &'a str)>
type Iter<'l>: Iterator<Item = (Self::Symbol, Self::Access<'l>)>
where
Self: 'a;
'i: 'l,
Self: 'l;

/// Creates a new backend for the given capacity.
///
Expand Down Expand Up @@ -61,7 +83,7 @@ pub trait Backend: Default {
fn shrink_to_fit(&mut self);

/// Resolves the given symbol to its original string contents.
fn resolve(&self, symbol: Self::Symbol) -> Option<&str>;
fn resolve(&self, symbol: Self::Symbol) -> Option<Self::Access<'_>>;

/// Resolves the given symbol to its original string contents.
///
Expand All @@ -72,7 +94,7 @@ pub trait Backend: Default {
/// by the [`intern`](`Backend::intern`) or
/// [`intern_static`](`Backend::intern_static`) methods of the same
/// interner backend.
unsafe fn resolve_unchecked(&self, symbol: Self::Symbol) -> &str;
unsafe fn resolve_unchecked(&self, symbol: Self::Symbol) -> Self::Access<'_>;

/// Creates an iterator that yields all interned strings and their symbols.
fn iter(&self) -> Self::Iter<'_>;
Expand Down
Loading

0 comments on commit 598d4a2

Please sign in to comment.