Skip to content

Commit

Permalink
feat: prototyping final content/deser interface
Browse files Browse the repository at this point in the history
  • Loading branch information
leeola committed Apr 18, 2023
1 parent af81f6a commit 5e1dbf7
Show file tree
Hide file tree
Showing 23 changed files with 1,326 additions and 74 deletions.
28 changes: 28 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,5 @@ members = [
# "git_fixi",
]

[workspace.dependencies]
ambassador = "0.3.5"
4 changes: 2 additions & 2 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ where
self.meta
.set_head(
"local",
&*self.repo,
&*self.branch,
&self.repo,
&self.branch,
&self.replica_id,
log_head.clone(),
)
Expand Down
3 changes: 3 additions & 0 deletions fixity_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ license-file = "LICENSE"

[features]
default = ["rkyv", "json"]
# Provide various test helpers or test focused implementations.
test = []
json = ["serde_json", "serde"]

[dependencies]
Expand All @@ -16,6 +18,7 @@ thiserror = "1.0"
anyhow = "1.0"
multihash = "0.16"
multibase = "0.9"
ambassador.workspace = true
# Feature: rkyv
rkyv = { version = "0.7", optional = true }
# Feature: json
Expand Down
72 changes: 57 additions & 15 deletions fixity_store/src/container.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,74 @@
use crate::{
contentid::NewContentId,
deser::{Deserialize, Serialize},
deser_store::DeserStore,
store::StoreError,
type_desc::{TypeDescription, ValueDesc},
Store,
};
use async_trait::async_trait;

#[async_trait]
pub trait NewContainer<'s, S>: Sized + Send + 's
where
S: Store,
{
async fn open(store: &'s S, cid: &S::Cid) -> Result<Self, StoreError>;
async fn save(&mut self, store: &'s S) -> Result<S::Cid, StoreError>;
async fn save_with_cids(
pub trait NewContainer<Deser, Cid: NewContentId>: Sized + Send + TypeDescription {
/// A description of the [de]serialized type(s) that this container manages.
///
/// Used to determine / validate Fixity repository types.
///
/// This is in contrast to the `Container: TypeDescription` bound for `Self`,
/// which describes the `Container` itself - which may or may not be what is written
/// to stores.
fn deser_type_desc() -> ValueDesc;
fn new_container<S: DeserStore<Deser, Cid>>(store: &S) -> Self;
async fn open<S: DeserStore<Deser, Cid>>(store: &S, cid: &Cid) -> Result<Self, StoreError>;
async fn save<S: DeserStore<Deser, Cid>>(&mut self, store: &S) -> Result<Cid, StoreError>;
async fn save_with_cids<S: DeserStore<Deser, Cid>>(
&mut self,
store: &S,
cids_buf: &mut Vec<S::Cid>,
cids_buf: &mut Vec<Cid>,
) -> Result<(), StoreError>;
async fn merge<S: DeserStore<Deser, Cid>>(
&mut self,
store: &S,
other: &Cid,
) -> Result<(), StoreError>;
async fn diff<S: DeserStore<Deser, Cid>>(
&mut self,
store: &S,
other: &Cid,
) -> Result<Self, StoreError>;
// TODO: Method to report contained Cids and/or Containers to allow correct syncing of a
// Container and all the cids within it.
}
#[async_trait]
pub trait ContainerRef<'s, S>: NewContainer<'s, S>
where
S: Store,
{
type Ref: TryInto<Self, Error = StoreError>;
async fn open_ref(store: &'s S, cid: &S::Cid) -> Result<Self::Ref, StoreError>;
pub trait ContainerRef<Deser, Cid: NewContentId>: NewContainer<Deser, Cid> {
type Ref: ContainerRefInto<Self>;
type DiffRef: ContainerRefInto<Self>;
async fn open_ref<S: DeserStore<Deser, Cid>>(
store: &S,
cid: &Cid,
) -> Result<Self::Ref, StoreError>;
async fn diff_ref<S: DeserStore<Deser, Cid>>(
&mut self,
store: &S,
other: &Cid,
) -> Result<Self::DiffRef, StoreError>;
}
// NIT: Infallible conversions were making `TryInto` awkward for `Ref` and `DiffRef` on
// `ContainerRef`, so this trait fills that role without the infallible issues.
// I must be misunderstanding how to deal with Infallible `TryInto`s easily, while
// also putting bounds on the associated `TryInto::Error` type.
//
// Or perhaps it's just awkward because associated type bounds don't exist yet.
/// Fallible, consuming conversion from a container reference type into the owned
/// container type `Owned`.
///
/// Used as the bound on `ContainerRef::Ref` and `ContainerRef::DiffRef` instead of
/// `TryInto`; the only requirement on the error is that it converts into [`StoreError`].
pub trait ContainerRefInto<Owned> {
/// Error returned when the conversion fails. Must be convertible into [`StoreError`].
type Error: Into<StoreError>;
/// Consumes `self` and attempts to produce the owned container.
fn container_ref_into(self) -> Result<Owned, Self::Error>;
}
/// Identity conversion: every type converts into itself without failing.
impl<T> ContainerRefInto<T> for T {
    // This conversion always succeeds; `StoreError` is named only to satisfy the
    // `Into<StoreError>` bound on `ContainerRefInto::Error`.
    type Error = StoreError;

    /// Returns `self` unchanged, wrapped in `Ok`.
    fn container_ref_into(self) -> Result<T, StoreError> {
        Result::Ok(self)
    }
}

#[async_trait]
pub trait Container<'s, S>: Sized + Send + 's
where
Expand Down
24 changes: 22 additions & 2 deletions fixity_store/src/content_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,20 @@ pub enum ContentStoreError {
}
#[async_trait]
pub trait ContentStore<Cid: NewContentId>: Send + Sync {
// NIT: The conversion around the generic byte types is .. annoying.
// A single type (Into<Vec<u8>> for example) doesn't cover common cases.
// So we either add a lot of conversions on the type, and hope they align..
// or some types just end up needlessly converting. Which is unfortunate.
//
// Not sure the ideal solution.
type Bytes: AsRef<[u8]> + Into<Arc<[u8]>>;
async fn exists(&self, cid: &Cid) -> Result<bool, ContentStoreError>;
async fn read_unchecked(&self, cid: &Cid) -> Result<Self::Bytes, ContentStoreError>;
async fn write_unchecked(&self, cid: &Cid, bytes: Vec<u8>) -> Result<(), ContentStoreError>;
async fn write_unchecked<B>(&self, cid: &Cid, bytes: B) -> Result<(), ContentStoreError>
where
B: AsRef<[u8]> + Into<Arc<[u8]>> + Send;
// TODO: Allow the caller to own the buf, for mutation of buf.
// async fn read_unchecked_vec(&self, cid: &Cid) -> Result<Vec<u8>, ContentStoreError>;
}
#[async_trait]
impl<T, Cid> ContentStore<Cid> for Arc<T>
Expand All @@ -30,7 +40,17 @@ where
async fn read_unchecked(&self, cid: &Cid) -> Result<Self::Bytes, ContentStoreError> {
self.deref().read_unchecked(cid).await
}
async fn write_unchecked(&self, cid: &Cid, bytes: Vec<u8>) -> Result<(), ContentStoreError> {
async fn write_unchecked<B>(&self, cid: &Cid, bytes: B) -> Result<(), ContentStoreError>
where
B: AsRef<[u8]> + Into<Arc<[u8]>> + Send,
{
self.deref().write_unchecked(cid, bytes).await
}
}
/// Content store interface keyed by `Cid` that uses concrete byte types.
///
/// In contrast to `ContentStore`, there is no associated `Bytes` type and no generic
/// byte parameter: reads return `Arc<[u8]>` and writes accept `Vec<u8>`.
///
/// NOTE(review): `unchecked` presumably means the bytes are not verified against
/// `cid` by the store - confirm against the implementations.
#[async_trait]
pub trait ContentStoreV2<Cid: NewContentId>: Send + Sync {
/// Asynchronously reports whether `cid` exists in the store, or a
/// [`ContentStoreError`] if the check itself fails.
async fn exists(&self, cid: &Cid) -> Result<bool, ContentStoreError>;
/// Asynchronously reads the bytes stored under `cid` as a shared `Arc<[u8]>`.
// NIT: This return type will probably need to change to work with mmap.
async fn read_unchecked(&self, cid: &Cid) -> Result<Arc<[u8]>, ContentStoreError>;
/// Asynchronously writes `bytes` under `cid`, taking ownership of the buffer.
async fn write_unchecked(&self, cid: &Cid, bytes: Vec<u8>) -> Result<(), ContentStoreError>;
}
70 changes: 64 additions & 6 deletions fixity_store/src/contentid.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
pub mod multihash_256;

use crate::{
deser::{Deserialize, Serialize},
type_desc::TypeDescription,
};
use multibase::Base;
use multihash::MultihashDigest;
use std::{
Expand All @@ -11,17 +15,24 @@ use thiserror::Error;

pub const CID_LENGTH: usize = 34;

pub trait NewContentId: Clone + Sized + Send + Sync + Eq + Ord + Hash + Debug + Display {
type Hash: AsRef<[u8]>;
pub trait NewContentId:
Clone + Sized + Send + Sync + Eq + Ord + Hash + Debug + Display + 'static + TypeDescription
{
type Hash<'a>: AsRef<[u8]>;
/// Hash the given bytes, producing a content identifier.
fn hash<B: AsRef<[u8]>>(buf: B) -> Self;
fn hash(buf: &[u8]) -> Self;
/// Construct a content identifier from the given hash.
fn from_hash<H: TryInto<Self::Hash>>(hash: H) -> Result<Self, FromHashError>;
fn as_hash(&self) -> &Self::Hash;
fn len(&self) -> usize {
fn from_hash(hash: Vec<u8>) -> Result<Self, FromHashError>;
fn as_hash(&self) -> Self::Hash<'_>;
fn size(&self) -> usize {
self.as_hash().as_ref().len()
}
}
/// A [`NewContentId`] that can also be serialized and deserialized with `Deser`.
///
/// The trait declares no items of its own; it only bundles the three bounds under
/// a single name.
pub trait ContentIdDeser<Deser>: NewContentId + Serialize<Deser> + Deserialize<Deser> {}
// Blanket implementation: any type that satisfies the three bounds is automatically
// a `ContentIdDeser<Deser>`, so the trait never needs to be implemented by hand.
impl<Deser, T> ContentIdDeser<Deser> for T where
T: NewContentId + Serialize<Deser> + Deserialize<Deser>
{
}
#[derive(Error, Debug)]
pub enum FromHashError {
#[error("invalid length")]
Expand Down Expand Up @@ -146,3 +157,50 @@ impl From<Hasher> for multihash::Code {
}
}
}

/// Test-focused [`NewContentId`] implementations for primitive integers.
#[cfg(any(test, feature = "test"))]
pub mod test {
    use super::{FromHashError, NewContentId};
    use multihash::MultihashDigest;

    /// Implements [`NewContentId`] for the integer type `$int`, whose big-endian
    /// encoding is `$len` bytes wide.
    ///
    /// The identifier is the first `$len` bytes of the Blake2s128 digest of the
    /// input, interpreted as a big-endian integer.
    macro_rules! impl_int_content_id {
        ($int:ty, $len:literal) => {
            impl NewContentId for $int {
                type Hash<'a> = [u8; $len];

                fn hash(buf: &[u8]) -> Self {
                    let mhash = multihash::Code::Blake2s128.digest(buf);
                    // Keep only the leading bytes that fit the integer width.
                    let prefix = &mhash.digest()[0..$len];
                    let bytes: [u8; $len] = prefix.try_into().expect(concat!(
                        "Blake2s128 truncated to ",
                        stringify!($len),
                        " bytes fits into a [u8; ",
                        stringify!($len),
                        "]"
                    ));
                    Self::from_be_bytes(bytes)
                }

                fn from_hash(hash: Vec<u8>) -> Result<Self, FromHashError> {
                    // A `Vec` of any other length cannot become the fixed-size array.
                    <[u8; $len]>::try_from(hash)
                        .map(Self::from_be_bytes)
                        .map_err(|_| FromHashError::Length)
                }

                fn as_hash(&self) -> Self::Hash<'static> {
                    self.to_be_bytes()
                }
            }
        };
    }

    impl_int_content_id!(i32, 4);
    impl_int_content_id!(i64, 8);
}
31 changes: 25 additions & 6 deletions fixity_store/src/contentid/multihash_256.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
use crate::type_desc::{TypeDescription, ValueDesc};

use super::{FromHashError, NewContentId};
use multibase::Base;
use multihash::MultihashDigest;
#[cfg(feature = "serde")]
use serde_big_array::BigArray;
use std::fmt::{Debug, Display};
use std::{
any::TypeId,
fmt::{Debug, Display},
};

const MULTIHASH_256_LEN: usize = 34;
const MULTIBASE_ENCODE: Base = Base::Base58Btc;
Expand All @@ -17,8 +22,8 @@ pub struct Multihash256(
#[cfg_attr(feature = "rkyv", serde(with = "BigArray"))] [u8; MULTIHASH_256_LEN],
);
impl NewContentId for Multihash256 {
type Hash = [u8; MULTIHASH_256_LEN];
fn hash<B: AsRef<[u8]>>(buf: B) -> Self {
type Hash<'a> = &'a [u8; MULTIHASH_256_LEN];
fn hash(buf: &[u8]) -> Self {
let hash = multihash::Code::Blake3_256.digest(buf.as_ref()).to_bytes();
match Self::from_hash(hash) {
Ok(cid) => cid,
Expand All @@ -27,17 +32,31 @@ impl NewContentId for Multihash256 {
},
}
}
fn from_hash<H: TryInto<Self::Hash>>(hash: H) -> Result<Self, FromHashError> {
fn from_hash(hash: Vec<u8>) -> Result<Self, FromHashError> {
hash.try_into()
.map_or(Err(FromHashError::Length), |hash| Ok(Self(hash)))
}
fn as_hash(&self) -> &Self::Hash {
fn as_hash(&self) -> Self::Hash<'_> {
&self.0
}
fn len(&self) -> usize {
fn size(&self) -> usize {
self.0.len()
}
}
impl TypeDescription for Multihash256 {
    /// Describes `Multihash256` as a struct named `"Multihash256"` holding a single
    /// array of `MULTIHASH_256_LEN` `u8` numbers.
    fn type_desc() -> ValueDesc {
        // TODO: use the inner TypeDescription impls ..
        let byte_desc = ValueDesc::Number(TypeId::of::<u8>());
        // NOTE(review): `Hash<'_>` for this type is `&[u8; MULTIHASH_256_LEN]`, so this
        // records the `TypeId` of the reference rather than of the array - confirm
        // that this is intended.
        let bytes_desc = ValueDesc::Array {
            value: Box::new(byte_desc),
            type_id: TypeId::of::<<Self as NewContentId>::Hash<'_>>(),
            len: MULTIHASH_256_LEN,
        };
        ValueDesc::Struct {
            name: "Multihash256",
            type_id: TypeId::of::<Self>(),
            values: vec![bytes_desc],
        }
    }
}
impl Debug for Multihash256 {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// PERF: Can we fork multibase to make a non-allocating display? I would think
Expand Down
Loading

0 comments on commit 5e1dbf7

Please sign in to comment.