diff --git a/Cargo.lock b/Cargo.lock index ec0c9bd7..a86db2b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -992,6 +992,7 @@ dependencies = [ "serde", "serde_json", "serde_with", + "test-strategy", "thiserror", "tokio", ] @@ -2064,6 +2065,29 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "structmeta" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e1575d8d40908d70f6fd05537266b90ae71b15dbbe7a8b7dffa2b759306d329" +dependencies = [ + "proc-macro2", + "quote", + "structmeta-derive", + "syn 2.0.72", +] + +[[package]] +name = "structmeta-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + [[package]] name = "strum" version = "0.26.3" @@ -2130,6 +2154,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "test-strategy" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf41af45e3f54cc184831d629d41d5b2bda8297e29c81add7ae4f362ed5e01b" +dependencies = [ + "proc-macro2", + "quote", + "structmeta", + "syn 2.0.72", +] + [[package]] name = "thiserror" version = "1.0.63" diff --git a/Cargo.toml b/Cargo.toml index e6eed785..da853c6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,8 @@ thiserror = "1.0.63" serde_json = "1.0.125" serde = { version = "1.0.208", features = ["derive"] } serde_with = "3.9.0" +test-strategy = "0.4.0" +proptest = "1.5.0" [profile.release-with-debug] inherits = "release" @@ -27,5 +29,4 @@ debug = true [dev-dependencies] pretty_assertions = "1.4.0" -proptest = "1.0.0" tokio = { version = "1.39.2", features = ["rt-multi-thread", "macros"] } diff --git a/proptest-regressions/dataset.txt b/proptest-regressions/dataset.txt new file mode 100644 index 00000000..255d4384 --- /dev/null +++ b/proptest-regressions/dataset.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 10b7f8cfd2afb88ddee1d4abf0d6f1bb9e6c6bc87af8c2c0d5e90e49d50dede5 # shrinks to path = "" diff --git a/src/dataset.rs b/src/dataset.rs index 9e55e8fb..0ea986d6 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -31,8 +31,6 @@ impl ChangeSet { path: Path, node_id: NodeId, ) -> Result<(), DeleteNodeError> { - // TODO: test delete a deleted group. - // TODO: test delete a non-existent group let was_new = self.new_groups.remove(&path).is_some(); self.updated_attributes.remove(&path); if !was_new { @@ -722,10 +720,11 @@ pub enum FlushError { #[cfg(test)] mod tests { + use std::{error::Error, num::NonZeroU64, path::PathBuf}; use crate::{ - manifest::mk_manifests_table, storage::InMemoryStorage, + manifest::mk_manifests_table, storage::InMemoryStorage, strategies::*, structure::mk_structure_table, ChunkInfo, ChunkKeyEncoding, ChunkRef, ChunkShape, Codec, DataType, FillValue, Flags, ManifestExtents, StorageTransformer, TableRegion, @@ -734,6 +733,110 @@ mod tests { use super::*; use itertools::Itertools; use pretty_assertions::assert_eq; + use proptest::prelude::{prop_assert, prop_assert_eq}; + use test_strategy::proptest; + + #[proptest(async = "tokio")] + async fn test_add_delete_group( + #[strategy(node_paths())] path: Path, + #[strategy(empty_datasets())] mut dataset: Dataset, + ) { + // getting any path from an empty dataset must fail + prop_assert!(dataset.get_node(&path).await.is_err()); + + // adding a new group must succeed + prop_assert!(dataset.add_group(path.clone()).await.is_ok()); + + // Getting a group just added must succeed + let node = dataset.get_node(&path).await; + prop_assert!(node.is_ok()); + + // Getting the group twice must be equal + prop_assert_eq!(node.unwrap(), dataset.get_node(&path).await.unwrap()); + + // adding an existing group fails + prop_assert_eq!( + dataset.add_group(path.clone()).await.unwrap_err(), + AddNodeError::AlreadyExists(path.clone()) + ); + + // deleting the added group must succeed + prop_assert!(dataset.delete_group(path.clone()).await.is_ok()); + + // deleting twice must fail + prop_assert_eq!( + dataset.delete_group(path.clone()).await.unwrap_err(), + DeleteNodeError::NotFound(path.clone()) + ); + + // getting a deleted group must fail + prop_assert!(dataset.get_node(&path).await.is_err()); + + // adding again must succeed + prop_assert!(dataset.add_group(path.clone()).await.is_ok()); + + // deleting again must succeed + prop_assert!(dataset.delete_group(path.clone()).await.is_ok()); + } + + #[proptest(async = "tokio")] + async fn test_add_delete_array( + #[strategy(node_paths())] path: Path, + #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(empty_datasets())] mut dataset: Dataset, + ) { + // new array must always succeed + prop_assert!(dataset.add_array(path.clone(), metadata.clone()).await.is_ok()); + + // adding to the same path must fail + prop_assert!(dataset.add_array(path.clone(), metadata.clone()).await.is_err()); + + // first delete must succeed + prop_assert!(dataset.delete_array(path.clone()).await.is_ok()); + + // deleting twice must fail + prop_assert_eq!( + dataset.delete_array(path.clone()).await.unwrap_err(), + DeleteNodeError::NotFound(path.clone()) + ); + + // adding again must succeed + prop_assert!(dataset.add_array(path.clone(), metadata.clone()).await.is_ok()); + + // deleting again must succeed + prop_assert!(dataset.delete_array(path.clone()).await.is_ok()); + } + + #[proptest(async = "tokio")] + async fn test_add_array_group_clash( + #[strategy(node_paths())] path: Path, + #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(empty_datasets())] mut dataset: Dataset, + ) { + // adding a group at an existing array node must fail + prop_assert!(dataset.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert_eq!( + dataset.add_group(path.clone()).await.unwrap_err(), + AddNodeError::AlreadyExists(path.clone()) + ); + prop_assert_eq!( + dataset.delete_group(path.clone()).await.unwrap_err(), + DeleteNodeError::NotAGroup(path.clone()) + ); + prop_assert!(dataset.delete_array(path.clone()).await.is_ok()); + + // adding an array at an existing group node must fail + prop_assert!(dataset.add_group(path.clone()).await.is_ok()); + prop_assert_eq!( + dataset.add_array(path.clone(), metadata.clone()).await.unwrap_err(), + AddNodeError::AlreadyExists(path.clone()) + ); + prop_assert_eq!( + dataset.delete_array(path.clone()).await.unwrap_err(), + DeleteNodeError::NotAnArray(path.clone()) + ); + prop_assert!(dataset.delete_group(path.clone()).await.is_ok()); + } #[tokio::test(flavor = "multi_thread")] async fn test_dataset_with_updates() -> Result<(), Box> { diff --git a/src/lib.rs b/src/lib.rs index 89893f2b..7962ddad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,7 @@ pub mod dataset; pub mod manifest; pub mod storage; +pub mod strategies; pub mod structure; pub mod zarr; @@ -44,6 +45,7 @@ use std::{ sync::Arc, }; use structure::StructureTable; +use test_strategy::Arbitrary; use thiserror::Error; #[derive(Debug, Clone)] @@ -59,6 +61,8 @@ pub struct ArrayIndices(pub Vec); /// The shape of an array. /// 0 is a valid shape member pub type ArrayShape = Vec; +pub type DimensionName = Option; +pub type DimensionNames = Vec; pub type Path = PathBuf; @@ -197,7 +201,7 @@ struct NameConfigSerializer { configuration: serde_json::Value, } -#[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] +#[derive(Arbitrary, Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] pub enum ChunkKeyEncoding { Slash, Dot, @@ -263,7 +267,7 @@ impl TryFrom for ChunkKeyEncoding { } } -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Arbitrary, Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(untagged)] pub enum FillValue { // FIXME: test all json (de)serializations @@ -503,8 +507,6 @@ pub struct StorageTransformer { pub configuration: Option>, } -pub type DimensionName = String; - #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct UserAttributes { #[serde(flatten)] @@ -608,7 +610,7 @@ pub struct ZarrArrayMetadata { pub codecs: Vec, pub storage_transformers: Option>, // each dimension name can be null in Zarr - pub dimension_names: Option>>, + pub dimension_names: Option, } #[derive(Clone, Debug, Hash, PartialEq, Eq)] @@ -728,7 +730,7 @@ pub enum StorageError { /// Different implementation can cache the files differently, or not at all. /// Implementations are free to assume files are never overwritten. #[async_trait] -pub trait Storage { +pub trait Storage: fmt::Debug { async fn fetch_structure( &self, id: &ObjectId, @@ -765,7 +767,7 @@ pub trait Storage { async fn write_chunk(&self, id: ObjectId, bytes: Bytes) -> Result<(), StorageError>; } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Dataset { storage: Arc, structure_id: Option, diff --git a/src/storage.rs b/src/storage.rs index 130773d8..e3bbfa80 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -1,6 +1,7 @@ use base64::{engine::general_purpose::URL_SAFE as BASE64_URL_SAFE, Engine as _}; use std::{ collections::HashMap, + fmt, fs::create_dir_all, ops::Range, sync::{Arc, RwLock}, @@ -60,7 +61,7 @@ impl ObjectStorage { ) -> Result { use object_store::aws::AmazonS3Builder; let store = AmazonS3Builder::new() - // TODO: Generalize the auth config + // FIXME: Generalize the auth config .with_access_key_id("minio123") .with_secret_access_key("minio123") .with_endpoint("http://localhost:9000") @@ -108,6 +109,11 @@ impl ObjectStorage { } } +impl fmt::Debug for ObjectStorage { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "ObjectStorage, prefix={}, store={}", self.prefix, self.store) + } +} #[async_trait] impl Storage for ObjectStorage { async fn fetch_structure( @@ -221,6 +227,12 @@ impl InMemoryStorage { } } +impl fmt::Debug for InMemoryStorage { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "InMemoryStorage at {:p}", self) + } +} + #[async_trait] impl Storage for InMemoryStorage { async fn fetch_structure( diff --git a/src/strategies.rs b/src/strategies.rs new file mode 100644 index 00000000..28050b3d --- /dev/null +++ b/src/strategies.rs @@ -0,0 +1,90 @@ +use std::num::NonZeroU64; +use std::path::PathBuf; +use std::sync::Arc; + +use proptest::prelude::*; +use proptest::{collection::vec, option, strategy::Strategy}; + +use crate::storage::InMemoryStorage; +use crate::{ + ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, Dataset, DimensionNames, FillValue, + Path, StorageTransformer, ZarrArrayMetadata, +}; + +pub fn node_paths() -> impl Strategy { + // FIXME: Add valid paths + any::() +} + +pub fn empty_datasets() -> impl Strategy { + // FIXME: add storages strategy + let storage = InMemoryStorage::new(); + let dataset = Dataset::create(Arc::new(storage)); + prop_oneof![Just(dataset)] +} + +pub fn codecs() -> impl Strategy> { + prop_oneof![Just(vec![Codec { name: "mycodec".to_string(), configuration: None }]),] +} + +pub fn storage_transformers() -> impl Strategy>> { + prop_oneof![ + Just(Some(vec![StorageTransformer { + name: "mytransformer".to_string(), + configuration: None, + }])), + Just(None), + ] +} + +#[derive(Debug)] +pub struct ShapeDim { + shape: ArrayShape, + chunk_shape: ChunkShape, + dimension_names: Option, +} + +pub fn shapes_and_dims(max_ndim: Option) -> impl Strategy { + // FIXME: ndim = 0 + let max_ndim = max_ndim.unwrap_or(4usize); + vec(1u64..26u64, 1..max_ndim) + .prop_flat_map(|shape| { + let ndim = shape.len(); + let chunk_shape: Vec> = shape + .clone() + .into_iter() + .map(|size| { + (1u64..=size) + .prop_map(|chunk_size| NonZeroU64::new(chunk_size).unwrap()) + .boxed() + }) + .collect(); + (Just(shape), chunk_shape, option::of(vec(option::of(any::()), ndim))) + }) + .prop_map(|(shape, chunk_shape, dimension_names)| ShapeDim { + shape, + chunk_shape: ChunkShape(chunk_shape), + dimension_names, + }) +} + +prop_compose! { + pub fn zarr_array_metadata()( + chunk_key_encoding: ChunkKeyEncoding, + fill_value: FillValue, + shape_and_dim in shapes_and_dims(None), + storage_transformers in storage_transformers(), + codecs in codecs(), + ) -> ZarrArrayMetadata { + ZarrArrayMetadata { + shape: shape_and_dim.shape, + data_type: fill_value.get_data_type(), + chunk_shape: shape_and_dim.chunk_shape, + chunk_key_encoding, + fill_value, + codecs, + storage_transformers, + dimension_names: shape_and_dim.dimension_names, + } + } +} diff --git a/src/structure.rs b/src/structure.rs index b7dd9fef..dc87718b 100644 --- a/src/structure.rs +++ b/src/structure.rs @@ -365,7 +365,7 @@ where fn mk_dimension_names_array(coll: T) -> ListArray where T: IntoIterator>, - P: IntoIterator>, + P: IntoIterator, { let mut b = ListBuilder::new(StringBuilder::new()); for list in coll { @@ -661,36 +661,12 @@ pub fn mk_structure_table>( mod strategies { use crate::FillValue; use proptest::prelude::*; - use proptest::prop_oneof; use proptest::strategy::Strategy; - pub(crate) fn fill_value_strategy() -> impl Strategy { - use proptest::collection::vec; - prop_oneof![ - any::().prop_map(FillValue::Bool), - any::().prop_map(FillValue::Int8), - any::().prop_map(FillValue::Int16), - any::().prop_map(FillValue::Int32), - any::().prop_map(FillValue::Int64), - any::().prop_map(FillValue::UInt8), - any::().prop_map(FillValue::UInt16), - any::().prop_map(FillValue::UInt32), - any::().prop_map(FillValue::UInt64), - any::().prop_map(FillValue::Float16), - any::().prop_map(FillValue::Float32), - any::().prop_map(FillValue::Float64), - (any::(), any::()) - .prop_map(|(real, imag)| FillValue::Complex64(real, imag)), - (any::(), any::()) - .prop_map(|(real, imag)| FillValue::Complex128(real, imag)), - vec(any::(), 0..64).prop_map(FillValue::RawBits), - ] - } - pub(crate) fn fill_values_vec_strategy( ) -> impl Strategy>> { use proptest::collection::vec; - vec(proptest::option::of(fill_value_strategy()), 0..10) + vec(proptest::option::of(any::()), 0..10) } }