From 98b790a6c8ed2529eaedb92e93807dab26a2ca47 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 09:30:20 -0400 Subject: [PATCH 01/13] Add conversion between TileDB and Arrow attributes --- tiledb/api/src/error.rs | 2 ++ tiledb/arrow/src/attribute.rs | 65 +++++++++++++++++++++++++++++++++++ tiledb/arrow/src/datatype.rs | 12 +++++++ tiledb/arrow/src/lib.rs | 3 ++ 4 files changed, 82 insertions(+) create mode 100644 tiledb/arrow/src/attribute.rs diff --git a/tiledb/api/src/error.rs b/tiledb/api/src/error.rs index f9bd38f6..5f594921 100644 --- a/tiledb/api/src/error.rs +++ b/tiledb/api/src/error.rs @@ -97,3 +97,5 @@ impl Drop for Error { } } } + +impl std::error::Error for Error {} diff --git a/tiledb/arrow/src/attribute.rs b/tiledb/arrow/src/attribute.rs new file mode 100644 index 00000000..cd47bd8f --- /dev/null +++ b/tiledb/arrow/src/attribute.rs @@ -0,0 +1,65 @@ +use tiledb::context::Context as TileDBContext; +use tiledb::Result as TileDBResult; + +use crate::datatype::{arrow_type_physical, tiledb_type_physical}; + +pub fn arrow_field( + attr: &tiledb::array::Attribute, +) -> TileDBResult> { + if let Some(arrow_dt) = arrow_type_physical(&attr.datatype()?) { + Ok(Some(arrow_schema::Field::new( + attr.name()?, + arrow_dt, + attr.is_nullable(), + ))) + } else { + Ok(None) + } +} + +pub fn tiledb_attribute<'ctx>( + context: &'ctx TileDBContext, + field: &arrow_schema::Field, +) -> TileDBResult>> { + if let Some(tiledb_dt) = tiledb_type_physical(field.data_type()) { + Ok(Some( + tiledb::array::AttributeBuilder::new( + context, + field.name(), + tiledb_dt, + )? 
+ .nullability(field.is_nullable())?, + )) + } else { + Ok(None) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use proptest::prelude::*; + + #[test] + fn test_invertibility() -> TileDBResult<()> { + let c: TileDBContext = TileDBContext::new()?; + + proptest!(|(attr in tiledb_test::attribute::arbitrary(&c))| { + if let Some(arrow_field) = arrow_field(&attr).expect("Error reading tiledb attribute") { + assert_eq!(attr.name()?, *arrow_field.name()); + assert!(crate::datatype::is_same_physical_type(&attr.datatype()?, arrow_field.data_type())); + assert_eq!(attr.is_nullable(), arrow_field.is_nullable()); + + // convert back to TileDB attribute + let tdb_out = tiledb_attribute(&c, &arrow_field)?.expect("Arrow attribute did not invert").build(); + assert_eq!(attr.name()?, tdb_out.name()?); + assert_eq!(attr.datatype()?, tdb_out.datatype()?); + assert_eq!(attr.is_nullable(), tdb_out.is_nullable()); + } + }); + + // TODO: go the other direction + + Ok(()) + } +} diff --git a/tiledb/arrow/src/datatype.rs b/tiledb/arrow/src/datatype.rs index 4b93658d..9a97cd09 100644 --- a/tiledb/arrow/src/datatype.rs +++ b/tiledb/arrow/src/datatype.rs @@ -118,6 +118,17 @@ pub fn tiledb_type_physical( } } +pub fn is_same_physical_type( + tdb_dt: &tiledb::Datatype, + arrow_dt: &arrow_schema::DataType, +) -> bool { + if let Some(tdb_to_arrow) = arrow_type_physical(tdb_dt) { + tdb_to_arrow == *arrow_dt + } else { + false + } +} + #[cfg(test)] mod tests { use super::*; @@ -127,6 +138,7 @@ mod tests { #[test] fn test_physical(tdb_dt in tiledb_test::datatype::arbitrary()) { if let Some(arrow_dt) = arrow_type_physical(&tdb_dt) { + assert!(is_same_physical_type(&tdb_dt, &arrow_dt)); if let Some(adt_width) = arrow_dt.primitive_width() { let tdb_width : usize = tdb_dt.size().try_into().unwrap(); assert_eq!(adt_width, tdb_width); diff --git a/tiledb/arrow/src/lib.rs b/tiledb/arrow/src/lib.rs index 677431f9..5439b30a 100644 --- a/tiledb/arrow/src/lib.rs +++ b/tiledb/arrow/src/lib.rs @@ -4,5 +4,8 
@@ extern crate tiledb; #[cfg(test)] extern crate proptest; +#[cfg(test)] +extern crate tiledb_test; +pub mod attribute; pub mod datatype; From af043ac3adc4eff9331d275e71e4526930a22a29 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 11:03:33 -0400 Subject: [PATCH 02/13] CAPISameRepr --- tiledb/api/src/convert.rs | 47 +++++++++++++++------------------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/tiledb/api/src/convert.rs b/tiledb/api/src/convert.rs index 3fa4992a..01d4965d 100644 --- a/tiledb/api/src/convert.rs +++ b/tiledb/api/src/convert.rs @@ -1,3 +1,16 @@ +pub trait CAPISameRepr: Copy + Default {} + +impl CAPISameRepr for u8 {} +impl CAPISameRepr for u16 {} +impl CAPISameRepr for u32 {} +impl CAPISameRepr for u64 {} +impl CAPISameRepr for i8 {} +impl CAPISameRepr for i16 {} +impl CAPISameRepr for i32 {} +impl CAPISameRepr for i64 {} +impl CAPISameRepr for f32 {} +impl CAPISameRepr for f64 {} + pub trait CAPIConverter { type CAPIType: Default + Copy; @@ -5,38 +18,14 @@ pub trait CAPIConverter { fn to_rust(value: &Self::CAPIType) -> Self; } -impl CAPIConverter for i32 { - type CAPIType = std::ffi::c_int; - - fn to_capi(&self) -> Self::CAPIType { - *self as Self::CAPIType - } - - fn to_rust(value: &Self::CAPIType) -> Self { - *value as Self - } -} - -impl CAPIConverter for u32 { - type CAPIType = std::ffi::c_uint; - - fn to_capi(&self) -> Self::CAPIType { - *self as Self::CAPIType - } - - fn to_rust(value: &Self::CAPIType) -> Self { - *value as Self - } -} - -impl CAPIConverter for f64 { - type CAPIType = std::ffi::c_double; +impl CAPIConverter for T { + type CAPIType = Self; fn to_capi(&self) -> Self::CAPIType { - *self as Self::CAPIType + *self } - fn to_rust(value: &Self::CAPIType) -> Self { - *value as Self + fn to_rust(value: &Self::CAPIType) -> T { + *value } } From 166c1e9c0c674a37b7f572e5069c5852ee9140ec Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 11:10:27 -0400 Subject: [PATCH 03/13] Add 
fn_typed macro --- tiledb/api/src/datatype.rs | 51 ++++++++++++++++++++++++++++++++++++++ tiledb/api/src/lib.rs | 1 + 2 files changed, 52 insertions(+) create mode 100644 tiledb/api/src/datatype.rs diff --git a/tiledb/api/src/datatype.rs b/tiledb/api/src/datatype.rs new file mode 100644 index 00000000..4669ff91 --- /dev/null +++ b/tiledb/api/src/datatype.rs @@ -0,0 +1,51 @@ +#[macro_export] +macro_rules! fn_typed { + ($obj:ident.$func:ident, $datatype:expr$(, $arg:expr)* => $then:expr) => { + match $datatype { + crate::Datatype::Int8 => $then($obj.$func::()), + crate::Datatype::Int16 => $then($obj.$func::()), + crate::Datatype::Int32 => $then($obj.$func::()), + crate::Datatype::Int64 => $then($obj.$func::()), + crate::Datatype::UInt8 => $then($obj.$func::()), + crate::Datatype::UInt16 => $then($obj.$func::()), + crate::Datatype::UInt32 => $then($obj.$func::()), + crate::Datatype::UInt64 => $then($obj.$func::()), + crate::Datatype::Float32 => $then($obj.$func::()), + crate::Datatype::Float64 => $then($obj.$func::()), + crate::Datatype::Char => unimplemented!(), + crate::Datatype::StringAscii => unimplemented!(), + crate::Datatype::StringUtf8 => unimplemented!(), + crate::Datatype::StringUtf16 => unimplemented!(), + crate::Datatype::StringUtf32 => unimplemented!(), + crate::Datatype::StringUcs2 => unimplemented!(), + crate::Datatype::StringUcs4 => unimplemented!(), + crate::Datatype::Any => unimplemented!(), + crate::Datatype::DateTimeYear => unimplemented!(), + crate::Datatype::DateTimeMonth => unimplemented!(), + crate::Datatype::DateTimeWeek => unimplemented!(), + crate::Datatype::DateTimeDay => unimplemented!(), + crate::Datatype::DateTimeHour => unimplemented!(), + crate::Datatype::DateTimeMinute => unimplemented!(), + crate::Datatype::DateTimeSecond => unimplemented!(), + crate::Datatype::DateTimeMillisecond => unimplemented!(), + crate::Datatype::DateTimeMicrosecond => unimplemented!(), + crate::Datatype::DateTimeNanosecond => unimplemented!(), + 
crate::Datatype::DateTimePicosecond => unimplemented!(), + crate::Datatype::DateTimeFemtosecond => unimplemented!(), + crate::Datatype::DateTimeAttosecond => unimplemented!(), + crate::Datatype::TimeHour => unimplemented!(), + crate::Datatype::TimeMinute => unimplemented!(), + crate::Datatype::TimeSecond => unimplemented!(), + crate::Datatype::TimeMillisecond => unimplemented!(), + crate::Datatype::TimeMicrosecond => unimplemented!(), + crate::Datatype::TimeNanosecond => unimplemented!(), + crate::Datatype::TimePicosecond => unimplemented!(), + crate::Datatype::TimeFemtosecond => unimplemented!(), + crate::Datatype::TimeAttosecond => unimplemented!(), + crate::Datatype::Blob => unimplemented!(), + crate::Datatype::Boolean => unimplemented!(), + crate::Datatype::GeometryWkb => unimplemented!(), + crate::Datatype::GeometryWkt => unimplemented!(), + } + }; +} diff --git a/tiledb/api/src/lib.rs b/tiledb/api/src/lib.rs index 2ddf5dae..f74b95e7 100644 --- a/tiledb/api/src/lib.rs +++ b/tiledb/api/src/lib.rs @@ -24,6 +24,7 @@ pub mod array; pub mod config; pub mod context; pub mod convert; +pub mod datatype; pub mod error; pub mod filter; pub mod filter_list; From 7e537e081d06df6c1fba26bd4effecc98eaedad2 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 11:11:00 -0400 Subject: [PATCH 04/13] impl Debug for Dimension --- tiledb/api/Cargo.toml | 1 + tiledb/api/src/array/dimension.rs | 15 +++++++++++++++ tiledb/api/src/lib.rs | 1 + 3 files changed, 17 insertions(+) diff --git a/tiledb/api/Cargo.toml b/tiledb/api/Cargo.toml index 0aa10157..b6b488f9 100644 --- a/tiledb/api/Cargo.toml +++ b/tiledb/api/Cargo.toml @@ -8,6 +8,7 @@ name = "tiledb" path = "src/lib.rs" [dependencies] +serde_json = "1.0.114" tiledb-sys = { workspace = true } [dev-dependencies] diff --git a/tiledb/api/src/array/dimension.rs b/tiledb/api/src/array/dimension.rs index ccb723c7..a2d990f9 100644 --- a/tiledb/api/src/array/dimension.rs +++ b/tiledb/api/src/array/dimension.rs @@ -1,8 +1,11 @@ 
+use serde_json::json; +use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::ops::Deref; use crate::context::Context; use crate::convert::CAPIConverter; use crate::filter_list::FilterList; +use crate::fn_typed; use crate::Datatype; use crate::Result as TileDBResult; @@ -100,6 +103,18 @@ impl<'ctx> Dimension<'ctx> { } } +impl<'ctx> Debug for Dimension<'ctx> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let json = json!({ + "datatype": format!("{}", self.datatype()), + "domain": fn_typed!(self.domain, self.datatype() => |x| match x { Ok(x) => format!("{:?}", x), Err(e) => format!("<{}>", e) }), + /* TODO: filters */ + "raw": format!("{:p}", *self.raw) + }); + write!(f, "{}", json) + } +} + pub struct Builder<'ctx> { dim: Dimension<'ctx>, } diff --git a/tiledb/api/src/lib.rs b/tiledb/api/src/lib.rs index f74b95e7..91598d7a 100644 --- a/tiledb/api/src/lib.rs +++ b/tiledb/api/src/lib.rs @@ -1,3 +1,4 @@ +extern crate serde_json; extern crate tiledb_sys as ffi; macro_rules! cstring { From 2670c861ea6efa557122f4a041f9addba2cc6531 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 11:17:39 -0400 Subject: [PATCH 05/13] impl Debug for Domain --- tiledb/api/src/array/domain.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tiledb/api/src/array/domain.rs b/tiledb/api/src/array/domain.rs index f27cf580..abe98e13 100644 --- a/tiledb/api/src/array/domain.rs +++ b/tiledb/api/src/array/domain.rs @@ -1,3 +1,5 @@ +use serde_json::json; +use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::ops::Deref; use crate::array::{dimension::RawDimension, Dimension}; @@ -39,7 +41,7 @@ impl<'ctx> Domain<'ctx> { Domain { context, raw } } - pub fn ndim(&self) -> u32 { + pub fn ndim(&self) -> usize { let mut ndim: u32 = out_ptr!(); let c_ret = unsafe { ffi::tiledb_domain_get_ndim( @@ -50,7 +52,7 @@ impl<'ctx> Domain<'ctx> { }; // the only errors are possible via mis-use of the C API, which Rust prevents 
assert_eq!(ffi::TILEDB_OK, c_ret); - ndim + ndim as usize } pub fn dimension(&self, idx: usize) -> TileDBResult> { @@ -85,6 +87,28 @@ impl<'ctx> Domain<'ctx> { } } +impl<'ctx> Debug for Domain<'ctx> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let mut json = json!({ + "raw": format!("{:p}", *self.raw) + /* TODO: what other fields? */ + }); + + json["dimensions"] = serde_json::value::Value::Array( + (0..self.ndim()) + .map(|d| { + serde_json::value::Value::String(match self.dimension(d) { + Ok(d) => format!("{:?}", d), + Err(e) => format!("<{}>", e), + }) + }) + .collect::>(), + ); + + write!(f, "{}", json) + } +} + pub struct Builder<'ctx> { domain: Domain<'ctx>, } From 2178eff90e851436518a838d16d3814b56dc9e7b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 11:24:45 -0400 Subject: [PATCH 06/13] impl Debug for Schema --- tiledb/api/src/array/schema.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tiledb/api/src/array/schema.rs b/tiledb/api/src/array/schema.rs index c666b55e..6aa8d47c 100644 --- a/tiledb/api/src/array/schema.rs +++ b/tiledb/api/src/array/schema.rs @@ -1,4 +1,6 @@ +use serde_json::json; use std::convert::TryFrom; +use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::ops::Deref; use crate::array::attribute::RawAttribute; @@ -7,7 +9,7 @@ use crate::array::{Attribute, Domain, Layout}; use crate::context::Context; use crate::Result as TileDBResult; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum ArrayType { Dense, Sparse, @@ -239,6 +241,29 @@ impl<'ctx> Schema<'ctx> { } } +impl<'ctx> Debug for Schema<'ctx> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let json = json!({ + "array_type": format!("{:?}", self.array_type()), + "capacity": self.capacity(), + "cell_order": format!("{:?}", self.cell_order()), + "tile_order": format!("{:?}", self.tile_order()), + "allows_duplicates": self.allows_duplicates(), + "domain": match self.domain() { + 
Ok(d) => format!("{:?}", d), + Err(e) => format!("<{}>", e) + }, + "attributes": (0.. self.nattributes()).map(|a| match self.attribute(a) { + Ok(a) => format!("{:?}", a), + Err(e) => format!("", a, e) + }).collect::>(), + "version": self.version(), + "raw": format!("{:p}", *self.raw), + }); + write!(f, "{}", json) + } +} + pub struct Builder<'ctx> { schema: Schema<'ctx>, } From 61dab4bfd7c3a58bc23ed6dccf31643f85ef04b8 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 14:38:15 -0400 Subject: [PATCH 07/13] Add LifetimeStrategy to enable proptest strategy type erasure with 'ctx lifetime --- tiledb/test/src/lib.rs | 1 + tiledb/test/src/strategy.rs | 66 +++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tiledb/test/src/strategy.rs diff --git a/tiledb/test/src/lib.rs b/tiledb/test/src/lib.rs index 34c504ac..298be6e3 100644 --- a/tiledb/test/src/lib.rs +++ b/tiledb/test/src/lib.rs @@ -3,3 +3,4 @@ extern crate tiledb; pub mod attribute; pub mod datatype; +pub mod strategy; diff --git a/tiledb/test/src/strategy.rs b/tiledb/test/src/strategy.rs new file mode 100644 index 00000000..38cc8950 --- /dev/null +++ b/tiledb/test/src/strategy.rs @@ -0,0 +1,66 @@ +use proptest::prelude::*; +use proptest::strategy::*; +use proptest::test_runner::TestRunner; +use std::sync::Arc; + +type LifetimeValueTree<'ctx, T> = Box + 'ctx>; + +/// Similar to BoxedStrategy, but with a narrower lifetime than 'static. +/// Use when there are conflicts with multiple Strategy implementing types that +/// have the same Value output - this erases the implementing type and just leaves the trait. 
+#[derive(Debug)] +pub struct LifetimeStrategy<'ctx, T>( + Arc> + 'ctx>, +); + +impl<'ctx, T: std::fmt::Debug> Strategy for LifetimeStrategy<'ctx, T> { + type Tree = LifetimeValueTree<'ctx, T>; + type Value = T; + + fn new_tree(&self, runner: &mut TestRunner) -> NewTree { + self.0.new_tree(runner) + } +} + +pub trait LifetimeBoundStrategy<'ctx>: Strategy + 'ctx { + fn bind(self) -> LifetimeStrategy<'ctx, ::Value>; +} + +impl<'ctx, S> LifetimeBoundStrategy<'ctx> for S +where + S: Sized + Strategy + 'ctx, +{ + fn bind(self) -> LifetimeStrategy<'ctx, ::Value> { + LifetimeStrategy(Arc::new(LifetimeStrategyWrapper::new(self))) + } +} + +#[derive(Debug)] +struct LifetimeStrategyWrapper<'ctx, T> { + strategy: T, + _lifetime: std::marker::PhantomData<&'ctx T>, +} + +impl<'ctx, S: Strategy> LifetimeStrategyWrapper<'ctx, S> +where + S::Tree: 'ctx, +{ + pub fn new(strategy: S) -> Self { + LifetimeStrategyWrapper { + strategy, + _lifetime: std::marker::PhantomData, + } + } +} + +impl<'ctx, T: Strategy> Strategy for LifetimeStrategyWrapper<'ctx, T> +where + T::Tree: 'ctx, +{ + type Tree = Box + 'ctx>; + type Value = T::Value; + + fn new_tree(&self, runner: &mut TestRunner) -> NewTree { + Ok(Box::new(self.strategy.new_tree(runner)?)) + } +} From 1d490872a84e57f20f694598f06ac9e2e5250fe9 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 14:39:19 -0400 Subject: [PATCH 08/13] Change fn_typed to be any expression --- tiledb/api/src/array/dimension.rs | 2 +- tiledb/api/src/datatype.rs | 202 +++++++++++++++++++++++------- 2 files changed, 157 insertions(+), 47 deletions(-) diff --git a/tiledb/api/src/array/dimension.rs b/tiledb/api/src/array/dimension.rs index a2d990f9..bef16c3d 100644 --- a/tiledb/api/src/array/dimension.rs +++ b/tiledb/api/src/array/dimension.rs @@ -107,7 +107,7 @@ impl<'ctx> Debug for Dimension<'ctx> { fn fmt(&self, f: &mut Formatter) -> FmtResult { let json = json!({ "datatype": format!("{}", self.datatype()), - "domain": fn_typed!(self.domain, 
self.datatype() => |x| match x { Ok(x) => format!("{:?}", x), Err(e) => format!("<{}>", e) }), + "domain": fn_typed!(self.domain, self.datatype() => match domain { Ok(x) => format!("{:?}", x), Err(e) => format!("<{}>", e) }), /* TODO: filters */ "raw": format!("{:p}", *self.raw) }); diff --git a/tiledb/api/src/datatype.rs b/tiledb/api/src/datatype.rs index 4669ff91..47070e9b 100644 --- a/tiledb/api/src/datatype.rs +++ b/tiledb/api/src/datatype.rs @@ -1,51 +1,161 @@ #[macro_export] macro_rules! fn_typed { - ($obj:ident.$func:ident, $datatype:expr$(, $arg:expr)* => $then:expr) => { + ($func:ident, $datatype:expr$(, $arg:expr)* => $then:expr) => {{ + type Datatype = $crate::Datatype; match $datatype { - crate::Datatype::Int8 => $then($obj.$func::()), - crate::Datatype::Int16 => $then($obj.$func::()), - crate::Datatype::Int32 => $then($obj.$func::()), - crate::Datatype::Int64 => $then($obj.$func::()), - crate::Datatype::UInt8 => $then($obj.$func::()), - crate::Datatype::UInt16 => $then($obj.$func::()), - crate::Datatype::UInt32 => $then($obj.$func::()), - crate::Datatype::UInt64 => $then($obj.$func::()), - crate::Datatype::Float32 => $then($obj.$func::()), - crate::Datatype::Float64 => $then($obj.$func::()), - crate::Datatype::Char => unimplemented!(), - crate::Datatype::StringAscii => unimplemented!(), - crate::Datatype::StringUtf8 => unimplemented!(), - crate::Datatype::StringUtf16 => unimplemented!(), - crate::Datatype::StringUtf32 => unimplemented!(), - crate::Datatype::StringUcs2 => unimplemented!(), - crate::Datatype::StringUcs4 => unimplemented!(), - crate::Datatype::Any => unimplemented!(), - crate::Datatype::DateTimeYear => unimplemented!(), - crate::Datatype::DateTimeMonth => unimplemented!(), - crate::Datatype::DateTimeWeek => unimplemented!(), - crate::Datatype::DateTimeDay => unimplemented!(), - crate::Datatype::DateTimeHour => unimplemented!(), - crate::Datatype::DateTimeMinute => unimplemented!(), - crate::Datatype::DateTimeSecond => unimplemented!(), - 
crate::Datatype::DateTimeMillisecond => unimplemented!(), - crate::Datatype::DateTimeMicrosecond => unimplemented!(), - crate::Datatype::DateTimeNanosecond => unimplemented!(), - crate::Datatype::DateTimePicosecond => unimplemented!(), - crate::Datatype::DateTimeFemtosecond => unimplemented!(), - crate::Datatype::DateTimeAttosecond => unimplemented!(), - crate::Datatype::TimeHour => unimplemented!(), - crate::Datatype::TimeMinute => unimplemented!(), - crate::Datatype::TimeSecond => unimplemented!(), - crate::Datatype::TimeMillisecond => unimplemented!(), - crate::Datatype::TimeMicrosecond => unimplemented!(), - crate::Datatype::TimeNanosecond => unimplemented!(), - crate::Datatype::TimePicosecond => unimplemented!(), - crate::Datatype::TimeFemtosecond => unimplemented!(), - crate::Datatype::TimeAttosecond => unimplemented!(), - crate::Datatype::Blob => unimplemented!(), - crate::Datatype::Boolean => unimplemented!(), - crate::Datatype::GeometryWkb => unimplemented!(), - crate::Datatype::GeometryWkt => unimplemented!(), + Datatype::Int8 => { + let $func = $func::(); + $then + } + Datatype::Int16 => { + let $func = $func::(); + $then + } + Datatype::Int32 => { + let $func = $func::(); + $then + } + Datatype::Int64 => { + let $func = $func::(); + $then + } + Datatype::UInt8 => { + let $func = $func::(); + $then + } + Datatype::UInt16 => { + let $func = $func::(); + $then + } + Datatype::UInt32 => { + let $func = $func::(); + $then + } + Datatype::UInt64 => { + let $func = $func::(); + $then + } + Datatype::Float32 => { + let $func = $func::(); + $then + } + Datatype::Float64 => { + let $func = $func::(); + $then + } + Datatype::Char => unimplemented!(), + Datatype::StringAscii => unimplemented!(), + Datatype::StringUtf8 => unimplemented!(), + Datatype::StringUtf16 => unimplemented!(), + Datatype::StringUtf32 => unimplemented!(), + Datatype::StringUcs2 => unimplemented!(), + Datatype::StringUcs4 => unimplemented!(), + Datatype::Any => unimplemented!(), + 
Datatype::DateTimeYear => unimplemented!(), + Datatype::DateTimeMonth => unimplemented!(), + Datatype::DateTimeWeek => unimplemented!(), + Datatype::DateTimeDay => unimplemented!(), + Datatype::DateTimeHour => unimplemented!(), + Datatype::DateTimeMinute => unimplemented!(), + Datatype::DateTimeSecond => unimplemented!(), + Datatype::DateTimeMillisecond => unimplemented!(), + Datatype::DateTimeMicrosecond => unimplemented!(), + Datatype::DateTimeNanosecond => unimplemented!(), + Datatype::DateTimePicosecond => unimplemented!(), + Datatype::DateTimeFemtosecond => unimplemented!(), + Datatype::DateTimeAttosecond => unimplemented!(), + Datatype::TimeHour => unimplemented!(), + Datatype::TimeMinute => unimplemented!(), + Datatype::TimeSecond => unimplemented!(), + Datatype::TimeMillisecond => unimplemented!(), + Datatype::TimeMicrosecond => unimplemented!(), + Datatype::TimeNanosecond => unimplemented!(), + Datatype::TimePicosecond => unimplemented!(), + Datatype::TimeFemtosecond => unimplemented!(), + Datatype::TimeAttosecond => unimplemented!(), + Datatype::Blob => unimplemented!(), + Datatype::Boolean => unimplemented!(), + Datatype::GeometryWkb => unimplemented!(), + Datatype::GeometryWkt => unimplemented!(), } - }; + }}; + ($obj:ident.$func:ident, $datatype:expr$(, $arg:expr)* => $then:expr) => {{ + type Datatype = $crate::Datatype; + match $datatype { + Datatype::Int8 => { + let $func = $obj.$func::(); + $then + } + Datatype::Int16 => { + let $func = $obj.$func::(); + $then + } + Datatype::Int32 => { + let $func = $obj.$func::(); + $then + } + Datatype::Int64 => { + let $func = $obj.$func::(); + $then + } + Datatype::UInt8 => { + let $func = $obj.$func::(); + $then + } + Datatype::UInt16 => { + let $func = $obj.$func::(); + $then + } + Datatype::UInt32 => { + let $func = $obj.$func::(); + $then + } + Datatype::UInt64 => { + let $func = $obj.$func::(); + $then + } + Datatype::Float32 => { + let $func = $obj.$func::(); + $then + } + Datatype::Float64 => { + let 
$func = $obj.$func::(); + $then + } + Datatype::Char => unimplemented!(), + Datatype::StringAscii => unimplemented!(), + Datatype::StringUtf8 => unimplemented!(), + Datatype::StringUtf16 => unimplemented!(), + Datatype::StringUtf32 => unimplemented!(), + Datatype::StringUcs2 => unimplemented!(), + Datatype::StringUcs4 => unimplemented!(), + Datatype::Any => unimplemented!(), + Datatype::DateTimeYear => unimplemented!(), + Datatype::DateTimeMonth => unimplemented!(), + Datatype::DateTimeWeek => unimplemented!(), + Datatype::DateTimeDay => unimplemented!(), + Datatype::DateTimeHour => unimplemented!(), + Datatype::DateTimeMinute => unimplemented!(), + Datatype::DateTimeSecond => unimplemented!(), + Datatype::DateTimeMillisecond => unimplemented!(), + Datatype::DateTimeMicrosecond => unimplemented!(), + Datatype::DateTimeNanosecond => unimplemented!(), + Datatype::DateTimePicosecond => unimplemented!(), + Datatype::DateTimeFemtosecond => unimplemented!(), + Datatype::DateTimeAttosecond => unimplemented!(), + Datatype::TimeHour => unimplemented!(), + Datatype::TimeMinute => unimplemented!(), + Datatype::TimeSecond => unimplemented!(), + Datatype::TimeMillisecond => unimplemented!(), + Datatype::TimeMicrosecond => unimplemented!(), + Datatype::TimeNanosecond => unimplemented!(), + Datatype::TimePicosecond => unimplemented!(), + Datatype::TimeFemtosecond => unimplemented!(), + Datatype::TimeAttosecond => unimplemented!(), + Datatype::Blob => unimplemented!(), + Datatype::Boolean => unimplemented!(), + Datatype::GeometryWkb => unimplemented!(), + Datatype::GeometryWkt => unimplemented!(), + } + }}; } From fdf52c865e5861d39208ce5e14a3b8cec20fa188 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 16:12:25 -0400 Subject: [PATCH 09/13] Add arbitrary Dimension generator --- tiledb/test/Cargo.toml | 1 + tiledb/test/src/attribute.rs | 3 +- tiledb/test/src/datatype.rs | 17 ++++++ tiledb/test/src/dimension.rs | 101 +++++++++++++++++++++++++++++++++++ 
tiledb/test/src/lib.rs | 2 + 5 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tiledb/test/src/dimension.rs diff --git a/tiledb/test/Cargo.toml b/tiledb/test/Cargo.toml index 51b21bc7..7e5d2e30 100644 --- a/tiledb/test/Cargo.toml +++ b/tiledb/test/Cargo.toml @@ -8,5 +8,6 @@ name = "tiledb_test" path = "src/lib.rs" [dependencies] +num-traits = "0.2" proptest = "1.0.0" tiledb = { workspace = true } diff --git a/tiledb/test/src/attribute.rs b/tiledb/test/src/attribute.rs index bb3e402f..d46e6ef8 100644 --- a/tiledb/test/src/attribute.rs +++ b/tiledb/test/src/attribute.rs @@ -19,8 +19,9 @@ pub fn arbitrary(context: &Context) -> impl Strategy { mod tests { use super::*; + /// Test that the arbitrary attribute construction always succeeds #[test] - fn attribute_alloc() { + fn attribute_arbitrary() { let ctx = Context::new().expect("Error creating context"); proptest!(|(_ in arbitrary(&ctx))| {}); diff --git a/tiledb/test/src/datatype.rs b/tiledb/test/src/datatype.rs index 0398b3d0..edfaaace 100644 --- a/tiledb/test/src/datatype.rs +++ b/tiledb/test/src/datatype.rs @@ -48,3 +48,20 @@ pub fn arbitrary() -> impl Strategy { Just(tiledb::Datatype::GeometryWkt), ] } + +/// Choose an arbitrary datatype which satisfies the CAPIConverter trait +// TODO: make sure to keep this list up to date as we add more types +pub fn arbitrary_conv() -> impl Strategy { + prop_oneof![ + Just(tiledb::Datatype::Int8), + Just(tiledb::Datatype::Int16), + Just(tiledb::Datatype::Int32), + Just(tiledb::Datatype::Int64), + Just(tiledb::Datatype::UInt8), + Just(tiledb::Datatype::UInt16), + Just(tiledb::Datatype::UInt32), + Just(tiledb::Datatype::UInt64), + Just(tiledb::Datatype::Float32), + Just(tiledb::Datatype::Float64), + ] +} diff --git a/tiledb/test/src/dimension.rs b/tiledb/test/src/dimension.rs new file mode 100644 index 00000000..d43124ad --- /dev/null +++ b/tiledb/test/src/dimension.rs @@ -0,0 +1,101 @@ +use num_traits::{Bounded, Num}; +use proptest::prelude::*; +use
std::fmt::Debug; +use tiledb::array::{Dimension, DimensionBuilder}; +use tiledb::context::Context; +use tiledb::fn_typed; +use tiledb::Result as TileDBResult; + +use crate::strategy::LifetimeBoundStrategy; + +fn arbitrary_range_and_extent() -> impl Strategy +where + T: Num + + Bounded + + Clone + + Copy + + Debug + + std::fmt::Display + + PartialOrd + + std::ops::Sub + + 'static, + std::ops::Range: Strategy, +{ + let one = ::one(); + let lower_limit = ::min_value(); + let upper_limit = ::max_value(); + std::ops::Range:: { + start: lower_limit + one + one, + end: upper_limit - one, + } + .prop_flat_map(move |upper_bound| { + ( + std::ops::Range:: { + start: lower_limit + one, + end: upper_bound - one, + }, + Just(upper_bound), + ) + }) + .prop_flat_map(move |(lower_bound, upper_bound)| { + ( + Just([lower_bound, upper_bound]), + std::ops::Range:: { + start: one, + end: { + let zero = ::zero(); + let extent_limit = if lower_bound >= zero { + upper_bound - lower_bound + } else if upper_bound >= zero { + if upper_limit + lower_bound > upper_bound { + upper_bound - lower_bound + } else { + upper_limit - upper_bound + } + } else { + upper_bound - lower_bound + }; + + let extent_bound = + if upper_limit - extent_limit < upper_bound { + upper_limit - upper_bound + } else { + extent_limit + }; + extent_bound + }, + }, + ) + }) +} + +pub fn arbitrary<'ctx>( + context: &'ctx Context, +) -> impl Strategy> { + ( + crate::datatype::arbitrary_conv(), + crate::attribute::arbitrary_name(), + ) + .prop_flat_map(|(dt, name)| { + fn_typed!(arbitrary_range_and_extent, dt => + (Just(dt), Just(name), arbitrary_range_and_extent).prop_map(|(dt, name, values)| { + DimensionBuilder::new(context, name.as_ref(), dt, &values.0, &values.1) + .map(|b| b.build()) + }).bind()) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test that the arbitrary dimension construction always succeeds + #[test] + fn dimension_arbitrary() { + let ctx = Context::new().expect("Error creating context"); +
+ proptest!(|(maybe_dimension in arbitrary(&ctx))| { + maybe_dimension.expect("Error constructing arbitrary dimension"); + }); + } +} diff --git a/tiledb/test/src/lib.rs b/tiledb/test/src/lib.rs index 298be6e3..6e2eb1b7 100644 --- a/tiledb/test/src/lib.rs +++ b/tiledb/test/src/lib.rs @@ -1,6 +1,8 @@ +extern crate num_traits; extern crate proptest; extern crate tiledb; pub mod attribute; pub mod datatype; +pub mod dimension; pub mod strategy; From f7513c2eee1f61dc70df0d04985f290cf6cdc03e Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 16:25:20 -0400 Subject: [PATCH 10/13] Clippy --- Cargo.lock | 3 ++- tiledb/api/src/array/domain.rs | 13 +++++-------- tiledb/test/src/dimension.rs | 16 +++++++--------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0a26a5f..88efc139 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1010,9 +1010,9 @@ name = "tiledb" version = "0.1.0" dependencies = [ "proptest", + "serde_json", "tempdir", "tiledb-sys", - "tiledb-test", ] [[package]] @@ -1034,6 +1034,7 @@ version = "0.1.0" name = "tiledb-test" version = "0.1.0" dependencies = [ + "num-traits", "proptest", "tiledb", ] diff --git a/tiledb/api/src/array/domain.rs b/tiledb/api/src/array/domain.rs index abe98e13..37302888 100644 --- a/tiledb/api/src/array/domain.rs +++ b/tiledb/api/src/array/domain.rs @@ -89,13 +89,8 @@ impl<'ctx> Domain<'ctx> { impl<'ctx> Debug for Domain<'ctx> { fn fmt(&self, f: &mut Formatter) -> FmtResult { - let mut json = json!({ - "raw": format!("{:p}", *self.raw) - /* TODO: what other fields? */ - }); - - json["dimensions"] = serde_json::value::Value::Array( - (0..self.ndim()) + let json = json!({ + "dimensions": (0..self.ndim()) .map(|d| { serde_json::value::Value::String(match self.dimension(d) { Ok(d) => format!("{:?}", d), @@ -103,7 +98,9 @@ impl<'ctx> Debug for Domain<'ctx> { }) }) .collect::>(), - ); + "raw": format!("{:p}", *self.raw) + /* TODO: what other fields? 
*/ + }); write!(f, "{}", json) } diff --git a/tiledb/test/src/dimension.rs b/tiledb/test/src/dimension.rs index d43124ad..35a0224d 100644 --- a/tiledb/test/src/dimension.rs +++ b/tiledb/test/src/dimension.rs @@ -56,21 +56,19 @@ where upper_bound - lower_bound }; - let extent_bound = - if upper_limit - extent_limit < upper_bound { - upper_limit - upper_bound - } else { - extent_limit - }; - extent_bound + if upper_limit - extent_limit < upper_bound { + upper_limit - upper_bound + } else { + extent_limit + } }, }, ) }) } -pub fn arbitrary<'ctx>( - context: &'ctx Context, +pub fn arbitrary( + context: &Context, ) -> impl Strategy> { ( crate::datatype::arbitrary_conv(), From a6f1318228b38b2b98d3b4e76adda5fa04296f52 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 16:41:55 -0400 Subject: [PATCH 11/13] Fix fn_typed macro arguments --- tiledb/api/src/datatype.rs | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tiledb/api/src/datatype.rs b/tiledb/api/src/datatype.rs index 47070e9b..4253f4c7 100644 --- a/tiledb/api/src/datatype.rs +++ b/tiledb/api/src/datatype.rs @@ -4,43 +4,43 @@ macro_rules! 
fn_typed { type Datatype = $crate::Datatype; match $datatype { Datatype::Int8 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Int16 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Int32 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Int64 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::UInt8 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::UInt16 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::UInt32 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::UInt64 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Float32 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Float64 => { - let $func = $func::(); + let $func = $func::($($arg,)*); $then } Datatype::Char => unimplemented!(), @@ -83,43 +83,43 @@ macro_rules! 
fn_typed { type Datatype = $crate::Datatype; match $datatype { Datatype::Int8 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Int16 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Int32 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Int64 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::UInt8 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::UInt16 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::UInt32 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::UInt64 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Float32 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Float64 => { - let $func = $obj.$func::(); + let $func = $obj.$func::($($arg,)*); $then } Datatype::Char => unimplemented!(), From 1838b21f1a74a29477df0e63aaf8c5aa0a94a249 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 17:13:15 -0400 Subject: [PATCH 12/13] Fix arbitrary_range_and_extent and add comments --- tiledb/test/src/dimension.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tiledb/test/src/dimension.rs b/tiledb/test/src/dimension.rs index 35a0224d..1756aed2 100644 --- a/tiledb/test/src/dimension.rs +++ b/tiledb/test/src/dimension.rs @@ -8,6 +8,9 @@ use tiledb::Result as TileDBResult; use crate::strategy::LifetimeBoundStrategy; +/// Construct a strategy to generate valid (domain, extent) pairs. +/// A valid output satisfies +/// `lower < lower + extent <= upper < upper + extent <= type_limit`. fn arbitrary_range_and_extent() -> impl Strategy where T: Num @@ -21,18 +24,23 @@ where + 'static, std::ops::Range: Strategy, { + /* + * First generate the upper bound. 
+ * Then generate the lower bound. + * Then generate the extent. + */ let one = ::one(); let lower_limit = ::min_value(); let upper_limit = ::max_value(); std::ops::Range:: { - start: lower_limit + one + one, - end: upper_limit - one, + start: lower_limit + one + one + one, // Needs this much space for lower bound + end: upper_limit - one, // The extent is at least one, so we cannot match the upper limit } .prop_flat_map(move |upper_bound| { ( std::ops::Range:: { start: lower_limit + one, - end: upper_bound - one, + end: upper_bound - one, // extent is at least one, cannot match upper bound }, Just(upper_bound), ) From 604fb76284db37e3f71bfc242f72d55f12947aac Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 20 Mar 2024 17:17:24 -0400 Subject: [PATCH 13/13] Review comments --- tiledb/api/src/array/dimension.rs | 8 ++++++-- tiledb/api/src/datatype.rs | 9 +++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tiledb/api/src/array/dimension.rs b/tiledb/api/src/array/dimension.rs index bef16c3d..1b1c695e 100644 --- a/tiledb/api/src/array/dimension.rs +++ b/tiledb/api/src/array/dimension.rs @@ -1,7 +1,8 @@ -use serde_json::json; use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::ops::Deref; +use serde_json::json; + use crate::context::Context; use crate::convert::CAPIConverter; use crate::filter_list::FilterList; @@ -107,7 +108,10 @@ impl<'ctx> Debug for Dimension<'ctx> { fn fmt(&self, f: &mut Formatter) -> FmtResult { let json = json!({ "datatype": format!("{}", self.datatype()), - "domain": fn_typed!(self.domain, self.datatype() => match domain { Ok(x) => format!("{:?}", x), Err(e) => format!("<{}>", e) }), + "domain": fn_typed!(self.domain, self.datatype() => match domain { + Ok(x) => format!("{:?}", x), + Err(e) => format!("<{}>", e) + }), /* TODO: filters */ "raw": format!("{:p}", *self.raw) }); diff --git a/tiledb/api/src/datatype.rs b/tiledb/api/src/datatype.rs index 4253f4c7..43bffc58 100644 --- a/tiledb/api/src/datatype.rs +++ 
b/tiledb/api/src/datatype.rs @@ -1,4 +1,13 @@ #[macro_export] + +/// Apply a generic function `$func` to data which implements `$datatype` and then run +/// the expression `$then` on the result. +/// The `$then` expression may use the function name as an identifier for the function result. + +// note to developers: this is mimicking the C++ code +// template +// inline auto apply_with_type(Fn&& f, Datatype type, Args&&... args) +// macro_rules! fn_typed { ($func:ident, $datatype:expr$(, $arg:expr)* => $then:expr) => {{ type Datatype = $crate::Datatype;