Initial wrappers for Enumerations

TileDB-Inc · Mar 23, 2024 · c67ef8b · c67ef8b
1 parent d1b95d1
commit c67ef8b
Show file tree

Hide file tree

Showing 10 changed files with 761 additions and 14 deletions.
diff --git a/tiledb/api/src/array/enumeration.rs b/tiledb/api/src/array/enumeration.rs
@@ -0,0 +1,387 @@
+use std::fmt::{self, Debug, Formatter, Result as FmtResult};
+use std::ops::Deref;
+
+use serde_json::json;
+
+use crate::column::Column;
+use crate::context::Context;
+use crate::string::{RawTDBString, TDBString};
+use crate::Datatype;
+use crate::Result as TileDBResult;
+
+/// Wrapper around a raw C API `tiledb_enumeration_t` pointer.
+///
+/// The `Drop` implementation for this type hands the wrapped pointer to
+/// `tiledb_enumeration_free`, and `Deref` exposes the raw pointer itself.
+pub(crate) enum RawEnumeration {
+    /// A pointer that is passed to `tiledb_enumeration_free` on drop.
+    Owned(*mut ffi::tiledb_enumeration_t),
+}
+
+impl Deref for RawEnumeration {
+    type Target = *mut ffi::tiledb_enumeration_t;
+
+    /// Borrows the wrapped raw enumeration pointer.
+    fn deref(&self) -> &Self::Target {
+        // An exhaustive match means adding a variant later forces this
+        // accessor to be revisited.
+        match self {
+            RawEnumeration::Owned(ptr) => ptr,
+        }
+    }
+}
+
+impl Drop for RawEnumeration {
+    /// Passes the wrapped pointer to `tiledb_enumeration_free`.
+    fn drop(&mut self) {
+        match self {
+            // SAFETY: relies on `Owned` holding a pointer obtained from the
+            // C API that has not already been freed elsewhere.
+            RawEnumeration::Owned(ptr) => unsafe {
+                ffi::tiledb_enumeration_free(ptr);
+            },
+        }
+    }
+}
+
+/// An enumeration handle tied to the lifetime of a [`Context`].
+///
+/// Instances are produced by [`Builder::build`] and by
+/// [`Enumeration::extend`].
+pub struct Enumeration<'ctx> {
+    /// Context used for every C API call and for fetching the last error.
+    pub(crate) context: &'ctx Context,
+    /// The underlying C API enumeration pointer.
+    pub(crate) raw: RawEnumeration,
+}
+
+impl<'ctx> Enumeration<'ctx> {
+    /// Returns the raw C API pointer wrapped by this enumeration.
+    pub(crate) fn capi(&self) -> *mut ffi::tiledb_enumeration_t {
+        *self.raw
+    }
+
+    /// Returns the name of this enumeration.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if `tiledb_enumeration_get_name`
+    /// does not report `TILEDB_OK`, or the error produced while converting
+    /// the returned string handle.
+    pub fn name(&self) -> TileDBResult<String> {
+        let mut c_str: *mut ffi::tiledb_string_t = out_ptr!();
+        // SAFETY: relies on `self.context` and `self.raw` wrapping live C API
+        // handles; `c_str` is a valid out-pointer for the whole call.
+        let res = unsafe {
+            ffi::tiledb_enumeration_get_name(
+                self.context.capi(),
+                self.capi(),
+                &mut c_str,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+
+        // Hand the returned handle to `TDBString` before converting it.
+        let tdb_str = TDBString {
+            raw: RawTDBString::Owned(c_str),
+        };
+        tdb_str.to_string()
+    }
+
+    /// Returns the datatype of this enumeration.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if `tiledb_enumeration_get_type`
+    /// does not report `TILEDB_OK`.
+    pub fn datatype(&self) -> TileDBResult<Datatype> {
+        let mut dtype: ffi::tiledb_datatype_t = 0;
+        // SAFETY: relies on `self.context` and `self.raw` wrapping live C API
+        // handles; `dtype` is a valid out-pointer for the whole call.
+        let res = unsafe {
+            ffi::tiledb_enumeration_get_type(
+                self.context.capi(),
+                self.capi(),
+                &mut dtype,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+        Ok(Datatype::from_capi_enum(dtype))
+    }
+
+    /// Returns the cell value number of this enumeration.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if
+    /// `tiledb_enumeration_get_cell_val_num` does not report `TILEDB_OK`.
+    pub fn cell_val_num(&self) -> TileDBResult<u32> {
+        let mut c_cvn: u32 = 0;
+        // SAFETY: relies on `self.context` and `self.raw` wrapping live C API
+        // handles; `c_cvn` is a valid out-pointer for the whole call.
+        let res = unsafe {
+            ffi::tiledb_enumeration_get_cell_val_num(
+                self.context.capi(),
+                self.capi(),
+                &mut c_cvn,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+        Ok(c_cvn)
+    }
+
+    /// Returns `true` when the cell value number equals `u32::MAX`, the
+    /// value that `Builder::var_sized` stores to mark variable-sized cells.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from [`Enumeration::cell_val_num`].
+    pub fn is_var_sized(&self) -> TileDBResult<bool> {
+        self.cell_val_num().map(|cvn| cvn == u32::MAX)
+    }
+
+    /// Returns whether this enumeration is flagged as ordered.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if `tiledb_enumeration_get_ordered`
+    /// does not report `TILEDB_OK`.
+    pub fn ordered(&self) -> TileDBResult<bool> {
+        let mut c_ordered: i32 = 0;
+        // SAFETY: relies on `self.context` and `self.raw` wrapping live C API
+        // handles; `c_ordered` is a valid out-pointer for the whole call.
+        let res = unsafe {
+            ffi::tiledb_enumeration_get_ordered(
+                self.context.capi(),
+                self.capi(),
+                &mut c_ordered,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+        // Any non-zero value from the C API is treated as "ordered".
+        Ok(c_ordered != 0)
+    }
+
+    /// Creates a new enumeration by calling `tiledb_enumeration_extend` on
+    /// this one with the values in `column`.
+    ///
+    /// `self` is not modified; the result is a separate [`Enumeration`]
+    /// bound to the same context.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if `tiledb_enumeration_extend` does
+    /// not report `TILEDB_OK`.
+    pub fn extend(&self, column: Column) -> TileDBResult<Enumeration<'ctx>> {
+        let mut c_new_enmr: *mut ffi::tiledb_enumeration_t = out_ptr!();
+
+        let data = column.data();
+        let offsets = column.offsets();
+
+        // Pointers of empty Rust slices are never null (they are dangling
+        // but well-aligned), so an empty offsets buffer is translated into
+        // an explicit null pointer for the C API.
+        let offsets_ptr = if offsets.is_empty() {
+            std::ptr::null()
+        } else {
+            offsets.as_ptr()
+        };
+
+        // NOTE(review): the slice lengths are passed as the buffer sizes.
+        // The C API presumably expects byte counts -- confirm that
+        // `Column::data()` and `Column::offsets()` expose byte slices.
+        //
+        // SAFETY: `data` and `offsets` borrow from `column`, which this
+        // function owns and which is not dropped until after the call
+        // returns, so both pointers are valid for the whole call. This also
+        // relies on the library copying the buffers rather than keeping the
+        // pointers (TODO confirm against the C API documentation).
+        let res = unsafe {
+            ffi::tiledb_enumeration_extend(
+                self.context.capi(),
+                self.capi(),
+                data.as_ptr() as *const std::ffi::c_void,
+                data.len() as u64,
+                offsets_ptr as *const std::ffi::c_void,
+                offsets.len() as u64,
+                &mut c_new_enmr,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+
+        Ok(Enumeration {
+            context: self.context,
+            raw: RawEnumeration::Owned(c_new_enmr),
+        })
+    }
+}
+
+impl<'ctx> Debug for Enumeration<'ctx> {
+    /// Writes the enumeration's name, datatype, cell value number and
+    /// ordered flag as a JSON object.
+    ///
+    /// Any failure while reading those properties is reported as
+    /// `fmt::Error`; the underlying error is discarded.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let name = self.name().or(Err(fmt::Error))?;
+        let datatype = self.datatype().or(Err(fmt::Error))?;
+        let cell_val_num = self.cell_val_num().or(Err(fmt::Error))?;
+        let ordered = self.ordered().or(Err(fmt::Error))?;
+
+        // Fall back to a placeholder when the datatype has no string form.
+        let dtype = datatype
+            .to_string()
+            .unwrap_or(String::from("<unknown datatype>"));
+
+        // TODO: Add enumeration values display.
+
+        write!(
+            f,
+            "{}",
+            json!({
+                "name": name,
+                "datatype": dtype,
+                "cell_val_num": cell_val_num,
+                "ordered": ordered
+            })
+        )
+    }
+}
+
+impl<'c1, 'c2> PartialEq<Enumeration<'c2>> for Enumeration<'c1> {
+    /// Compares name, datatype, cell value number and ordered flag, in that
+    /// order, stopping at the first mismatch.
+    ///
+    /// A property that cannot be read on either side counts as a mismatch.
+    fn eq(&self, other: &Enumeration<'c2>) -> bool {
+        /// True only when both lookups succeeded and produced equal values.
+        fn same<T: PartialEq, E>(
+            mine: Result<T, E>,
+            theirs: Result<T, E>,
+        ) -> bool {
+            match (mine, theirs) {
+                (Ok(a), Ok(b)) => a == b,
+                _ => false,
+            }
+        }
+
+        // TODO: Match data and offsets
+
+        // `&&` short-circuits, so later properties are only queried when
+        // all earlier ones matched.
+        same(self.name(), other.name())
+            && same(self.datatype(), other.datatype())
+            && same(self.cell_val_num(), other.cell_val_num())
+            && same(self.ordered(), other.ordered())
+    }
+}
+
+/// Collects the settings used by [`Builder::build`] to allocate an
+/// [`Enumeration`].
+pub struct Builder<'ctx> {
+    /// Context the resulting enumeration is bound to.
+    context: &'ctx Context,
+    /// Name given to the enumeration.
+    name: String,
+    /// Datatype given to the enumeration.
+    dtype: Datatype,
+    /// Cell value number; `Builder::new` sets 1 and `Builder::var_sized`
+    /// sets `u32::MAX`.
+    cell_val_num: u32,
+    /// Ordered flag; `Builder::new` sets `false`.
+    ordered: bool,
+}
+
+impl<'ctx> Builder<'ctx> {
+    /// Starts a builder for an enumeration called `name` with datatype
+    /// `dtype`.
+    ///
+    /// The cell value number defaults to 1 and the enumeration defaults to
+    /// unordered.
+    pub fn new(context: &'ctx Context, name: &str, dtype: Datatype) -> Self {
+        Builder {
+            context,
+            name: name.to_owned(),
+            dtype,
+            cell_val_num: 1,
+            ordered: false,
+        }
+    }
+
+    /// Sets the cell value number.
+    pub fn cell_val_num(mut self, cell_val_num: u32) -> Self {
+        self.cell_val_num = cell_val_num;
+        self
+    }
+
+    /// Marks the enumeration as variable-sized by setting the cell value
+    /// number to `u32::MAX`.
+    pub fn var_sized(mut self) -> Self {
+        self.cell_val_num = u32::MAX;
+        self
+    }
+
+    /// Sets the ordered flag.
+    pub fn ordered(mut self, ordered: bool) -> Self {
+        self.ordered = ordered;
+        self
+    }
+
+    /// Allocates the enumeration with `tiledb_enumeration_alloc`, using the
+    /// values in `column`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the context's last error if `tiledb_enumeration_alloc` does
+    /// not report `TILEDB_OK`. The `cstring!` conversion of the name may
+    /// also fail (NOTE(review): the macro is defined elsewhere -- confirm
+    /// how it reports failure).
+    pub fn build(self, column: Column) -> TileDBResult<Enumeration<'ctx>> {
+        let mut c_enmr: *mut ffi::tiledb_enumeration_t = out_ptr!();
+        let name_bytes = self.name.as_bytes();
+        let c_name = cstring!(name_bytes);
+        let c_dtype = self.dtype.capi_enum();
+        let c_ordered = if self.ordered { 1 } else { 0 };
+
+        let data = column.data();
+        let offsets = column.offsets();
+
+        // Pointers of empty Rust slices are never null (they are dangling
+        // but well-aligned), so an empty offsets buffer is translated into
+        // an explicit null pointer for the C API.
+        let offsets_ptr = if offsets.is_empty() {
+            std::ptr::null()
+        } else {
+            offsets.as_ptr()
+        };
+
+        // NOTE(review): the slice lengths are passed as the buffer sizes.
+        // The C API presumably expects byte counts -- confirm that
+        // `Column::data()` and `Column::offsets()` expose byte slices.
+        //
+        // SAFETY: `c_name`, `data` and `offsets` all outlive the call:
+        // `c_name` is a local and the two buffers borrow from `column`,
+        // which this function owns and does not drop until after the call
+        // returns. This also relies on the library copying the data and
+        // offsets rather than keeping the pointers (TODO confirm against
+        // the C API documentation).
+        let res = unsafe {
+            ffi::tiledb_enumeration_alloc(
+                self.context.capi(),
+                c_name.as_c_str().as_ptr(),
+                c_dtype,
+                self.cell_val_num,
+                c_ordered,
+                data.as_ptr() as *const std::ffi::c_void,
+                data.len() as u64,
+                offsets_ptr as *const std::ffi::c_void,
+                offsets.len() as u64,
+                &mut c_enmr,
+            )
+        };
+        if res != ffi::TILEDB_OK {
+            return Err(self.context.expect_last_error());
+        }
+
+        Ok(Enumeration {
+            context: self.context,
+            raw: RawEnumeration::Owned(c_enmr),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::column::AsColumn;
+
+    /// Creates the context shared by every assertion in a test.
+    fn new_context() -> Context {
+        Context::new().expect("Error creating context instance.")
+    }
+
+    /// Asserts the four scalar properties of `enmr` against expectations.
+    fn assert_metadata(
+        enmr: &Enumeration<'_>,
+        name: &str,
+        dtype: Datatype,
+        cell_val_num: u32,
+        ordered: bool,
+    ) -> TileDBResult<()> {
+        assert_eq!(enmr.name()?, name);
+        assert_eq!(enmr.datatype()?, dtype);
+        assert_eq!(enmr.cell_val_num()?, cell_val_num);
+        assert_eq!(enmr.ordered()?, ordered);
+        Ok(())
+    }
+
+    /// A fixed-size, unordered enumeration reports the builder defaults.
+    #[test]
+    fn basic_build() -> TileDBResult<()> {
+        let ctx = new_context();
+        let enmr = Builder::new(&ctx, "foo", Datatype::Int32)
+            .build(vec![0, 1, 2, 3, 4].as_column())
+            .expect("Error building enumeration.");
+
+        assert_metadata(&enmr, "foo", Datatype::Int32, 1, false)
+    }
+
+    /// Requesting var-sized cells with this fixed-size column is rejected.
+    #[test]
+    fn var_sized_error_build() -> TileDBResult<()> {
+        let ctx = new_context();
+        let enmr_res = Builder::new(&ctx, "foo", Datatype::Int32)
+            .var_sized()
+            .build(vec![0u8, 1, 2, 3, 4].as_column());
+
+        assert!(enmr_res.is_err());
+
+        Ok(())
+    }
+
+    /// The ordered flag set on the builder is reported by the enumeration.
+    #[test]
+    fn ordered_build() -> TileDBResult<()> {
+        let ctx = new_context();
+        let enmr = Builder::new(&ctx, "foo", Datatype::Int32)
+            .ordered(true)
+            .build(vec![0, 1, 2, 3, 4].as_column())
+            .expect("Error building enumeration.");
+
+        assert_metadata(&enmr, "foo", Datatype::Int32, 1, true)
+    }
+
+    /// A var-sized string enumeration reports `u32::MAX` cell values.
+    #[test]
+    fn string_build() -> TileDBResult<()> {
+        let ctx = new_context();
+        let enmr = Builder::new(&ctx, "foo", Datatype::StringAscii)
+            .var_sized()
+            .build(vec!["foo", "bar", "baz", "bam", "mam"].as_column())
+            .expect("Error building enumeration.");
+
+        assert_metadata(&enmr, "foo", Datatype::StringAscii, u32::MAX, false)
+    }
+
+    /// Extending keeps name, datatype, cell value number and ordering.
+    #[test]
+    fn extend_enumeration() -> TileDBResult<()> {
+        let ctx = new_context();
+        let enmr1 = Builder::new(&ctx, "foo", Datatype::Int32)
+            .build(vec![1, 2, 3, 4, 5].as_column())
+            .expect("Error building enumeration.");
+
+        let enmr2 = enmr1
+            .extend(vec![6, 7, 8, 9, 10].as_column())
+            .expect("Error extending enumeration.");
+
+        assert_eq!(enmr1.name()?, enmr2.name()?);
+        assert_eq!(enmr1.datatype()?, enmr2.datatype()?);
+        assert_eq!(enmr1.cell_val_num()?, enmr2.cell_val_num()?);
+        assert_eq!(enmr1.ordered()?, enmr2.ordered()?);
+
+        Ok(())
+    }
+}