Skip to content

Commit

Permalink
Initial wrappers for Enumerations
Browse files Browse the repository at this point in the history
  • Loading branch information
davisp committed Mar 23, 2024
1 parent d1b95d1 commit c67ef8b
Show file tree
Hide file tree
Showing 10 changed files with 761 additions and 14 deletions.
387 changes: 387 additions & 0 deletions tiledb/api/src/array/enumeration.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,387 @@
use std::fmt::{self, Debug, Formatter, Result as FmtResult};
use std::ops::Deref;

use serde_json::json;

use crate::column::Column;
use crate::context::Context;
use crate::string::{RawTDBString, TDBString};
use crate::Datatype;
use crate::Result as TileDBResult;

/// Raw handle to a `tiledb_enumeration_t` obtained from the TileDB C API.
///
/// The single `Owned` variant stores the pointer that this file's `Drop`
/// implementation hands to `tiledb_enumeration_free`.
pub(crate) enum RawEnumeration {
/// Pointer released through `tiledb_enumeration_free` when the value is dropped.
Owned(*mut ffi::tiledb_enumeration_t),
}

impl Deref for RawEnumeration {
    type Target = *mut ffi::tiledb_enumeration_t;

    /// Borrows the wrapped C pointer so callers can copy it out with `*raw`.
    fn deref(&self) -> &Self::Target {
        match self {
            RawEnumeration::Owned(handle) => handle,
        }
    }
}

impl Drop for RawEnumeration {
    /// Releases the wrapped enumeration through the TileDB C API.
    fn drop(&mut self) {
        match self {
            RawEnumeration::Owned(handle) => {
                // SAFETY: `handle` is the pointer stored in the `Owned`
                // variant; it is assumed to have been produced by a TileDB
                // allocation call and not freed elsewhere (TODO confirm at
                // every construction site).
                unsafe {
                    ffi::tiledb_enumeration_free(handle);
                }
            }
        }
    }
}

/// A TileDB enumeration handle paired with the context used for its C API calls.
pub struct Enumeration<'ctx> {
/// Context whose C handle is passed to the FFI calls made on this
/// enumeration and from which the last error is fetched on failure.
pub(crate) context: &'ctx Context,
/// Raw enumeration handle; released by `RawEnumeration`'s `Drop` impl.
pub(crate) raw: RawEnumeration,
}

impl<'ctx> Enumeration<'ctx> {
    /// Returns a copy of the raw C API pointer held by this enumeration.
    pub(crate) fn capi(&self) -> *mut ffi::tiledb_enumeration_t {
        *self.raw.deref()
    }

    /// Fetches the enumeration's name from the C API as an owned `String`.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`, or whatever error `TDBString::to_string` produces.
    pub fn name(&self) -> TileDBResult<String> {
        let mut c_name: *mut ffi::tiledb_string_t = out_ptr!();
        let status = unsafe {
            ffi::tiledb_enumeration_get_name(
                self.context.capi(),
                self.capi(),
                &mut c_name,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }

        // Wrap the returned handle in `TDBString` before converting it.
        let wrapped = TDBString {
            raw: RawTDBString::Owned(c_name),
        };
        wrapped.to_string()
    }

    /// Fetches the datatype of the enumeration's values.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`.
    pub fn datatype(&self) -> TileDBResult<Datatype> {
        let mut c_dtype: ffi::tiledb_datatype_t = 0;
        let status = unsafe {
            ffi::tiledb_enumeration_get_type(
                self.context.capi(),
                self.capi(),
                &mut c_dtype,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }
        Ok(Datatype::from_capi_enum(c_dtype))
    }

    /// Fetches the number of values per cell.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`.
    pub fn cell_val_num(&self) -> TileDBResult<u32> {
        let mut num_values: u32 = 0;
        let status = unsafe {
            ffi::tiledb_enumeration_get_cell_val_num(
                self.context.capi(),
                self.capi(),
                &mut num_values,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }
        Ok(num_values)
    }

    /// Reports whether `cell_val_num` equals `u32::MAX`, the value this
    /// module uses to mark variable-sized cells.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Enumeration::cell_val_num`].
    pub fn is_var_sized(&self) -> TileDBResult<bool> {
        let num_values = self.cell_val_num()?;
        Ok(num_values == u32::MAX)
    }

    /// Fetches the enumeration's `ordered` flag.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`.
    pub fn ordered(&self) -> TileDBResult<bool> {
        let mut flag: i32 = 0;
        let status = unsafe {
            ffi::tiledb_enumeration_get_ordered(
                self.context.capi(),
                self.capi(),
                &mut flag,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }
        // The C API reports the flag as an integer; any non-zero value is true.
        Ok(flag != 0)
    }

    /// Creates a new enumeration by passing `column`'s data and offsets to
    /// `tiledb_enumeration_extend`. `self` is only borrowed; the result is
    /// a separate handle bound to the same context.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`.
    pub fn extend(&self, column: Column) -> TileDBResult<Enumeration<'ctx>> {
        let mut c_extended: *mut ffi::tiledb_enumeration_t = out_ptr!();

        // Rust slices never carry a null data pointer, so an empty offsets
        // buffer is translated into an explicit nullptr for the C API.
        let offsets_ptr = if column.offsets().is_empty() {
            std::ptr::null_mut()
        } else {
            column.offsets().as_ptr()
        };

        let data_ptr = column.data().as_ptr() as *const std::ffi::c_void;
        let data_len = column.data().len() as u64;
        let offsets_len = column.offsets().len() as u64;

        // `column` is owned by this function and is not dropped until it
        // returns, so both buffers outlive the FFI call below.
        // NOTE(review): the original author's note says the enumeration
        // allocator copies data and offsets instead of taking ownership;
        // presumably the same holds for `tiledb_enumeration_extend` - confirm.
        // NOTE(review): the lengths forwarded are `len()` of `data()` and
        // `offsets()`; verify these are the units the C API expects.
        let status = unsafe {
            ffi::tiledb_enumeration_extend(
                self.context.capi(),
                self.capi(),
                data_ptr,
                data_len,
                offsets_ptr as *const std::ffi::c_void,
                offsets_len,
                &mut c_extended,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }

        Ok(Enumeration {
            context: self.context,
            raw: RawEnumeration::Owned(c_extended),
        })
    }
}

impl<'ctx> Debug for Enumeration<'ctx> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
let name = self.name().map_err(|_| fmt::Error)?;
let dtype = self
.datatype()
.map_err(|_| fmt::Error)?
.to_string()
.unwrap_or("<unknown datatype>".to_owned());
let cell_val_num = self.cell_val_num().map_err(|_| fmt::Error)?;
let ordered = self.ordered().map_err(|_| fmt::Error)?;

// TODO: Add enumeration values display.

let json = json!({
"name": name,
"datatype": dtype,
"cell_val_num": cell_val_num,
"ordered": ordered
});
write!(f, "{}", json)
}
}

impl<'c1, 'c2> PartialEq<Enumeration<'c2>> for Enumeration<'c1> {
fn eq(&self, other: &Enumeration<'c2>) -> bool {
let names_match = match (self.name(), other.name()) {
(Ok(mine), Ok(theirs)) => mine == theirs,
_ => false,
};
if !names_match {
return false;
}

let types_match = match (self.datatype(), other.datatype()) {
(Ok(mine), Ok(theirs)) => mine == theirs,
_ => false,
};
if !types_match {
return false;
}

let cell_val_num_match =
match (self.cell_val_num(), other.cell_val_num()) {
(Ok(mine), Ok(theirs)) => mine == theirs,
_ => false,
};
if !cell_val_num_match {
return false;
}

let ordered_match = match (self.ordered(), other.ordered()) {
(Ok(mine), Ok(theirs)) => mine == theirs,
_ => false,
};
if !ordered_match {
return false;
}

// TODO: Match data and offsets

true
}
}

/// Collects the settings needed to create an [`Enumeration`] via `build`.
pub struct Builder<'ctx> {
/// Context used for the allocation call and handed to the built enumeration.
context: &'ctx Context,
/// Name given to the enumeration.
name: String,
/// Datatype passed to the C API for the enumeration's values.
dtype: Datatype,
/// Values per cell; `new` starts at 1 and `var_sized` sets `u32::MAX`.
cell_val_num: u32,
/// Ordered flag forwarded to the C API; `new` starts at `false`.
ordered: bool,
}

impl<'ctx> Builder<'ctx> {
    /// Starts a builder for an enumeration called `name` holding `dtype`
    /// values, with one value per cell and the ordered flag off.
    pub fn new(context: &'ctx Context, name: &str, dtype: Datatype) -> Self {
        let name = String::from(name);
        Builder {
            context,
            name,
            dtype,
            cell_val_num: 1,
            ordered: false,
        }
    }

    /// Sets the number of values per cell.
    pub fn cell_val_num(mut self, cell_val_num: u32) -> Self {
        self.cell_val_num = cell_val_num;
        self
    }

    /// Marks the enumeration as variable sized by setting the cell value
    /// number to `u32::MAX`.
    pub fn var_sized(mut self) -> Self {
        self.cell_val_num = u32::MAX;
        self
    }

    /// Sets the ordered flag.
    pub fn ordered(mut self, ordered: bool) -> Self {
        self.ordered = ordered;
        self
    }

    /// Consumes the builder and allocates the enumeration from `column`'s
    /// data and offsets through `tiledb_enumeration_alloc`.
    ///
    /// # Errors
    ///
    /// Returns the context's last error when the C call does not report
    /// `TILEDB_OK`. The `cstring!` conversion of the name may also bail
    /// out early (presumably on interior NUL bytes - confirm in the macro).
    pub fn build(self, column: Column) -> TileDBResult<Enumeration<'ctx>> {
        let mut c_handle: *mut ffi::tiledb_enumeration_t = out_ptr!();
        let raw_name = self.name.as_bytes();
        let c_name = cstring!(raw_name);
        let c_dtype = self.dtype.capi_enum();
        let c_ordered = if self.ordered { 1 } else { 0 };

        // Rust slices never carry a null data pointer, so an empty offsets
        // buffer is translated into an explicit nullptr for the C API.
        let offsets_ptr = if column.offsets().is_empty() {
            std::ptr::null_mut()
        } else {
            column.offsets().as_ptr()
        };

        let data_ptr = column.data().as_ptr() as *const std::ffi::c_void;
        let data_len = column.data().len() as u64;
        let offsets_len = column.offsets().len() as u64;

        // `column` is owned by this function and is not dropped until it
        // returns, so both buffers outlive the FFI call below. Per the
        // original author's note, the enumeration allocator copies the data
        // and offsets rather than taking ownership of them.
        // NOTE(review): the lengths forwarded are `len()` of `data()` and
        // `offsets()`; verify these are the units the C API expects.
        let status = unsafe {
            ffi::tiledb_enumeration_alloc(
                self.context.capi(),
                c_name.as_c_str().as_ptr(),
                c_dtype,
                self.cell_val_num,
                c_ordered,
                data_ptr,
                data_len,
                offsets_ptr as *const std::ffi::c_void,
                offsets_len,
                &mut c_handle,
            )
        };
        if status != ffi::TILEDB_OK {
            return Err(self.context.expect_last_error());
        }

        Ok(Enumeration {
            context: self.context,
            raw: RawEnumeration::Owned(c_handle),
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::column::AsColumn;

    /// A builder left at its defaults yields one value per cell, unordered.
    #[test]
    fn basic_build() -> TileDBResult<()> {
        let ctx = Context::new().expect("Error creating context instance.");
        let values = vec![0, 1, 2, 3, 4].as_column();
        let builder = Builder::new(&ctx, "foo", Datatype::Int32);
        let enmr = builder.build(values).expect("Error building enumeration.");

        assert_eq!(enmr.name()?, "foo");
        assert_eq!(enmr.datatype()?, Datatype::Int32);
        assert_eq!(enmr.cell_val_num()?, 1);
        assert!(!enmr.ordered()?);

        Ok(())
    }

    /// Requesting a variable-sized Int32 enumeration from this column is
    /// expected to be rejected.
    #[test]
    fn var_sized_error_build() -> TileDBResult<()> {
        let ctx = Context::new().expect("Error creating context instance.");
        let values = vec![0u8, 1, 2, 3, 4].as_column();
        let builder = Builder::new(&ctx, "foo", Datatype::Int32).var_sized();
        let outcome = builder.build(values);

        assert!(outcome.is_err());

        Ok(())
    }

    /// The ordered flag set on the builder is reported by the enumeration.
    #[test]
    fn ordered_build() -> TileDBResult<()> {
        let ctx = Context::new().expect("Error creating context instance.");
        let values = vec![0, 1, 2, 3, 4].as_column();
        let builder = Builder::new(&ctx, "foo", Datatype::Int32).ordered(true);
        let enmr = builder.build(values).expect("Error building enumeration.");

        assert_eq!(enmr.name()?, "foo");
        assert_eq!(enmr.datatype()?, Datatype::Int32);
        assert_eq!(enmr.cell_val_num()?, 1);
        assert!(enmr.ordered()?);

        Ok(())
    }

    /// A variable-sized string enumeration reports `u32::MAX` values per cell.
    #[test]
    fn string_build() -> TileDBResult<()> {
        let ctx = Context::new().expect("Error creating context instance.");
        let values = vec!["foo", "bar", "baz", "bam", "mam"].as_column();
        let builder =
            Builder::new(&ctx, "foo", Datatype::StringAscii).var_sized();
        let enmr = builder.build(values).expect("Error building enumeration.");

        assert_eq!(enmr.name()?, "foo");
        assert_eq!(enmr.datatype()?, Datatype::StringAscii);
        assert_eq!(enmr.cell_val_num()?, u32::MAX);
        assert!(!enmr.ordered()?);

        Ok(())
    }

    /// Extending keeps the name, datatype, cell value number and ordered
    /// flag of the source enumeration.
    #[test]
    fn extend_enumeration() -> TileDBResult<()> {
        let ctx = Context::new().expect("Error creating context instance.");
        let initial = vec![1, 2, 3, 4, 5].as_column();
        let base = Builder::new(&ctx, "foo", Datatype::Int32)
            .build(initial)
            .expect("Error building enumeration.");

        let additional = vec![6, 7, 8, 9, 10].as_column();
        let extended = base
            .extend(additional)
            .expect("Error extending enumeration.");

        assert_eq!(base.name()?, extended.name()?);
        assert_eq!(base.datatype()?, extended.datatype()?);
        assert_eq!(base.cell_val_num()?, extended.cell_val_num()?);
        assert_eq!(base.ordered()?, extended.ordered()?);

        Ok(())
    }
}
Loading

0 comments on commit c67ef8b

Please sign in to comment.