Skip to content

Commit

Permalink
Change to virtual workspace with tiledb-api crate, and add tiledb-arr…
Browse files Browse the repository at this point in the history
…ow crate (#8)

* Virtual workspace with tiledb-api, tiledb-sys crates

* Add tiledb-arrow crate

* arrow_type_physical with proptest

* Proper directory structure, all crates in 'tiledb'
  • Loading branch information
rroelke authored Mar 19, 2024
1 parent 35ff669 commit 78a073e
Show file tree
Hide file tree
Showing 50 changed files with 216 additions and 12 deletions.
18 changes: 10 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
[package]
name = "tiledb"
version = "0.1.0"
edition = "2021"
[workspace]
members = [ "tiledb/api", "tiledb/arrow", "tiledb/sys" ]
resolver = "2"

[dependencies]
tiledb-sys = {version = "0.1.0", path = "tiledb-sys" }
[workspace.package]
edition = "2021"
rust-version = "1.72"
version = "0.1.0"

[dev-dependencies]
tempdir = "0.3.7"
[workspace.dependencies]
tiledb = { path = "tiledb/api", version = "0.1.0" }
tiledb-sys = { path = "tiledb/sys", version = "0.1.0" }
4 changes: 0 additions & 4 deletions tiledb-sys/Cargo.toml

This file was deleted.

14 changes: 14 additions & 0 deletions tiledb/api/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "tiledb"
version = { workspace = true }
edition = { workspace = true }

[lib]
name = "tiledb"
path = "src/lib.rs"

[dependencies]
tiledb-sys = { workspace = true }

[dev-dependencies]
tempdir = "0.3.7"
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 12 additions & 0 deletions tiledb/arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "tiledb-arrow"
version = { workspace = true }
edition = { workspace = true }

[dependencies]
arrow = { version = "50.0.0", features = ["prettyprint"] }
arrow-schema = { version = "50.0.0", default-features = false }
tiledb = { workspace = true }

[dev-dependencies]
proptest = "1.0.0"
168 changes: 168 additions & 0 deletions tiledb/arrow/src/datatype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/// For a TileDB type, returns an Arrow type if the bits of the canonical input type match.
/// If this returns Some(arrow_dt), then values of arrow_dt can be used in functions which expect tdb_dt, and vice verse.
pub fn arrow_type_physical(
tdb_dt: &tiledb::Datatype,
) -> Option<arrow_schema::DataType> {
match *tdb_dt {
tiledb::Datatype::Int8 => Some(arrow_schema::DataType::Int8),
tiledb::Datatype::Int16 => Some(arrow_schema::DataType::Int16),
tiledb::Datatype::Int32 => Some(arrow_schema::DataType::Int32),
tiledb::Datatype::Int64 => Some(arrow_schema::DataType::Int64),
tiledb::Datatype::UInt8 => Some(arrow_schema::DataType::UInt8),
tiledb::Datatype::UInt16 => Some(arrow_schema::DataType::UInt16),
tiledb::Datatype::UInt32 => Some(arrow_schema::DataType::UInt32),
tiledb::Datatype::UInt64 => Some(arrow_schema::DataType::UInt64),
tiledb::Datatype::Float32 => Some(arrow_schema::DataType::Float32),
tiledb::Datatype::Float64 => Some(arrow_schema::DataType::Float64),
tiledb::Datatype::Char => None,
tiledb::Datatype::StringAscii => None,
tiledb::Datatype::StringUtf8 => None,
tiledb::Datatype::StringUtf16 => None,
tiledb::Datatype::StringUtf32 => None,
tiledb::Datatype::StringUcs2 => None,
tiledb::Datatype::StringUcs4 => None,
tiledb::Datatype::Any => None,
tiledb::Datatype::DateTimeYear => None,
tiledb::Datatype::DateTimeMonth => None,
tiledb::Datatype::DateTimeWeek => None,
tiledb::Datatype::DateTimeDay => None,
tiledb::Datatype::DateTimeHour => None,
tiledb::Datatype::DateTimeMinute => None,
tiledb::Datatype::DateTimeSecond => {
Some(arrow_schema::DataType::Timestamp(
arrow_schema::TimeUnit::Second,
None,
))
}
tiledb::Datatype::DateTimeMillisecond => {
Some(arrow_schema::DataType::Timestamp(
arrow_schema::TimeUnit::Millisecond,
None,
))
}
tiledb::Datatype::DateTimeMicrosecond => {
Some(arrow_schema::DataType::Timestamp(
arrow_schema::TimeUnit::Microsecond,
None,
))
}
tiledb::Datatype::DateTimeNanosecond => {
Some(arrow_schema::DataType::Timestamp(
arrow_schema::TimeUnit::Microsecond,
None,
))
}
tiledb::Datatype::DateTimePicosecond => None,
tiledb::Datatype::DateTimeFemtosecond => None,
tiledb::Datatype::DateTimeAttosecond => None,
tiledb::Datatype::TimeHour => None,
tiledb::Datatype::TimeMinute => None,
tiledb::Datatype::TimeSecond => None, // TODO: arrow type is 32 bits, is tiledb type?
tiledb::Datatype::TimeMillisecond => None,
tiledb::Datatype::TimeMicrosecond => Some(
arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Microsecond),
),
tiledb::Datatype::TimeNanosecond => Some(
arrow_schema::DataType::Time64(arrow_schema::TimeUnit::Nanosecond),
),
tiledb::Datatype::TimePicosecond => None,
tiledb::Datatype::TimeFemtosecond => None,
tiledb::Datatype::TimeAttosecond => None,
tiledb::Datatype::Blob => None,
tiledb::Datatype::Boolean => None,
tiledb::Datatype::GeometryWkb => None,
tiledb::Datatype::GeometryWkt => None,
}
}

/// For an Arrow type, returns a TileDB type if the bits of the canonical input type match.
/// If this returns Some(tdb_t), then values for tdb_t can be used in functions which expect
/// arrow_dt and vice verse.
pub fn tiledb_type_physical(
arrow_dt: &arrow_schema::DataType,
) -> Option<tiledb::Datatype> {
match *arrow_dt {
arrow_schema::DataType::Int8 => Some(tiledb::Datatype::Int8),
arrow_schema::DataType::Int16 => Some(tiledb::Datatype::Int16),
arrow_schema::DataType::Int32 => Some(tiledb::Datatype::Int32),
arrow_schema::DataType::Int64 => Some(tiledb::Datatype::Int64),
arrow_schema::DataType::UInt8 => Some(tiledb::Datatype::UInt8),
arrow_schema::DataType::UInt16 => Some(tiledb::Datatype::UInt16),
arrow_schema::DataType::UInt32 => Some(tiledb::Datatype::UInt32),
arrow_schema::DataType::UInt64 => Some(tiledb::Datatype::UInt64),
arrow_schema::DataType::Float32 => Some(tiledb::Datatype::Float32),
arrow_schema::DataType::Float64 => Some(tiledb::Datatype::Float64),
_ => None, // TODO
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy that picks one of the `tiledb::Datatype` variants listed below.
    fn datatype_strategy() -> impl Strategy<Value = tiledb::Datatype> {
        prop_oneof![
            Just(tiledb::Datatype::Int8),
            Just(tiledb::Datatype::Int16),
            Just(tiledb::Datatype::Int32),
            Just(tiledb::Datatype::Int64),
            Just(tiledb::Datatype::UInt8),
            Just(tiledb::Datatype::UInt16),
            Just(tiledb::Datatype::UInt32),
            Just(tiledb::Datatype::UInt64),
            Just(tiledb::Datatype::Float32),
            Just(tiledb::Datatype::Float64),
            Just(tiledb::Datatype::Char),
            Just(tiledb::Datatype::StringAscii),
            Just(tiledb::Datatype::StringUtf8),
            Just(tiledb::Datatype::StringUtf16),
            Just(tiledb::Datatype::StringUtf32),
            Just(tiledb::Datatype::StringUcs2),
            Just(tiledb::Datatype::StringUcs4),
            Just(tiledb::Datatype::Any),
            Just(tiledb::Datatype::DateTimeYear),
            Just(tiledb::Datatype::DateTimeMonth),
            Just(tiledb::Datatype::DateTimeWeek),
            Just(tiledb::Datatype::DateTimeDay),
            Just(tiledb::Datatype::DateTimeHour),
            Just(tiledb::Datatype::DateTimeMinute),
            Just(tiledb::Datatype::DateTimeSecond),
            Just(tiledb::Datatype::DateTimeMillisecond),
            Just(tiledb::Datatype::DateTimeMicrosecond),
            Just(tiledb::Datatype::DateTimeNanosecond),
            Just(tiledb::Datatype::DateTimePicosecond),
            Just(tiledb::Datatype::DateTimeFemtosecond),
            Just(tiledb::Datatype::DateTimeAttosecond),
            Just(tiledb::Datatype::TimeHour),
            Just(tiledb::Datatype::TimeMinute),
            Just(tiledb::Datatype::TimeSecond),
            Just(tiledb::Datatype::TimeMillisecond),
            Just(tiledb::Datatype::TimeMicrosecond),
            Just(tiledb::Datatype::TimeNanosecond),
            Just(tiledb::Datatype::TimePicosecond),
            Just(tiledb::Datatype::TimeFemtosecond),
            Just(tiledb::Datatype::TimeAttosecond),
            Just(tiledb::Datatype::Blob),
            Just(tiledb::Datatype::Boolean),
            Just(tiledb::Datatype::GeometryWkb),
            Just(tiledb::Datatype::GeometryWkt),
        ]
    }

    proptest! {
        // Whenever a TileDB datatype has a physical Arrow equivalent with a
        // primitive width, the two widths must agree.
        #[test]
        fn test_physical(tdb_dt in datatype_strategy()) {
            if let Some(arrow_dt) = arrow_type_physical(&tdb_dt) {
                if let Some(adt_width) = arrow_dt.primitive_width() {
                    let tdb_width: usize = tdb_dt.size().try_into().unwrap();
                    // `prop_assert_eq!` reports the failure through proptest
                    // instead of panicking inside the test-case closure.
                    prop_assert_eq!(adt_width, tdb_width);
                }
                // TODO: when there is no primitive width, assert that
                // `tdb_dt` is variable-length

                // TODO: invertibility
            }
        }
    }
}
8 changes: 8 additions & 0 deletions tiledb/arrow/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//! Crate root for `tiledb-arrow`.
//!
//! Exposes the [`datatype`] module, which maps between TileDB datatypes and
//! Arrow datatypes.

extern crate arrow;
extern crate arrow_schema;
extern crate tiledb;

// `proptest` is only linked into test builds.
#[cfg(test)]
extern crate proptest;

/// Conversions between `tiledb::Datatype` and `arrow_schema::DataType`.
pub mod datatype;
File renamed without changes.
4 changes: 4 additions & 0 deletions tiledb/sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[package]
name = "tiledb-sys"
version = { workspace = true }
edition = { workspace = true }
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 78a073e

Please sign in to comment.