Skip to content

Commit

Permalink
feat: add more components to the wasm-pack compatible list (#8843)
Browse files Browse the repository at this point in the history
* feat: add datafusion-physical-plan to compatible list

Signed-off-by: Ruihang Xia <[email protected]>

* feat: add datafusion-execution to the list

Signed-off-by: Ruihang Xia <[email protected]>

* feat: add datafusion to the list

Signed-off-by: Ruihang Xia <[email protected]>

* fix: toml format

Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
  • Loading branch information
waynexia authored Jan 13, 2024
1 parent eb81ea2 commit 965f4bc
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 8 deletions.
7 changes: 4 additions & 3 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ path = "src/lib.rs"
# Used to enable the avro format
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-physical-expr/encoding_expressions"]
Expand Down Expand Up @@ -87,8 +87,8 @@ pin-project-lite = "^0.2.7"
rand = { workspace = true }
sqlparser = { workspace = true }
tempfile = { workspace = true }
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-util = { version = "0.7.4", features = ["io"] }
tokio = { version = "1.28", features = ["macros", "rt", "sync"] }
tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
uuid = { version = "1.0", features = ["v4"] }
xz2 = { version = "0.1", optional = true }
Expand All @@ -113,6 +113,7 @@ rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
serde_json = { workspace = true }
test-utils = { path = "../../test-utils" }
thiserror = { workspace = true }
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-postgres = "0.7.7"
[target.'cfg(not(target_os = "windows"))'.dev-dependencies]
nix = { version = "0.27.1", features = ["fs"] }
Expand Down
4 changes: 4 additions & 0 deletions datafusion/core/src/datasource/listing/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,14 @@ impl ListingTableUrl {
let s = s.as_ref();

// This is necessary to handle the case of a path starting with a drive letter
#[cfg(not(target_arch = "wasm32"))]
if std::path::Path::new(s).is_absolute() {
return Self::parse_path(s);
}

match Url::parse(s) {
Ok(url) => Self::try_new(url, None),
#[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s),
Err(e) => Err(DataFusionError::External(Box::new(e))),
}
Expand Down Expand Up @@ -146,6 +148,7 @@ impl ListingTableUrl {
}

/// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path
#[cfg(not(target_arch = "wasm32"))]
fn parse_path(s: &str) -> Result<Self> {
let (path, glob) = match split_glob_expression(s) {
Some((prefix, glob)) => {
Expand Down Expand Up @@ -282,6 +285,7 @@ impl ListingTableUrl {
}

/// Creates a file URL from a potentially relative filesystem path
#[cfg(not(target_arch = "wasm32"))]
fn url_from_filesystem_path(s: &str) -> Option<Url> {
let path = std::path::Path::new(s);
let is_dir = match path.exists() {
Expand Down
11 changes: 10 additions & 1 deletion datafusion/execution/src/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
use dashmap::DashMap;
use datafusion_common::{exec_err, DataFusionError, Result};
#[cfg(not(target_arch = "wasm32"))]
use object_store::local::LocalFileSystem;
use object_store::ObjectStore;
use std::sync::Arc;
Expand Down Expand Up @@ -169,16 +170,24 @@ impl Default for DefaultObjectStoreRegistry {

impl DefaultObjectStoreRegistry {
    /// Creates a registry with a [`LocalFileSystem`] pre-registered to handle
    /// `file://` paths.
    ///
    /// Only compiled when the target architecture is not `wasm32`.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn new() -> Self {
        let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new();
        object_stores.insert("file://".to_string(), Arc::new(LocalFileSystem::new()));
        Self { object_stores }
    }

    /// Creates a registry without any backend registered.
    ///
    /// Only compiled for `wasm32`, where the [`LocalFileSystem`] import is
    /// gated out, so no `file://` store is registered.
    #[cfg(target_arch = "wasm32")]
    pub fn new() -> Self {
        // Build the empty map directly instead of going through
        // `Self::default()`. NOTE(review): if the `Default` impl of this type
        // delegates to `Self::new()`, calling it here would recurse without
        // bound on wasm32 -- confirm against the `Default` impl above.
        let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new();
        Self { object_stores }
    }
}

///
/// Stores are registered based on the scheme, host and port of the provided URL
/// with a [`LocalFileSystem::new`] automatically registered for `file://`
/// with a [`LocalFileSystem::new`] automatically registered for `file://` (if the
/// target arch is not `wasm32`).
///
/// For example:
///
Expand Down
15 changes: 12 additions & 3 deletions datafusion/physical-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ name = "datafusion_physical_plan"
path = "src/lib.rs"

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
Expand All @@ -54,10 +56,17 @@ once_cell = "1.18.0"
parking_lot = { workspace = true }
pin-project-lite = "^0.2.7"
rand = { workspace = true }
tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] }
tokio = { version = "1.28", features = ["sync"] }
uuid = { version = "^1.2", features = ["v4"] }

[dev-dependencies]
rstest = { workspace = true }
termtree = "0.4.1"
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio = { version = "1.28", features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
"fs",
"parking_lot",
] }
2 changes: 2 additions & 0 deletions datafusion/substrait/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ object_store = { workspace = true }
prost = "0.12"
prost-types = "0.12"
substrait = "0.22.1"

[dev-dependencies]
tokio = "1.17"

[features]
Expand Down
5 changes: 4 additions & 1 deletion datafusion/wasmtest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ authors = { workspace = true }
rust-version = "1.70"

[lib]
crate-type = ["cdylib", "rlib",]
crate-type = ["cdylib", "rlib"]

[dependencies]

Expand All @@ -37,11 +37,14 @@ crate-type = ["cdylib", "rlib",]
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.1", optional = true }
datafusion = { path = "../core", default-features = false }

datafusion-common = { workspace = true }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-plan = { workspace = true }
datafusion-sql = { workspace = true }

# getrandom must be compiled with js feature
Expand Down
3 changes: 3 additions & 0 deletions datafusion/wasmtest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,13 @@ Then open http://localhost:8080/ in a web browser and check the console to see t

The following DataFusion crates are verified to work in a wasm-pack environment using the default `wasm32-unknown-unknown` target:

- `datafusion` (datafusion-core) with default-features disabled to remove `bzip2-sys` from `async-compression`
- `datafusion-common` with default-features disabled to remove the `parquet` dependency (see below)
- `datafusion-expr`
- `datafusion-execution`
- `datafusion-optimizer`
- `datafusion-physical-expr`
- `datafusion-physical-plan`
- `datafusion-sql`

The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be possible to compile the remaining crates to WASM as well.

0 comments on commit 965f4bc

Please sign in to comment.