From 965f4bcd54ed8f95c0b9d18f4312353727e37ea3 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sat, 13 Jan 2024 11:11:55 +0800 Subject: [PATCH] feat: add more components to the wasm-pack compatible list (#8843) * feat: add datafusion-physical-plan to compatible list Signed-off-by: Ruihang Xia * feat: add datafusion-execution to the list Signed-off-by: Ruihang Xia * feat: add datafusion to the list Signed-off-by: Ruihang Xia * fix: toml format Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- datafusion/core/Cargo.toml | 7 ++++--- datafusion/core/src/datasource/listing/url.rs | 4 ++++ datafusion/execution/src/object_store.rs | 11 ++++++++++- datafusion/physical-plan/Cargo.toml | 15 ++++++++++++--- datafusion/substrait/Cargo.toml | 2 ++ datafusion/wasmtest/Cargo.toml | 5 ++++- datafusion/wasmtest/README.md | 3 +++ 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 9de6a7f7d6a0..c2e8c2b44531 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -37,7 +37,7 @@ path = "src/lib.rs" # Used to enable the avro format avro = ["apache-avro", "num-traits", "datafusion-common/avro"] backtrace = ["datafusion-common/backtrace"] -compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"] +compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"] crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"] default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"] encoding_expressions = ["datafusion-physical-expr/encoding_expressions"] @@ -87,8 +87,8 @@ pin-project-lite = "^0.2.7" rand = { workspace = true } sqlparser = { workspace = true } tempfile = { workspace = true } -tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } -tokio-util = { version = "0.7.4", 
features = ["io"] } +tokio = { version = "1.28", features = ["macros", "rt", "sync"] } +tokio-util = { version = "0.7.4", features = ["io"], optional = true } url = { workspace = true } uuid = { version = "1.0", features = ["v4"] } xz2 = { version = "0.1", optional = true } @@ -113,6 +113,7 @@ rust_decimal = { version = "1.27.0", features = ["tokio-pg"] } serde_json = { workspace = true } test-utils = { path = "../../test-utils" } thiserror = { workspace = true } +tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-postgres = "0.7.7" [target.'cfg(not(target_os = "windows"))'.dev-dependencies] nix = { version = "0.27.1", features = ["fs"] } diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 766dee7de901..6421edf77972 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -103,12 +103,14 @@ impl ListingTableUrl { let s = s.as_ref(); // This is necessary to handle the case of a path starting with a drive letter + #[cfg(not(target_arch = "wasm32"))] if std::path::Path::new(s).is_absolute() { return Self::parse_path(s); } match Url::parse(s) { Ok(url) => Self::try_new(url, None), + #[cfg(not(target_arch = "wasm32"))] Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s), Err(e) => Err(DataFusionError::External(Box::new(e))), } @@ -146,6 +148,7 @@ impl ListingTableUrl { } /// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path + #[cfg(not(target_arch = "wasm32"))] fn parse_path(s: &str) -> Result<Self> { let (path, glob) = match split_glob_expression(s) { Some((prefix, glob)) => { @@ -282,6 +285,7 @@ impl ListingTableUrl { } /// Creates a file URL from a potentially relative filesystem path +#[cfg(not(target_arch = "wasm32"))] fn url_from_filesystem_path(s: &str) -> Option<Url> { let path = std::path::Path::new(s); let is_dir = match path.exists() { diff --git
a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs index 5a1cdb769098..7626f8bef162 100644 --- a/datafusion/execution/src/object_store.rs +++ b/datafusion/execution/src/object_store.rs @@ -21,6 +21,7 @@ use dashmap::DashMap; use datafusion_common::{exec_err, DataFusionError, Result}; +#[cfg(not(target_arch = "wasm32"))] use object_store::local::LocalFileSystem; use object_store::ObjectStore; use std::sync::Arc; @@ -169,16 +170,24 @@ impl Default for DefaultObjectStoreRegistry { impl DefaultObjectStoreRegistry { /// This will register [`LocalFileSystem`] to handle `file://` paths + #[cfg(not(target_arch = "wasm32"))] pub fn new() -> Self { let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new(); object_stores.insert("file://".to_string(), Arc::new(LocalFileSystem::new())); Self { object_stores } } + + /// Default without any backend registered. + #[cfg(target_arch = "wasm32")] + pub fn new() -> Self { + Self::default() + } } /// /// Stores are registered based on the scheme, host and port of the provided URL -/// with a [`LocalFileSystem::new`] automatically registered for `file://` +/// with a [`LocalFileSystem::new`] automatically registered for `file://` (if the +/// target arch is not `wasm32`).
/// /// For example: /// diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 6c761fc9687c..357e036b6f39 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -33,7 +33,9 @@ name = "datafusion_physical_plan" path = "src/lib.rs" [dependencies] -ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } +ahash = { version = "0.8", default-features = false, features = [ + "runtime-rng", +] } arrow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } @@ -54,10 +56,17 @@ once_cell = "1.18.0" parking_lot = { workspace = true } pin-project-lite = "^0.2.7" rand = { workspace = true } -tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] } +tokio = { version = "1.28", features = ["sync"] } uuid = { version = "^1.2", features = ["v4"] } [dev-dependencies] rstest = { workspace = true } termtree = "0.4.1" -tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } +tokio = { version = "1.28", features = [ + "macros", + "rt", + "rt-multi-thread", + "sync", + "fs", + "parking_lot", +] } diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index e15b59999afb..160af37ef961 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -36,6 +36,8 @@ object_store = { workspace = true } prost = "0.12" prost-types = "0.12" substrait = "0.22.1" + +[dev-dependencies] tokio = "1.17" [features] diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index c5f795d0653a..91af15a6ea62 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -28,7 +28,7 @@ authors = { workspace = true } rust-version = "1.70" [lib] -crate-type = ["cdylib", "rlib",] +crate-type = ["cdylib", "rlib"] [dependencies] @@ -37,11 +37,14 @@ crate-type = ["cdylib", "rlib",] # all the `std::fmt` and `std::panicking` 
infrastructure, so isn't great for # code size when deploying. console_error_panic_hook = { version = "0.1.1", optional = true } +datafusion = { path = "../core", default-features = false } datafusion-common = { workspace = true } +datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } +datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } # getrandom must be compiled with js feature diff --git a/datafusion/wasmtest/README.md b/datafusion/wasmtest/README.md index d26369a18ab9..4af0f94db9e9 100644 --- a/datafusion/wasmtest/README.md +++ b/datafusion/wasmtest/README.md @@ -59,10 +59,13 @@ Then open http://localhost:8080/ in a web browser and check the console to see t The following DataFusion crates are verified to work in a wasm-pack environment using the default `wasm32-unknown-unknown` target: +- `datafusion` (datafusion-core) with default-features disabled to remove `bzip2-sys` from `async-compression` - `datafusion-common` with default-features disabled to remove the `parquet` dependency (see below) - `datafusion-expr` +- `datafusion-execution` - `datafusion-optimizer` - `datafusion-physical-expr` +- `datafusion-physical-plan` - `datafusion-sql` The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be possible to compile the core `datafusion` crate to WASM as well.