diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml new file mode 100644 index 000000000000..13a3008b74bc --- /dev/null +++ b/.github/actions/setup-builder/action.yaml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Prepare Rust Builder +description: 'Prepare Rust Build Environment' +inputs: + rust-version: + description: 'version of rust to install (e.g. 
stable)' + required: true + default: 'stable' +runs: + using: "composite" + steps: + - name: Install Build Dependencies + shell: bash + run: | + apt-get update + apt-get install -y protobuf-compiler + - name: Setup Rust toolchain + shell: bash + run: | + echo "Installing ${{ inputs.rust-version }}" + rustup toolchain install ${{ inputs.rust-version }} + rustup default ${{ inputs.rust-version }} + rustup component add rustfmt diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index cae7864da158..8646f8058ea1 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -54,10 +54,9 @@ jobs: path: /github/home/target key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}- - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} - name: Build workspace in debug mode run: | cargo build @@ -117,10 +116,9 @@ jobs: # this key equals the ones on `linux-build-lib` for re-use key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} - name: Run tests run: | export ARROW_TEST_DATA=$(pwd)/testing/data @@ -285,10 +283,9 @@ jobs: echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV python -m pip install pyarrow - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} - name: Run tests run: | cd datafusion @@ -343,10 +340,12 @@ jobs: # this key equals the ones on `linux-build-lib` for re-use key: ${{ runner.os }}-${{ matrix.arch 
}}-target-cache-${{ matrix.rust }} - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} + - name: Install Clippy run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt clippy + rustup component add clippy - name: Run clippy run: | cargo clippy --all-targets --workspace -- -D warnings @@ -420,10 +419,9 @@ jobs: # this key equals the ones on `linux-build-lib` for re-use key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} - name: Run tests run: | export ARROW_TEST_DATA=$(pwd)/testing/data @@ -466,9 +464,9 @@ jobs: # this key equals the ones on `linux-build-lib` for re-use key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ matrix.rust }} - name: Install cargo-tomlfmt run: | which cargo-tomlfmt || cargo install cargo-tomlfmt diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml index fb956ea2cd9c..afdd4862b459 100644 --- a/ballista-examples/Cargo.toml +++ b/ballista-examples/Cargo.toml @@ -39,6 +39,6 @@ ballista = { path = "../ballista/rust/client", version = "0.6.0" } datafusion = { path = "../datafusion/core" } futures = "0.3" num_cpus = "1.13.0" -prost = "0.9" +prost = "0.10" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } -tonic = "0.6" +tonic = "0.7" diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml index 8e27683414d7..94c6dfdb7cda 100644 --- a/ballista/rust/core/Cargo.toml +++ 
b/ballista/rust/core/Cargo.toml @@ -34,7 +34,7 @@ simd = ["datafusion/simd"] [dependencies] ahash = { version = "0.7", default-features = false } -arrow-flight = { version = "11.1" } +arrow-flight = { version = "12" } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } clap = { version = "3", features = ["derive", "cargo"] } @@ -49,12 +49,12 @@ once_cell = "1.9.0" parking_lot = "0.12" parse_arg = "0.1.3" -prost = "0.9" -prost-types = "0.9" +prost = "0.10" +prost-types = "0.10" serde = { version = "1", features = ["derive"] } sqlparser = "0.16" tokio = "1.0" -tonic = "0.6" +tonic = "0.7" uuid = { version = "0.8", features = ["v4"] } walkdir = "2.3.2" @@ -63,4 +63,4 @@ tempfile = "3" [build-dependencies] rustc_version = "0.4.0" -tonic-build = { version = "0.6" } +tonic-build = { version = "0.7" } diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml index 9ee793b7bfac..c0dfe1046b50 100644 --- a/ballista/rust/executor/Cargo.toml +++ b/ballista/rust/executor/Cargo.toml @@ -33,8 +33,8 @@ snmalloc = ["snmalloc-rs"] [dependencies] anyhow = "1" -arrow = { version = "11.1" } -arrow-flight = { version = "11.1" } +arrow = { version = "12" } +arrow-flight = { version = "12" } async-trait = "0.1.41" ballista-core = { path = "../core", version = "0.6.0" } chrono = { version = "0.4", default-features = false } @@ -49,7 +49,7 @@ snmalloc-rs = { version = "0.2", optional = true } tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] } tokio-stream = { version = "0.1", features = ["net"] } -tonic = "0.6" +tonic = "0.7" uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] diff --git a/ballista/rust/scheduler/Cargo.toml b/ballista/rust/scheduler/Cargo.toml index 25465adf53e5..884573c2dce4 100644 --- a/ballista/rust/scheduler/Cargo.toml +++ b/ballista/rust/scheduler/Cargo.toml @@ -42,7 +42,7 @@ clap = { version = "3", features = ["derive", "cargo"] } configure_me = 
"0.4.0" datafusion = { path = "../../../datafusion/core", version = "7.0.0" } env_logger = "0.9" -etcd-client = { version = "0.8", optional = true } +etcd-client = { version = "0.9", optional = true } futures = "0.3" http = "0.2" http-body = "0.4" @@ -50,13 +50,13 @@ hyper = "0.14.4" log = "0.4" parking_lot = "0.12" parse_arg = "0.1.3" -prost = "0.9" +prost = "0.10" rand = "0.8" serde = { version = "1", features = ["derive"] } sled_package = { package = "sled", version = "0.34", optional = true } tokio = { version = "1.0", features = ["full"] } tokio-stream = { version = "0.1", features = ["net"], optional = true } -tonic = "0.6" +tonic = "0.7" tower = { version = "0.4" } warp = "0.3" @@ -66,4 +66,4 @@ uuid = { version = "0.8", features = ["v4"] } [build-dependencies] configure_me_codegen = "0.4.1" -tonic-build = { version = "0.6" } +tonic-build = { version = "0.7" } diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 3a9a97083353..e9895deb183e 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -28,7 +28,7 @@ repository = "https://github.com/apache/arrow-datafusion" rust-version = "1.59" [dependencies] -arrow = { version = "11.1" } +arrow = { version = "12" } ballista = { path = "../ballista/rust/client", version = "0.6.0", optional = true } clap = { version = "3", features = ["derive", "cargo"] } datafusion = { path = "../datafusion/core", version = "7.0.0" } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 8a9c2feb4e26..e64a44f701ee 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -34,11 +34,11 @@ path = "examples/avro_sql.rs" required-features = ["datafusion/avro"] [dev-dependencies] -arrow-flight = { version = "11.1" } +arrow-flight = { version = "12" } async-trait = "0.1.41" datafusion = { path = "../datafusion/core" } futures = "0.3" num_cpus = "1.13.0" -prost = "0.9" +prost = "0.10" tokio = { version = "1.0", features = ["macros", "rt", 
"rt-multi-thread", "sync", "parking_lot"] } -tonic = "0.6" +tonic = "0.7" diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 4ac4e978e709..1ba19a056fc7 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -38,10 +38,10 @@ jit = ["cranelift-module"] pyarrow = ["pyo3"] [dependencies] -arrow = { version = "11.1", features = ["prettyprint"] } +arrow = { version = "12", features = ["prettyprint"] } avro-rs = { version = "0.13", features = ["snappy"], optional = true } cranelift-module = { version = "0.82.0", optional = true } ordered-float = "2.10" -parquet = { version = "11.1", features = ["arrow"], optional = true } +parquet = { version = "12", features = ["arrow"], optional = true } pyo3 = { version = "0.16", optional = true } sqlparser = "0.16" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 77d82cb46e41..467fe7b5449e 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions"] [dependencies] ahash = { version = "0.7", default-features = false } -arrow = { version = "11.1", features = ["prettyprint"] } +arrow = { version = "12", features = ["prettyprint"] } async-trait = "0.1.41" avro-rs = { version = "0.13", features = ["snappy"], optional = true } chrono = { version = "0.4", default-features = false } @@ -72,7 +72,7 @@ num-traits = { version = "0.2", optional = true } num_cpus = "1.13.0" ordered-float = "2.10" parking_lot = "0.12" -parquet = { version = "11.1", features = ["arrow"] } +parquet = { version = "12", features = ["arrow"] } paste = "^1.0" pin-project-lite= "^0.2.7" pyo3 = { version = "0.16", optional = true } diff --git a/datafusion/core/fuzz-utils/Cargo.toml b/datafusion/core/fuzz-utils/Cargo.toml index 65e36797a782..9255920a3414 100644 --- a/datafusion/core/fuzz-utils/Cargo.toml +++ b/datafusion/core/fuzz-utils/Cargo.toml @@ -23,6 +23,6 @@ edition = "2021" # See more keys 
and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow = { version = "11.1", features = ["prettyprint"] } +arrow = { version = "12", features = ["prettyprint"] } env_logger = "0.9.0" rand = "0.8" diff --git a/datafusion/core/src/physical_plan/file_format/json.rs b/datafusion/core/src/physical_plan/file_format/json.rs index ef9d498a0122..5c02a9c92d7a 100644 --- a/datafusion/core/src/physical_plan/file_format/json.rs +++ b/datafusion/core/src/physical_plan/file_format/json.rs @@ -16,6 +16,7 @@ // under the License. //! Execution plan for reading line-delimited JSON files +use arrow::json::reader::DecoderOptions; use async_trait::async_trait; use crate::error::{DataFusionError, Result}; @@ -102,12 +103,19 @@ impl ExecutionPlan for NdJsonExec { // The json reader cannot limit the number of records, so `remaining` is ignored. let fun = move |file, _remaining: &Option<usize>| { - Box::new(json::Reader::new( - file, - Arc::clone(&file_schema), - batch_size, - proj.clone(), - )) as BatchIter + // TODO: make DecoderOptions implement Clone so we can + // clone here rather than recreating the options each time + // https://github.com/apache/arrow-rs/issues/1580 + let options = DecoderOptions::new().with_batch_size(batch_size); + + let options = if let Some(proj) = proj.clone() { + options.with_projection(proj) + } else { + options + }; + + Box::new(json::Reader::new(file, Arc::clone(&file_schema), options)) + as BatchIter }; Ok(Box::pin(FileStream::new( diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 33f51e38745d..7459490ef7c7 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -36,6 +36,6 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.7", default-features = false } -arrow = { version = "11.1", features = ["prettyprint"] } +arrow = { version = "12", features = ["prettyprint"] } datafusion-common = { path = "../common", version = "7.0.0" } sqlparser = "0.16" diff --git
a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 052f5d82e54c..1ccfd9418c1e 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -36,7 +36,7 @@ path = "src/lib.rs" jit = [] [dependencies] -arrow = { version = "11.1" } +arrow = { version = "12" } cranelift = "0.82.0" cranelift-jit = "0.82.0" cranelift-module = "0.82.0" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 6c39a004951e..2ae5fa319840 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"] [dependencies] ahash = { version = "0.7", default-features = false } -arrow = { version = "11.1", features = ["prettyprint"] } +arrow = { version = "12", features = ["prettyprint"] } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4", default-features = false } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 5b1cdae72cb2..bafc327125c3 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -36,7 +36,7 @@ path = "src/lib.rs" [dependencies] datafusion = { path = "../core", version = "7.0.0" } -prost = "0.9" +prost = "0.10" [build-dependencies] -tonic-build = { version = "0.6" } +tonic-build = { version = "0.7" }