From 51bd5bbe60a1af15ebd20b1a7632934c5c1dbe9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Milenkovi=C4=87?= Date: Sat, 8 Feb 2025 17:07:13 +0000 Subject: [PATCH] update datafusion to ver.45 --- Cargo.toml | 20 +++++++++---------- ballista-cli/Cargo.toml | 8 ++++---- ballista/client/Cargo.toml | 12 +++++------ ballista/core/Cargo.toml | 2 +- .../src/execution_plans/shuffle_reader.rs | 4 ++-- ballista/executor/Cargo.toml | 4 ++-- ballista/scheduler/Cargo.toml | 4 ++-- ballista/scheduler/src/planner.rs | 4 ++-- ballista/scheduler/src/state/task_manager.rs | 6 +++--- benchmarks/Cargo.toml | 6 +++--- benchmarks/src/bin/tpch.rs | 2 +- examples/Cargo.toml | 10 +++++----- 12 files changed, 41 insertions(+), 41 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2a3e66323..c02f4bf68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,15 +21,15 @@ members = ["ballista-cli", "ballista/client", "ballista/core", "ballista/executo resolver = "2" [workspace.dependencies] -arrow = { version = "53", features = ["ipc_compression"] } -arrow-flight = { version = "53", features = ["flight-sql-experimental"] } +arrow = { version = "54", features = ["ipc_compression"] } +arrow-flight = { version = "54", features = ["flight-sql-experimental"] } clap = { version = "4.5", features = ["derive", "cargo"] } configure_me = { version = "0.4.0" } configure_me_codegen = { version = "0.4.4" } -datafusion = "44.0.0" -datafusion-cli = "44.0.0" -datafusion-proto = "44.0.0" -datafusion-proto-common = "44.0.0" +datafusion = "45.0.0" +datafusion-cli = "45.0.0" +datafusion-proto = "45.0.0" +datafusion-proto-common = "45.0.0" object_store = "0.11" prost = "0.13" prost-types = "0.13" @@ -45,15 +45,15 @@ ctor = { version = "0.2" } mimalloc = { version = "0.1" } tokio = { version = "1" } -uuid = { version = "1.10", features = ["v4", "v7"] } -rand = { version = "0.8" } +uuid = { version = "1.13", features = ["v4", "v7"] } +rand = { version = "0.9" } env_logger = { version = "0.11" } futures = { version = 
"0.3" } log = { version = "0.4" } parking_lot = { version = "0.12" } -tempfile = { version = "3" } +tempfile = { version = "3.16" } dashmap = { version = "6.1" } -async-trait = { version = "0.1.4" } +async-trait = { version = "0.1" } serde = { version = "1.0" } tokio-stream = { version = "0.1" } url = { version = "2.5" } diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml index 46587d187..7061e8216 100644 --- a/ballista-cli/Cargo.toml +++ b/ballista-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-cli" description = "Command Line Client for Ballista distributed query engine." -version = "44.0.0" +version = "45.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["ballista", "cli"] @@ -28,14 +28,14 @@ repository = "https://github.com/apache/arrow-ballista" readme = "README.md" [dependencies] -ballista = { path = "../ballista/client", version = "44.0.0", features = ["standalone"] } +ballista = { path = "../ballista/client", version = "45.0.0", features = ["standalone"] } clap = { workspace = true, features = ["derive", "cargo"] } datafusion = { workspace = true } datafusion-cli = { workspace = true } -dirs = "5.0.1" +dirs = "6.0" env_logger = { workspace = true } mimalloc = { workspace = true } -rustyline = "14.0.0" +rustyline = "15.0.0" tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } [features] diff --git a/ballista/client/Cargo.toml b/ballista/client/Cargo.toml index a509a30f1..106b7b672 100644 --- a/ballista/client/Cargo.toml +++ b/ballista/client/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "44.0.0" +version = "45.0.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" @@ -28,9 +28,9 @@ edition = "2021" [dependencies] async-trait = { workspace = true } -ballista-core = { path = "../core", version = "44.0.0" } 
-ballista-executor = { path = "../executor", version = "44.0.0", optional = true } -ballista-scheduler = { path = "../scheduler", version = "44.0.0", optional = true } +ballista-core = { path = "../core", version = "45.0.0" } +ballista-executor = { path = "../executor", version = "45.0.0", optional = true } +ballista-scheduler = { path = "../scheduler", version = "45.0.0", optional = true } datafusion = { workspace = true } log = { workspace = true } @@ -38,8 +38,8 @@ tokio = { workspace = true } url = { workspace = true } [dev-dependencies] -ballista-executor = { path = "../executor", version = "44.0.0" } -ballista-scheduler = { path = "../scheduler", version = "44.0.0" } +ballista-executor = { path = "../executor", version = "45.0.0" } +ballista-scheduler = { path = "../scheduler", version = "45.0.0" } ctor = { workspace = true } datafusion-proto = { workspace = true } env_logger = { workspace = true } diff --git a/ballista/core/Cargo.toml b/ballista/core/Cargo.toml index 731b32c0e..4f67525a5 100644 --- a/ballista/core/Cargo.toml +++ b/ballista/core/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-core" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "44.0.0" +version = "45.0.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" diff --git a/ballista/core/src/execution_plans/shuffle_reader.rs b/ballista/core/src/execution_plans/shuffle_reader.rs index 7a20f1215..a9521dc88 100644 --- a/ballista/core/src/execution_plans/shuffle_reader.rs +++ b/ballista/core/src/execution_plans/shuffle_reader.rs @@ -50,7 +50,7 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use itertools::Itertools; use log::{error, info}; use rand::prelude::SliceRandom; -use rand::thread_rng; +use rand::rng; use tokio::sync::{mpsc, Semaphore}; use tokio_stream::wrappers::ReceiverStream; @@ -163,7 +163,7 @@ impl ExecutionPlan for ShuffleReaderExec { .map(|(_, p)| p) 
.collect(); // Shuffle partitions for evenly send fetching partition requests to avoid hot executors within multiple tasks - partition_locations.shuffle(&mut thread_rng()); + partition_locations.shuffle(&mut rng()); let response_receiver = send_fetch_partitions(partition_locations, max_request_num); diff --git a/ballista/executor/Cargo.toml b/ballista/executor/Cargo.toml index facde01ed..d5bd5efb5 100644 --- a/ballista/executor/Cargo.toml +++ b/ballista/executor/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-executor" description = "Ballista Distributed Compute - Executor" license = "Apache-2.0" -version = "44.0.0" +version = "45.0.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" @@ -42,7 +42,7 @@ default = ["build-binary", "mimalloc"] arrow = { workspace = true } arrow-flight = { workspace = true } async-trait = { workspace = true } -ballista-core = { path = "../core", version = "44.0.0" } +ballista-core = { path = "../core", version = "45.0.0" } configure_me = { workspace = true, optional = true } dashmap = { workspace = true } datafusion = { workspace = true } diff --git a/ballista/scheduler/Cargo.toml b/ballista/scheduler/Cargo.toml index 585dab985..ebfcbde38 100644 --- a/ballista/scheduler/Cargo.toml +++ b/ballista/scheduler/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-scheduler" description = "Ballista Distributed Compute - Scheduler" license = "Apache-2.0" -version = "44.0.0" +version = "45.0.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" readme = "README.md" @@ -46,7 +46,7 @@ rest-api = ["graphviz-rust"] arrow-flight = { workspace = true } async-trait = { workspace = true } axum = "0.7.7" -ballista-core = { path = "../core", version = "44.0.0" } +ballista-core = { path = "../core", version = "45.0.0" } base64 = { version = "0.22", optional = true } clap = { workspace = true, optional = true } configure_me = { 
workspace = true, optional = true } diff --git a/ballista/scheduler/src/planner.rs b/ballista/scheduler/src/planner.rs index a01267091..7d8a19bd0 100644 --- a/ballista/scheduler/src/planner.rs +++ b/ballista/scheduler/src/planner.rs @@ -556,7 +556,7 @@ order by let join = coalesce_batches.children()[0].clone(); let join = downcast_exec!(join, HashJoinExec); - assert!(join.contain_projection()); + assert!(join.contains_projection()); let join_input_1 = join.children()[0].clone(); // skip CoalesceBatches @@ -687,7 +687,7 @@ order by assert_eq!(Some(&Column::new("l_shipmode", 1)), partition_by); assert_eq!(InputOrderMode::Sorted, window.input_order_mode); let sort = downcast_exec!(window.children()[0], SortExec); - match &sort.expr().inner[..] { + match &sort.expr().iter().collect::>()[..] { [expr1, expr2] => { assert_eq!( SortOptions { diff --git a/ballista/scheduler/src/state/task_manager.rs b/ballista/scheduler/src/state/task_manager.rs index 8bad64f62..53a352bd1 100644 --- a/ballista/scheduler/src/state/task_manager.rs +++ b/ballista/scheduler/src/state/task_manager.rs @@ -26,6 +26,7 @@ use ballista_core::error::BallistaError; use ballista_core::error::Result; use ballista_core::extension::SessionConfigHelperExt; use datafusion::prelude::SessionConfig; +use rand::distr::Alphanumeric; use crate::cluster::JobState; use ballista_core::serde::protobuf::{ @@ -39,8 +40,7 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion_proto::logical_plan::AsLogicalPlan; use datafusion_proto::physical_plan::AsExecutionPlan; use log::{debug, error, info, trace, warn}; -use rand::distributions::Alphanumeric; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use std::collections::{HashMap, HashSet}; use std::ops::Deref; use std::sync::Arc; @@ -644,7 +644,7 @@ impl TaskManager /// Generate a new random Job ID pub fn generate_job_id(&self) -> String { - let mut rng = thread_rng(); + let mut rng = rng(); std::iter::repeat(()) .map(|()| rng.sample(Alphanumeric)) 
.map(char::from) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 4d851cdcb..0c0655486 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-benchmarks" description = "Ballista Benchmarks" -version = "44.0.0" +version = "45.0.0" edition = "2021" authors = ["Apache DataFusion "] homepage = "https://github.com/apache/arrow-ballista" @@ -32,7 +32,7 @@ default = ["mimalloc"] snmalloc = ["snmalloc-rs"] [dependencies] -ballista = { path = "../ballista/client", version = "44.0.0" } +ballista = { path = "../ballista/client", version = "45.0.0" } datafusion = { workspace = true } datafusion-proto = { workspace = true } env_logger = { workspace = true } @@ -51,4 +51,4 @@ tokio = { version = "^1.0", features = [ ] } [dev-dependencies] -ballista-core = { path = "../ballista/core", version = "44.0.0" } +ballista-core = { path = "../ballista/core", version = "45.0.0" } diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 72cc848df..1e9f4a37f 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -501,7 +501,7 @@ async fn loadtest_ballista(opt: BallistaLoadtestOpt) -> Result<()> { let query_id = query_list_clone .get( (0..query_list_clone.len()) - .choose(&mut rand::thread_rng()) + .choose(&mut rand::rng()) .unwrap(), ) .unwrap(); diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 1df640fcb..266d01018 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-examples" description = "Ballista usage examples" -version = "44.0.0" +version = "45.0.0" homepage = "https://github.com/apache/arrow-ballista" repository = "https://github.com/apache/arrow-ballista" authors = ["Apache DataFusion "] @@ -33,10 +33,10 @@ path = "examples/standalone-sql.rs" required-features = ["ballista/standalone"] [dependencies] -ballista = { path = "../ballista/client", version = "44.0.0" } -ballista-core = { path = "../ballista/core", 
version = "44.0.0" } -ballista-executor = { path = "../ballista/executor", version = "44.0.0", default-features = false } -ballista-scheduler = { path = "../ballista/scheduler", version = "44.0.0", default-features = false } +ballista = { path = "../ballista/client", version = "45.0.0" } +ballista-core = { path = "../ballista/core", version = "45.0.0" } +ballista-executor = { path = "../ballista/executor", version = "45.0.0", default-features = false } +ballista-scheduler = { path = "../ballista/scheduler", version = "45.0.0", default-features = false } datafusion = { workspace = true } env_logger = { workspace = true } log = { workspace = true }