From 34e0f1158156bb38ed51cafa36b86ca2cb90a215 Mon Sep 17 00:00:00 2001 From: Madhavan Sridharan Date: Mon, 26 Aug 2024 10:11:46 -0400 Subject: [PATCH] Bump Spark to 3.5.2 --- Dockerfile | 8 ++++---- README.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index c1a147d0..8c76e27f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,9 @@ RUN mkdir -p /assets/ && cd /assets && \ curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz && \ tar -xzf ./cqlsh-astra.tar.gz && \ rm ./cqlsh-astra.tar.gz && \ - curl -OL https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz && \ - tar -xzf ./spark-3.5.1-bin-hadoop3-scala2.13.tgz && \ - rm ./spark-3.5.1-bin-hadoop3-scala2.13.tgz + curl -OL https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3-scala2.13.tgz && \ + tar -xzf ./spark-3.5.2-bin-hadoop3-scala2.13.tgz && \ + rm ./spark-3.5.2-bin-hadoop3-scala2.13.tgz RUN apt-get update && apt-get install -y openssh-server vim python3 --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && \ @@ -44,7 +44,7 @@ RUN chmod +x ./get-latest-maven-version.sh && \ rm -rf "$USER_HOME_DIR/.m2" # Add all migration tools to path -ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.5.1-bin-hadoop3-scala2.13/bin/" +ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.5.2-bin-hadoop3-scala2.13/bin/" EXPOSE 22 diff --git a/README.md b/README.md index 9fe2e1b2..e21af958 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,10 @@ Migrate and Validate Tables between Origin and Target Cassandra Clusters. ### Prerequisite - Install **Java11** (minimum) as Spark binaries are compiled with it. -- Install Spark version [`3.5.1`](https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. Spark can be installed by running the following: - +- Install Spark version [`3.5.2`](https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. Spark can be installed by running the following: - ``` -wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz -tar -xvzf spark-3.5.1-bin-hadoop3-scala2.13.tgz +wget https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3-scala2.13.tgz +tar -xvzf spark-3.5.2-bin-hadoop3-scala2.13.tgz ``` > :warning: If the above Spark and Scala version is not properly installed, you'll then see a similar exception like below when running the CDM jobs,