From b5ce47c95faba2c25908f0197f3be7d7d1c6142f Mon Sep 17 00:00:00 2001 From: skirui-source Date: Thu, 1 Feb 2024 11:09:27 -0800 Subject: [PATCH 1/4] create pr -update dataproc --- source/cloud/gcp/dataproc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/gcp/dataproc.md b/source/cloud/gcp/dataproc.md index bfe9c571..8dcae3f1 100644 --- a/source/cloud/gcp/dataproc.md +++ b/source/cloud/gcp/dataproc.md @@ -31,7 +31,7 @@ $ gcloud dataproc clusters create $CLUSTER_NAME\ --initialization-actions=gs://$GCS_BUCKET/install_gpu_driver.sh,gs://$GCS_BUCKET/dask.sh,gs://$GCS_BUCKET/rapids.sh\ --initialization-action-timeout 60m\ --optional-components=JUPYTER\ - --metadata gpu-driver-provider=NVIDIA,dask-runtime=$DASK_RUNTIME,rapids-runtime=DASK\ + --metadata gpu-driver-provider=NVIDIA,dask-runtime=$DASK_RUNTIME,rapids-runtime=DASK,rapids-version=$RAPIDS_VERSION,cuda-version=12.0\ --enable-component-gateway ``` From ea2dc35de07583d6959c904094e47b181788dc05 Mon Sep 17 00:00:00 2001 From: skirui-source Date: Wed, 7 Feb 2024 18:38:50 -0800 Subject: [PATCH 2/4] add warning --compatible only with rapids v23.12 or earlier --- source/cloud/gcp/dataproc.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/source/cloud/gcp/dataproc.md b/source/cloud/gcp/dataproc.md index 8dcae3f1..bdc7ec10 100644 --- a/source/cloud/gcp/dataproc.md +++ b/source/cloud/gcp/dataproc.md @@ -18,9 +18,18 @@ $ gsutil cp gs://goog-dataproc-initialization-actions-${REGION}/rapids/rapids.sh **1. Create Dataproc cluster with Dask RAPIDS.** Use the gcloud command to create a new cluster. Because of an Anaconda version conflict, script deployment on older images is slow, we recommend using Dask with Dataproc 2.0+. +````{warning} +At the time of writing Dataproc only supports RAPIDS version 23.12 and earlier with CUDA<=11.8 and Ubuntu 18.04. + +Please ensure that your setup complies with this compatibility requirement. 
Using newer RAPIDS versions may result in unexpected behavior or errors. + + ```console $ CLUSTER_NAME= $ DASK_RUNTIME=yarn +$ RAPIDS_VERSION=23.12 +$ CUDA_VERSION=11.8 + $ gcloud dataproc clusters create $CLUSTER_NAME\ --region $REGION\ --image-version 2.0-ubuntu18\ @@ -31,11 +40,13 @@ $ gcloud dataproc clusters create $CLUSTER_NAME\ --initialization-actions=gs://$GCS_BUCKET/install_gpu_driver.sh,gs://$GCS_BUCKET/dask.sh,gs://$GCS_BUCKET/rapids.sh\ --initialization-action-timeout 60m\ --optional-components=JUPYTER\ - --metadata gpu-driver-provider=NVIDIA,dask-runtime=$DASK_RUNTIME,rapids-runtime=DASK,rapids-version=$RAPIDS_VERSION,cuda-version=12.0\ + --metadata gpu-driver-provider=NVIDIA,dask-runtime=$DASK_RUNTIME,rapids-runtime=DASK,rapids-version=$RAPIDS_VERSION,cuda-version=$CUDA_VERSION\ --enable-component-gateway ``` +```` + [GCS_BUCKET] = name of the bucket to use.\ [CLUSTER_NAME] = name of the cluster.\ [REGION] = name of region where cluster is to be created.\ From 4531917b4d5eebc8ae4cdf0e4d9390ea1f5eaef8 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:15:33 -0800 Subject: [PATCH 3/4] Update source/cloud/gcp/dataproc.md Co-authored-by: Jacob Tomlinson --- source/cloud/gcp/dataproc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/gcp/dataproc.md b/source/cloud/gcp/dataproc.md index bdc7ec10..a44bac78 100644 --- a/source/cloud/gcp/dataproc.md +++ b/source/cloud/gcp/dataproc.md @@ -19,7 +19,7 @@ $ gsutil cp gs://goog-dataproc-initialization-actions-${REGION}/rapids/rapids.sh **1. Create Dataproc cluster with Dask RAPIDS.** Use the gcloud command to create a new cluster. Because of an Anaconda version conflict, script deployment on older images is slow, we recommend using Dask with Dataproc 2.0+. ````{warning} -At the time of writing Dataproc only supports RAPIDS version 23.12 and earlier with CUDA<=11.8 and Ubuntu 18.04. 
+At the time of writing [Dataproc only supports RAPIDS version 23.12 and earlier with CUDA<=11.8 and Ubuntu 18.04](https://github.com/GoogleCloudDataproc/initialization-actions/issues/1137). Please ensure that your setup complies with this compatibility requirement. Using newer RAPIDS versions may result in unexpected behavior or errors. From 3bd213a474b6caf4ce47b44fca17d323d1b72eaf Mon Sep 17 00:00:00 2001 From: skirui-source Date: Thu, 8 Feb 2024 12:19:18 -0800 Subject: [PATCH 4/4] address reviews --- source/cloud/gcp/dataproc.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/cloud/gcp/dataproc.md b/source/cloud/gcp/dataproc.md index a44bac78..eb78cd48 100644 --- a/source/cloud/gcp/dataproc.md +++ b/source/cloud/gcp/dataproc.md @@ -18,11 +18,11 @@ $ gsutil cp gs://goog-dataproc-initialization-actions-${REGION}/rapids/rapids.sh **1. Create Dataproc cluster with Dask RAPIDS.** Use the gcloud command to create a new cluster. Because of an Anaconda version conflict, script deployment on older images is slow, we recommend using Dask with Dataproc 2.0+. -````{warning} +```{warning} At the time of writing [Dataproc only supports RAPIDS version 23.12 and earlier with CUDA<=11.8 and Ubuntu 18.04](https://github.com/GoogleCloudDataproc/initialization-actions/issues/1137). Please ensure that your setup complies with this compatibility requirement. Using newer RAPIDS versions may result in unexpected behavior or errors. - +``` ```console $ CLUSTER_NAME= @@ -45,8 +45,6 @@ $ gcloud dataproc clusters create $CLUSTER_NAME\ ``` -```` - [GCS_BUCKET] = name of the bucket to use.\ [CLUSTER_NAME] = name of the cluster.\ [REGION] = name of region where cluster is to be created.\