rewrite scipy, fix rstudio

ucsd-ets · Dec 19, 2024 · 6b737c7 · 6b737c7
1 parent e5457ef
commit 6b737c7
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 67 deletions.
diff --git a/images/rstudio-notebook/Dockerfile b/images/rstudio-notebook/Dockerfile
@@ -7,17 +7,22 @@ USER root
 
 # Ubuntu 22 setup with v 2024.04.2-764
 ## Follow instructions at https://www.rstudio.com/products/rstudio/download-server/
+## The public key ID used to verify RSTUDIO_PKG is published at https://posit.co/code-signing/ - update RSTUDIO_PUBKEY from there when it changes.
+## NOTE(review): RSTUDIO_PUBKEY below is a 16-hex-digit long key ID; consider pinning the full fingerprint instead - TODO confirm.
 ENV RSTUDIO_PKG=rstudio-server-2024.09.1-394-amd64.deb
 ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/${RSTUDIO_PKG}
+ENV RSTUDIO_PUBKEY=51C0B5BB19F92D60
 
 ## rstudio installation expects R to live in /usr/bin, /bin/, etc.
 RUN ln -s /opt/conda/bin/R /usr/bin/R && \
     apt-get update && \
-    apt-get -qq install -y apt-utils gdebi-core dpkg-sig && \
+    apt-get -qq install -y apt-utils gdebi-core wget gnupg && \
     wget ${RSTUDIO_URL} && \
+    gpg --keyserver keys.openpgp.org --recv-keys ${RSTUDIO_PUBKEY} && \
+    gpg --verify ${RSTUDIO_PKG} && \
     gdebi -n ${RSTUDIO_PKG} && \
     rm -f ${RSTUDIO_PKG} && \
-    echo '/opt/conda/lib/R/lib' > /etc/ld.so.conf.d/r.conf && /sbin/ldconfig -v && \
+    echo '/opt/conda/lib/R/lib' > /etc/ld.so.conf.d/r.conf && \
+    /sbin/ldconfig -v && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     rm -f /usr/bin/R && \
     chmod -R g=u /var/lib/rstudio-server && \

diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile
@@ -3,23 +3,21 @@ FROM ghcr.io/ucsd-ets/datascience-notebook:${BASE_TAG}
 
 USER root
 
-# tensorflow, pytorch stable versions
-# https://pytorch.org/get-started/previous-versions/
-# https://www.tensorflow.org/install/source#linux
-
-# Python/Mamba deps
-## Package versions
-## tf 2.13 does not work with torch 2.2.1. Both require conflicting versions of typing-extensions
-ARG CUDA_VERSION=12.1 CUDNN_VERSION=8.9.2.26 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \
-  TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.3.1 \
-  PROTOBUF_VERSION=3.20.3 
+# Build-time version pins; each can be overridden with --build-arg.
+# GPU libraries
+ARG CUDA_VERSION=12.1
+ARG CUDNN_VERSION=8.9.7.29
+ARG TENSORRT_VERSION=8.6.1
+# Deep-learning frameworks
+ARG TORCH_VERSION=2.3.1
+ARG TENSORFLOW_VERSION=2.17.0
+ARG KERAS_VERSION=3.5.0
+# Installed on its own with pip further down
+ARG PROTOBUF_VERSION=3.20.3
 
 # apt deps
 RUN apt-get update && \
-  apt-get install -y \
-  libtinfo5 build-essential && \
+  apt-get install -y libtinfo5 build-essential && \
   apt-get clean && rm -rf /var/lib/apt/lists/*
-## Symbolic link for Stata 17 dependency on libncurses5
+
+# Symbolic link for Stata 17 dependency on libncurses5
 RUN ln -s libncurses.so.6 /usr/lib/x86_64-linux-gnu/libncurses.so.5
 
 # Jupyter setup
@@ -37,82 +35,72 @@ ADD manual_tests /opt/manual_tests
 
 RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh
 
-# cudnn (TBD)
-#RUN apt update && apt install -y wget && \
-#    wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/libcudnn8_8.9.6.50-1+cuda11.8_amd64.deb && \
-#    dpkg -i libcudnn8_8.9.6.50-1+cuda11.8_amd64.deb && \
-#    rm libcudnn8_8.9.6.50-1+cuda11.8_amd64.deb && \
-#    apt-get clean && \
-#    rm -rf /var/lib/apt/lists/*
-
+# Switch to non-root user for installing packages via mamba/pip
 USER jovyan
 
 # Install nvdashboard for GPU monitoring
-RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard
-
-# CUDA setup w/mamba
-## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge). 
-# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
-RUN mamba install -c "nvidia/label/cuda-12.1.1" cuda-nvcc \
-    cuda-toolkit=$CUDA_VERSION \
-    cuda-version=$CUDA_VERSION \
+# Pass -y explicitly so this step never waits on a confirmation prompt,
+# matching the other newly added mamba install steps in this file.
+RUN mamba install -y -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard && \
+    mamba clean -a -y
+
+# Install the CUDA compiler/toolkit, NCCL and cuDNN with mamba.
+# The channel label is hard-coded to cuda-12.1.1 while the package pins come from CUDA_VERSION / CUDNN_VERSION; keep them in sync.
+RUN mamba install -c "nvidia/label/cuda-12.1.1" \
+    cuda-nvcc \
+    cuda-toolkit=${CUDA_VERSION} \
+    cuda-version=${CUDA_VERSION} \
     nccl \
+    cudnn=${CUDNN_VERSION} \
     -y && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
     mamba clean -a -y
 
-# Install scipy pip packages
-## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
-## https://github.com/spesmilo/electrum/issues/7825
-## pip cache purge didnt work here for some reason.
-RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
-## cuda-python installed to have parity with tensorflow and cudnn
-## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
-## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
-RUN pip install opencv-contrib-python-headless \
-    opencv-python && \
-    fix-permissions $CONDA_DIR && \ 
+# Pin protobuf with pip before the other packages. The previous revision noted that this
+# avoids a base-type error and a duplicate install (https://github.com/spesmilo/electrum/issues/7825).
+RUN pip install --no-cache-dir "protobuf==${PROTOBUF_VERSION}"
+
+# Install OpenCV from PyPI, then repair ownership of the conda dir and the user's home.
+# NOTE(review): opencv-contrib-python-headless and opencv-python both ship the cv2 module, and the opencv-python README advises installing only one of its variants - confirm which one is intended.
+RUN pip install --no-cache-dir opencv-contrib-python-headless opencv-python && \
+    fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
     pip cache purge
 
+# Install common packages via conda-forge
 RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
     mamba clean --all
 
-# Install CUDA/Torch/Tensorflow/Keras w/pip
-# TF Compatibility Matrix: https://www.tensorflow.org/install/source?hl=en#gpu
-## no purge required but no-cache-dir is used. pip purge will actually break the build here!
-## Beware of potentially needing to update these if we update the drivers.
-## Check tensorrt_env_vars.sh if you have to bump tensorrt!
-RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
-  pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \
-  fix-permissions $CONDA_DIR && \
-  fix-permissions /home/$NB_USER && \
-  mamba clean -a -y && \
-  pip cache purge
-
-RUN pip install transformers datasets accelerate huggingface-hub timm && \
-  fix-permissions $CONDA_DIR && \
-  fix-permissions /home/$NB_USER && \
-  mamba clean -a -y && \
-  pip cache purge
+# PyTorch, torchvision and torchaudio via mamba.
+# Packages are resolved from the pytorch and nvidia channels; pytorch-cuda=12.1 is requested alongside them.
+RUN mamba install -y -c pytorch -c nvidia \
+        pytorch==${TORCH_VERSION} \
+        torchvision \
+        torchaudio \
+        pytorch-cuda=12.1 && \
+    fix-permissions ${CONDA_DIR} && \
+    fix-permissions /home/${NB_USER} && \
+    mamba clean --all --yes
+
+# TensorFlow, Keras and tensorflow-datasets via mamba from conda-forge.
+# Note: confirm that the pinned versions exist on conda-forge and are GPU-accelerated builds.
+RUN mamba install -y -c conda-forge \
+        tensorflow==${TENSORFLOW_VERSION} \
+        keras==${KERAS_VERSION} \
+        tensorflow-datasets && \
+    fix-permissions ${CONDA_DIR} && \
+    fix-permissions /home/${NB_USER} && \
+    mamba clean --all --yes
+
+# Additional ML packages via pip.
+# --no-cache-dir already keeps pip from writing a cache, and no conda operation runs in this
+# layer, so no trailing "mamba clean" / "pip cache purge" is needed here.
+RUN pip install --no-cache-dir transformers datasets accelerate huggingface-hub timm && \
+    fix-permissions $CONDA_DIR && \
+    fix-permissions /home/$NB_USER
 
 USER $NB_UID:$NB_GID
 ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin
 
 # CUDA fixes for CONDA
-## Copy libdevice file to the required path
 RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
     cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
-    #CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
 
-# TensorRT fix for tensorflow
-## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
-## This will most definitely have to be changed after 8.6.1...
-RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
-    ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION
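+# TensorRT symlink fix for TensorFlow - disabled in this revision.
+# No step above installs the tensorrt pip package any more, so the tensorrt_libs paths below may not exist; re-enable and adjust the paths if TensorRT is added back.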
+#RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.${TENSORRT_VERSION} && \
+#    ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.${TENSORRT_VERSION}
 
 # Run datahub scripts
-RUN . /tmp/activate.sh
+RUN . /tmp/activate.sh