Skip to content

Commit

Permalink
[DAPHNE-daphne-eu#844] HPC Containers for multiple CUDA platforms
Browse files Browse the repository at this point in the history
- docker container that converts seamlessly to singularity
  The docker dev container contains an entrypoint script for ssh access that is not working in the singularity converted container without super user privileges. A separate daphne-dev-hpc container avoids this convenience functionality
- compile cuda for all hardware generations
  • Loading branch information
corepointer committed Oct 15, 2024
1 parent bb9c158 commit 7e1a96f
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 20 deletions.
28 changes: 14 additions & 14 deletions containers/build-containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ DAPHNE_TARGET=daphne-deps
BASE_IMAGE=ubuntu:${ubuntuVersion}
# The braces around TIMESTAMP_DATE are required: an unbraced "$TIMESTAMP_DATE_"
# is parsed as the (unset) variable TIMESTAMP_DATE_ and would silently drop the
# timestamp from the tag. This matches the tag scheme of the other images.
DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
# Flags handed to the DAPHNE build for the dependency stage.
DAPHNE_BUILD_FLAGS="--hdfs --mpi"
# build deps stage
build_daphne -deps

Expand All @@ -106,7 +107,6 @@ BASE_IMAGE=ubuntu:${ubuntuVersion}
DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}_BASE_ubuntu${ubuntuVersion}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
build_daphne -dev

$USE_SUDO docker tag $IMAGE_REPO:$DAPHNE_TAG daphneeu/daphne-dev:latest_${ARCH}_BASE

#------------------------------------------------------------------------------
Expand All @@ -118,19 +118,8 @@ BASE_IMAGE=nvidia/cuda:$CUDA_TAG
DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}_CUDA_${CUDA_TAG}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
build_daphne -dev

$USE_SUDO docker tag $IMAGE_REPO:$DAPHNE_TAG daphneeu/daphne-dev:latest_${ARCH}_CUDA

#-----------------------------------------------------------------------------
# Images for DAPHNE development (OneAPI)
#------------------------------------------------------------------------------
#DAPHNE_TARGET=daphne-dev
#ONEAPI_TAG=2023.1.0-devel-ubuntu${ubuntuVersion}
#BASE_IMAGE=intel/oneapi:$ONEAPI_TAG
#DAPHNE_TAG=${TIMESTAMP_DATE}_${ONEAPI_TAG}
#IMAGE_REPO=daphneeu/$DAPHNE_TARGET
#build_daphne -dev

#------------------------------------------------------------------------------
# Images for running DAPHNE
#------------------------------------------------------------------------------
Expand All @@ -139,7 +128,7 @@ BASE_IMAGE=daphneeu/daphne-deps
FINAL_BASE_IMAGE=ubuntu:${ubuntuVersion}
DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}_BASE_ubuntu${ubuntuVersion}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
DAPHNE_BUILD_FLAGS="--mpi"
DAPHNE_BUILD_FLAGS="--hdfs --mpi"
build_daphne
$USE_SUDO docker tag $IMAGE_REPO:$DAPHNE_TAG daphneeu/daphne:latest_${ARCH}_BASE

Expand All @@ -152,8 +141,19 @@ DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}_CUDA_${CUDA_TAG}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
BASE_IMAGE=daphneeu/daphne-dev
FINAL_BASE_IMAGE=nvidia/cuda:$CUDA_TAG
DAPHNE_BUILD_FLAGS="--mpi --cuda"
DAPHNE_BUILD_FLAGS="--hdfs --mpi --cuda"
build_daphne
$USE_SUDO docker tag $IMAGE_REPO:$DAPHNE_TAG daphneeu/daphne:latest_${ARCH}_CUDA

#-----------------------------------------------------------------------------
# Images for conversion to singularity for DAPHNE compilation
#------------------------------------------------------------------------------
# Build target; also used below to derive the image repository name.
DAPHNE_TARGET=daphne-dev-hpc
# The HPC image is based on the CUDA cuDNN development image for the
# configured CUDA and Ubuntu versions.
CUDA_TAG=${cudaVersion}-cudnn-devel-ubuntu${ubuntuVersion}
BASE_IMAGE=nvidia/cuda:$CUDA_TAG
DAPHNE_TAG=${TIMESTAMP_DATE}_${ARCH}_CUDA_${CUDA_TAG}
IMAGE_REPO=daphneeu/$DAPHNE_TARGET
# presumably "-dev-hpc" selects daphne-dev-hpc.Dockerfile -- verify against build_daphne
build_daphne -dev-hpc
# NOTE(review): the image is built in the daphneeu/daphne-dev-hpc repository,
# but the "latest" alias is created in daphneeu/daphne-dev (suffix _HPC) --
# confirm that this cross-repository tag is intended.
$USE_SUDO docker tag $IMAGE_REPO:$DAPHNE_TAG daphneeu/daphne-dev:latest_${ARCH}_HPC

set +e
3 changes: 2 additions & 1 deletion containers/daphne-deps.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ FROM build-cmake AS build
ARG DAPHNE_DIR=/daphne
ARG DAPHNE_REPO=https://github.com/daphne-eu/daphne.git
ARG DAPHNE_BRANCH=main
ARG DAPHNE_BUILD_FLAGS="--mpi --hdfs"
RUN git clone --depth=1 --single-branch --branch=$DAPHNE_BRANCH $DAPHNE_REPO $DAPHNE_DIR
WORKDIR $DAPHNE_DIR
RUN ./build.sh --no-fancy --no-submodule-update --installPrefix /usr/local
RUN PATH=/usr/local/bin:$PATH LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ./build.sh --no-fancy --no-submodule-update --installPrefix /usr/local $DAPHNE_BUILD_FLAGS
RUN find /usr/local -exec file {} \; | grep -e "not stripped" | cut -d ":" -f 1 | xargs strip --strip-unneeded
RUN rm -rf $DAPHNE_DIR
RUN ldconfig
Expand Down
51 changes: 51 additions & 0 deletions containers/daphne-dev-hpc.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# syntax=docker/dockerfile:1

# Copyright 2023 The DAPHNE Consortium
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This Dockerfile provides a basic DAPHNE compilation environment with all
# third party dependencies precompiled
# (use `./build.sh --no-deps --installPrefix /usr/local` to compile DAPHNE).
#
# It is the variant of the dev container meant for conversion to Singularity:
# the interactive ssh entrypoint of the regular dev container is intentionally
# left out because it does not work in the converted container without super
# user privileges.

# Build arguments. BASE_IMAGE selects the image this container is built on.
# CMAKE_VERSION and TIMESTAMP are declared but not referenced in this file.
ARG BASE_IMAGE=ubuntu:20.04
ARG CMAKE_VERSION=3.29.3
ARG TIMESTAMP=0
ARG TZ=Etc/UTC

FROM ${BASE_IMAGE} AS daphne-dev-hpc
ARG DEBIAN_FRONTEND="noninteractive"
# Re-declare TZ so the global default is visible inside this build stage.
ARG TZ
# Install the toolchain and development utilities in a single layer and drop
# the apt caches afterwards. Each package is listed exactly once.
RUN apt-get -qq -y update && apt-get -y upgrade && apt-get -y --no-install-recommends install \
    ca-certificates file git openssh-client unzip wget tar \
    libomp-dev libpfm4-dev libssl-dev libxml2-dev uuid-dev zlib1g-dev libgsasl-dev libkrb5-dev \
    build-essential clang gfortran lld llvm llvm-18-tools ninja-build openjdk-11-jdk-headless pkg-config python3-numpy python3-pandas \
    vim nano rsync sudo iputils-ping virtualenv openssh-server iproute2 htop gdb lldb gpg-agent net-tools \
    software-properties-common zstd \
    ccache python3-pip python3-networkx python3-dev graphviz-dev clang-format \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Take the precompiled third party dependencies from the daphne-deps image.
COPY --from=daphneeu/daphne-deps /usr/local/bin/ /usr/local/bin/
COPY --from=daphneeu/daphne-deps /usr/local/include/ /usr/local/include/
COPY --from=daphneeu/daphne-deps /usr/local/lib/ /usr/local/lib/
COPY --from=daphneeu/daphne-deps /usr/local/share/ /usr/local/share/
# Refresh the linker cache so the libraries copied to /usr/local/lib are found.
RUN ldconfig
# This is a temporary workaround to make the lit code (from the llvm-*-tools
# package) available to some pre-Ubuntu24 test cases when run locally in the
# dev container.
RUN ln -s /usr/lib/llvm-18 /usr/lib/llvm-10
# Point the system time zone at the zone selected via the TZ build argument.
RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime
8 changes: 4 additions & 4 deletions containers/publish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ fi
$USE_SUDO docker push -a daphneeu/github-action

# cuda dev image
$USE_SUDO docker tag daphneeu/daphne-dev:${TIMESTAMP_DATE}_${ARCH}_CUDA_${cudaVersion}-cudnn8-devel-ubuntu${ubuntuVersion} daphneeu/daphne-dev:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn8-devel-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne-dev:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn8-devel-ubuntu${ubuntuVersion}
$USE_SUDO docker tag daphneeu/daphne-dev:${TIMESTAMP_DATE}_${ARCH}_CUDA_${cudaVersion}-cudnn-devel-ubuntu${ubuntuVersion} daphneeu/daphne-dev:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn-devel-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne-dev:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn-devel-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne-dev:latest_${ARCH}_CUDA

# base dev image
Expand All @@ -54,8 +54,8 @@ $USE_SUDO docker push daphneeu/daphne-dev:${VERSION}_${ARCH}_BASE_ubuntu${ubuntu
$USE_SUDO docker push daphneeu/daphne-dev:latest_${ARCH}_BASE

# cuda run image
$USE_SUDO docker tag daphneeu/daphne:${TIMESTAMP_DATE}_${ARCH}_CUDA_${cudaVersion}-cudnn8-runtime-ubuntu${ubuntuVersion} daphneeu/daphne:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn8-runtime-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn8-runtime-ubuntu${ubuntuVersion}
$USE_SUDO docker tag daphneeu/daphne:${TIMESTAMP_DATE}_${ARCH}_CUDA_${cudaVersion}-cudnn-runtime-ubuntu${ubuntuVersion} daphneeu/daphne:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn-runtime-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne:${VERSION}_${ARCH}_CUDA_${cudaVersion}-cudnn-runtime-ubuntu${ubuntuVersion}
$USE_SUDO docker push daphneeu/daphne:latest_${ARCH}_CUDA

# base run image
Expand Down
2 changes: 1 addition & 1 deletion software-package-versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ abslVersion=20230802.1
antlrVersion=4.9.2
arrowVersion=13.0.0
catch2Version=2.13.8
cmakeVersion=3.30.3
cmakeVersion=3.30.5
cudaVersion=12.6.1
eigenVersion=3.4.0
grpcVersion=1.38.0
Expand Down
1 change: 1 addition & 0 deletions src/runtime/local/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
target_link_libraries(CUDAKernels PUBLIC DataStructures LLVMSupport MLIRDaphne MLIRDaphneTransforms CUDA::cudart CUDA::cublasLt CUDA::cublas
CUDA::cusparse ${CUDA_cudnn_LIBRARY} CUDA::cusolver Util MLIRDaphneInference fmt::fmt)
# Place the CUDAKernels library in <project>/lib and request device code for
# all CUDA architectures.
# NOTE(review): the special CUDA_ARCHITECTURES value "all" needs CMake >= 3.23
# -- confirm against the project's minimum required CMake version.
set_target_properties(CUDAKernels PROPERTIES
        LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib
        CUDA_ARCHITECTURES all)
endif()

execute_process(
Expand Down

0 comments on commit 7e1a96f

Please sign in to comment.