From 9e6f6213a1da5ed0b1385b70da7ebe1484cb99e6 Mon Sep 17 00:00:00 2001
From: mloubout
Date: Wed, 29 Jan 2025 09:04:17 -0500
Subject: [PATCH] docker: update compilers to more recent versions

---
Review notes (this area is ignored by `git am`):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. If a context line fails to match, apply with
  `git apply --ignore-whitespace`.
- docker/Dockerfile.amd: the added `ln -s ... | echo ...` now uses `||`, so
  the message is printed only when the symlink cannot be created; the
  truncated message text ("exis") is completed.
- docker/Dockerfile.cpu: the added gcc-13 step uses `apt-get`, matching the
  rest of that Dockerfile, instead of `apt`.
- `make -j ${nproc}` appears only on unchanged context lines and is left
  untouched here; NOTE(review): `nproc` is not defined in any visible hunk, so
  `$(nproc)` may have been intended - confirm in a follow-up change.

 .github/workflows/pytest-gpu.yml |  4 ++--
 docker/Dockerfile.amd            | 29 +++++++++++++++++++----------
 docker/Dockerfile.cpu            | 10 ++++++++--
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml
index 496841d688..a387a9c357 100644
--- a/.github/workflows/pytest-gpu.yml
+++ b/.github/workflows/pytest-gpu.yml
@@ -57,7 +57,7 @@ jobs:
           base: "devitocodes/bases:nvidia-nvc"
           tags: ["self-hosted", "nvidiagpu"]
           test_drive_cmd: "nvidia-smi"
-          flags: '--gpus all --rm -t --name testrun-nvc'
+          flags: '--init --gpus all --rm -t --name testrun-nvc'
 
         - name: pytest-gpu-omp-amd
           test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
@@ -66,7 +66,7 @@ jobs:
           test_drive_cmd: "rocm-smi"
           # Attach the AMD GPU devices `/dev` and add user to video and render (109 on wampa) group
           # Options from https://rocmdocs.amd.com/en/latest/ROCm_Virtualization_Containers/ROCm-Virtualization-&-Containers.html
-          flags: "--network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --rm -t --name testrun-amd"
+          flags: "--init --network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --rm -t --name testrun-amd"
 
     steps:
     - name: Checkout devito
diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd
index cb42f8e4b0..006682a03a 100644
--- a/docker/Dockerfile.amd
+++ b/docker/Dockerfile.amd
@@ -3,12 +3,12 @@
 # Based on https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker
 ##############################################################
 
-ARG ROCM_VERSION=5.5.1
+ARG ROCM_VERSION=6.3.2
 
 FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base
 
-ARG UCX_BRANCH="v1.13.1"
-ARG OMPI_BRANCH="v4.1.4"
+ARG UCX_BRANCH="v1.14.1"
+ARG OMPI_BRANCH="v5.0.6"
 
 # Update and Install basic Linux development tools
 RUN rm /etc/apt/sources.list.d/* \
@@ -46,7 +46,7 @@ ENV ROCM_HOME=/opt/rocm \
     OMPI_HOME=/opt/ompi
 
 # Until rocm base has it fixed
-RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch
+RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch || echo "offload-arch already exists"
 
 # Install tmpi
 RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi
@@ -73,10 +73,14 @@ RUN cd /tmp/ \
         --without-knem \
         --without-xpmem \
         --without-cuda \
+        --without-java \
+        --enable-mt \
         --enable-optimizations \
         --disable-logging \
         --disable-debug \
         --disable-examples \
+        --disable-assertions \
+        --disable-params-check \
     && make -j ${nproc} \
     && make install
 
@@ -87,17 +91,22 @@ RUN cd /tmp \
     && ./autogen.pl \
     && mkdir build \
     && cd build \
-    && ../configure --prefix=$OMPI_HOME --with-ucx=$UCX_HOME \
-        CC=amdclang CXX=amdclang++ FC=amdflang F90=amdflang \
-        --enable-mca-no-build=btl-uct \
+    && ../configure CC=amdclang CXX=amdclang++ FC=amdflang F90=amdflang \
+        --prefix=$OMPI_HOME \
+        --with-ucx=$UCX_HOME \
+        --with-rocm=$ROCM_HOME \
+        --enable-mca-no-build=btl-uct \
         --without-verbs \
-        --with-pmix \
-        --enable-mpi \
-        --enable-mpi-fortran=yes \
+        --enable-mpi1-compatibility \
+        --enable-mpi-fortran=no \
         --disable-debug \
     && make -j ${nproc} \
     && make install
 
+# UCX config
+ENV UCX_WARN_UNUSED_ENV_VARS=n
+ENV UCX_TLS=sm,shm,self,rocm
+
 # Cleanup
 RUN rm -rf /tmp/ucx && rm -rf /tmp/ompi
 
diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index bed0bbad24..3b2ae714c7 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -18,7 +18,7 @@ RUN apt-get update && \
 # Install for basic base not containing it
 RUN apt-get install -y vim wget git flex libnuma-dev tmux \
     numactl hwloc curl \
-    autoconf libtool build-essential procps
+    autoconf libtool build-essential procps software-properties-common
 
 # Install tmpi
 RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi
@@ -37,6 +37,12 @@ CMD ["/bin/bash"]
 ##############################################################
 FROM base as gcc
 
+# Install gcc 13 for better hardware and software support
+RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y && apt-get update && \
+    apt-get install gcc-13 g++-13 -y && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 100
+
 ARG OMPI_BRANCH="v4.1.4"
 # Install OpenMPI
 RUN mkdir -p /deps && mkdir -p /opt/openmpi && cd /deps && \
@@ -47,7 +53,7 @@ RUN mkdir -p /deps && mkdir -p /opt/openmpi && cd /deps && \
         --enable-mca-no-build=btl-uct --enable-mpi1-compatibility && \
     make -j ${nproc} && \
     make install && \
-    rm -rf /deps/openmpi
+    cd /deps && rm -rf /deps/openmpi
 
 # Set OpenMPI path
 ENV PATH=${PATH}:/opt/openmpi/bin