From 8deff01efa3d4f9758aae74a6b88a436f282c66e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 25 Sep 2023 10:56:42 +0200 Subject: [PATCH 001/207] added sarus doc :hamburger: --- doc/src/container.md | 32 +++++++++++++++++++++++--------- doc/src/installation_user.md | 4 ++-- karabo/version.py | 2 +- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 73f589e6..99390577 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -10,29 +10,43 @@ docker pull ghcr.io/i4ds/karabo-pipeline:latest Docker images have the advantage that the packages needed for karabo-pipeline are already pre-installed and you can usually run them on other operating systems. In addition, Docker images can easily create singularity containers (see [Singularity Container](#singularity-container)), which are often used in HPC clusters. +## Docker Container + +What is possible with Docker is far too extensive to describe here. We refer to the official [Docker reference](https://docs.docker.com/reference/) for this. We only show here a minimal example of how Docker could be used, so you can use a [Jupyter Notebook](https://jupyter.org/) with sample code and working Karabo environment. + +```shell +docker run -it --rm -p 8888:8888 ghcr.io/i4ds/karabo-pipeline:latest +``` + +This starts the Docker container of the image interactively, where we have port 8888 forwarded here. After that, we start the jupyter service in the container with the following command: + +```shell +jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root +``` + +This will start the server on the same port we forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in the browser. + ## Singularity Container Singularity containers are often standard on HPC clusters, which do not require special permissions (unlike Docker). We do not provide ready-made [Singularity containers](https://sylabs.io/). However, they can be easily created from Docker images with the following command (may take a while): ```shell -singularity pull https://ghcr.io/i4ds/karabo-pipeline:latest +singularity pull docker://ghcr.io/i4ds/karabo-pipeline:latest ``` How to use Singularity containers can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). -## Docker Container +## Sarus Container -What is possible with Docker is far too extensive to describe here. We refer to the official [Docker reference](https://docs.docker.com/reference/) for this. We only show here a minimal example of how Docker could be used, so you can use a [Jupyter Notebook](https://jupyter.org/) with sample code and working Karabo environment. +On CSCS it is recommended to use [Sarus containers](https://user.cscs.ch/tools/containers/sarus/). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. You may have to load Sarus first using `module load sarus`. ```shell -docker run -it --rm -p 8888:8888 ghcr.io/i4ds/karabo-pipeline:latest +sarus pull ghcr.io/i4ds/karabo-pipeline:latest ``` -This starts the Docker container of the image interactively, where we have port 8888 forwarded here. After that, we start the jupyter service in the container with the following command: +Then you can create a container similar to Docker. An overview of the images are given using the `sarus images` command. Afterwards, you can run a container as follows. 
```shell -jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root -``` - -This will start the server on the same port we forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in the browser. \ No newline at end of file +sarus run ghcr.io/i4ds/karabo-pipeline:latest +``` \ No newline at end of file diff --git a/doc/src/installation_user.md b/doc/src/installation_user.md index f744fbf8..e40e3177 100644 --- a/doc/src/installation_user.md +++ b/doc/src/installation_user.md @@ -23,9 +23,9 @@ conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipel conda clean --all -y ``` -Karabo releases older than `v0.15.0` are deprecated and therefore we don't guarantee a successful installation. +Karabo versions older than `v0.15.0` are deprecated and therefore installation will most likely fail. In addition, we do not support versions of Karabo older than latest-patch when dependency resolving or online resources are outdated. Therefore, we strongly recommend using the latest version of Karabo. If an older version of Karabo is required, we strongly recommend using a [container](container.md), as the environment is fixed in a container. However, outdated online resources may still occur. -## Update to the current Karabo version +## Update to latest Karabo version A Karabo installation can be updated the following way: ``` conda update -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline diff --git a/karabo/version.py b/karabo/version.py index 8261536c..4c5578cd 100644 --- a/karabo/version.py +++ b/karabo/version.py @@ -1 +1 @@ -__version__ = "0.19.4" +__version__ = "0.19.6" From 1982cf94f2d0739be1481706d7e48afa935bf29a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 25 Sep 2023 16:12:57 +0200 Subject: [PATCH 002/207] enhanced doc :flags: --- doc/src/container.md | 25 ++++++++++++++++++++----- doc/src/installation_user.md | 2 +- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 99390577..81bb38c5 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -39,14 +39,29 @@ How to use Singularity containers can be seen in the [Singularity documentation] ## Sarus Container -On CSCS it is recommended to use [Sarus containers](https://user.cscs.ch/tools/containers/sarus/). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. You may have to load Sarus first using `module load sarus`. +On CSCS it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. + +**Setup** + +You should load `daint-gpu` or `daint-mc` before loading the `sarus` modulefile: ```shell -sarus pull ghcr.io/i4ds/karabo-pipeline:latest +module load daint-gpu \# or daint-mc +module load sarus ``` -Then you can create a container similar to Docker. An overview of the images are given using the `sarus images` command. Afterwards, you can run a container as follows. 
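Putting the setup steps above together, a minimal sketch of the whole preparation on CSCS could look as follows (whether additional SLURM flags such as an account or constraint are needed depends on your allocation and is not covered here):

```shell
srun --pty bash                                  # interactive SLURM job on a compute node
module load daint-gpu                            # or daint-mc, depending on the node type
module load sarus
sarus pull ghcr.io/i4ds/karabo-pipeline:latest   # convert and store the Docker image as a Sarus image
sarus images                                     # check that the new image is listed
```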
+Then you can pull a docker image to a sarus image as follows: ```shell -sarus run ghcr.io/i4ds/karabo-pipeline:latest -``` \ No newline at end of file +sarus pull ghcr.io/i4ds/karabo-pipeline:latest +``` + +**Native MPI support (MPICH-based)** + +In order to access the high-speed Cray Aries interconnect, the container application must be dynamically linked to an MPI implementation that is [ABI-compatible](https://www.mpich.org/abi/) with the compute node's MPI on Piz Daint, CSCS recommends one of the following MPI implementations: + +[MPICH v3.1.4](http://www.mpich.org/static/downloads/3.1.4/mpich-3.1.4.tar.gz) (Feburary 2015) +[MVAPICH2 2.2](http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.2.tar.gz) (September 2016) +Intel MPI Library 2017 Update 1 + +How to use: TODO \ No newline at end of file diff --git a/doc/src/installation_user.md b/doc/src/installation_user.md index e40e3177..27b243ea 100644 --- a/doc/src/installation_user.md +++ b/doc/src/installation_user.md @@ -23,7 +23,7 @@ conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipel conda clean --all -y ``` -Karabo versions older than `v0.15.0` are deprecated and therefore installation will most likely fail. In addition, we do not support versions of Karabo older than latest-patch when dependency resolving or online resources are outdated. Therefore, we strongly recommend using the latest version of Karabo. If an older version of Karabo is required, we strongly recommend using a [container](container.md), as the environment is fixed in a container. However, outdated online resources may still occur. +Karabo versions older than `v0.15.0` are deprecated and therefore installation will most likely fail. In addition, we do not support Karabo older than latest-minor version in case dependency resolving or online resources is outdated. Therefore, we strongly recommend using the latest version of Karabo. If an older version of Karabo is required, we strongly recommend using a [container](container.md), as the environment is fixed in a container. However, outdated online resources may still occur. 
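For such cases, a sketch of pinning a container to a specific Karabo release instead of `latest` might look like this (the tag `0.19.4` is purely illustrative; the tags that actually exist are listed on the ghcr.io package page):

```shell
# Docker: pull a pinned version instead of `latest`
docker pull ghcr.io/i4ds/karabo-pipeline:0.19.4

# Singularity: build a container from the same pinned image
singularity pull docker://ghcr.io/i4ds/karabo-pipeline:0.19.4
```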
## Update to latest Karabo version A Karabo installation can be updated the following way: From 49cef1cd27db47a45d2275f6decf04c4771fa367 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 26 Sep 2023 13:11:51 +0200 Subject: [PATCH 003/207] added conda-prefix in dockerfile to have correct set env-var in sarus container :man: --- docker/user/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 0b6d271c..c62f278f 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -9,7 +9,7 @@ ARG KARABO_TAG RUN apt-get update && apt-get install -y libarchive13 wget curl nano RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p /opt/conda -ENV PATH=/opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda update -y conda && \ From b77cf49b231d867f87d7257bf1a03eb43b57fe30 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 26 Sep 2023 14:13:00 +0200 Subject: [PATCH 004/207] bugfix gpu-testing :ledger: --- karabo/test/conftest.py | 1 + karabo/test/test_source_detection.py | 4 +--- karabo/test/test_utils.py | 26 ++++++++++++++------------ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index d2366f21..932387ad 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -14,6 +14,7 @@ NNImageDiffCallable = Callable[[str, str], float] IS_GITHUB_RUNNER = os.environ.get("IS_GITHUB_RUNNER", "false").lower() == "true" +RUN_GPU_TESTS = os.environ.get("RUN_GPU_TESTS", "false").lower() == "true" file_handler_test_dir = os.path.join(os.path.dirname(__file__), "karabo_test") diff --git a/karabo/test/test_source_detection.py b/karabo/test/test_source_detection.py index 655931ba..fdef27bd 100644 --- a/karabo/test/test_source_detection.py +++ b/karabo/test/test_source_detection.py @@ -14,9 +14,7 @@ PyBDSFSourceDetectionResult, SourceDetectionResult, ) -from karabo.test.conftest import NNImageDiffCallable, TFiles - -RUN_GPU_TESTS = os.environ.get("RUN_GPU_TESTS", "false").lower() == "true" +from karabo.test.conftest import RUN_GPU_TESTS, NNImageDiffCallable, TFiles def test_source_detection_plot( diff --git a/karabo/test/test_utils.py b/karabo/test/test_utils.py index 1d1cca35..759ae9cb 100644 --- a/karabo/test/test_utils.py +++ b/karabo/test/test_utils.py @@ -1,33 +1,35 @@ -import os - import pytest +from karabo.test.conftest import RUN_GPU_TESTS from karabo.util.gpu_util import get_gpu_memory, is_cuda_available from karabo.version import __version__ -RUN_GPU_TESTS = os.environ.get("RUN_GPU_TESTS", "false").lower() == "true" +def test_is_cuda_available(): + assert isinstance(is_cuda_available(), bool) -@pytest.mark.skipif(not RUN_GPU_TESTS, reason="GPU tests are disabled") -def test_get_gpu_memory(): - memory = get_gpu_memory() - assert isinstance(memory, int) - assert memory > 0 +CUDA_AVAILABLE = is_cuda_available() -@pytest.mark.skipif(RUN_GPU_TESTS, reason="Does not fail when GPU is available") + +@pytest.mark.skipif(CUDA_AVAILABLE, reason="Doesn't make sense if cuda is available") def test_gpu_memory_error(): with pytest.raises(RuntimeError): get_gpu_memory() -def test_is_cuda_available(): - assert isinstance(is_cuda_available(), bool) +@pytest.mark.skipif( + not CUDA_AVAILABLE, reason="Test doesn't make sense if cuda is not available" +) +def 
test_get_gpu_memory(): + memory = get_gpu_memory() + assert isinstance(memory, int) + assert memory > 0 @pytest.mark.skipif(not RUN_GPU_TESTS, reason="GPU tests are disabled") def test_is_cuda_available_true(): - assert is_cuda_available() + assert CUDA_AVAILABLE def test_version(): From 7fbced9f8902bdb38479e88c5936e6524255b85d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 26 Sep 2023 15:04:20 +0200 Subject: [PATCH 005/207] skip pinocchio-test because it seems not to work properly on each platform :confounded: --- karabo/test/test_pinocchio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/karabo/test/test_pinocchio.py b/karabo/test/test_pinocchio.py index 5a2b015c..1f737fd8 100644 --- a/karabo/test/test_pinocchio.py +++ b/karabo/test/test_pinocchio.py @@ -2,10 +2,12 @@ from pathlib import Path import numpy as np +import pytest from karabo.simulation.pinocchio import Pinocchio +@pytest.mark.skip(reason="`pinocchio.test.plc.out` not found on CSCS Sarus container") def test_pinocchio_run(): """Validate a simple PINOCCHIO run. From e67532cad49229dc5b0b986bc3024ba79622b8ab Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 09:58:04 +0200 Subject: [PATCH 006/207] integrated user-build testing in workflow :eight: --- .github/workflows/build-user-image.yml | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 97e38260..c036b4a5 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -21,10 +21,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # Aussumes that current repo-tag matches karabo:latest on anaconda.org - name: Get Previous tag @@ -32,7 +35,7 @@ jobs: id: get-latest-tag - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -40,15 +43,28 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ github.repository }} tags: | type=raw, value=latest type=pep440, pattern={{version}}, value=${{ steps.get-latest-tag.outputs.tag }} + - name: Build and export to Docker + uses: docker/build-push-action@v5 + with: + context: . + push: false + build-args: KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} + load: true + tags: ${{ steps.meta.outputs.tags }} + labels: test + - name: Test container + run: | + docker run --rm ${{ steps.meta.outputs.tags }}:test pytest /opt/conda/lib/python3.9/site-packages/karabo/test + - name: Build and push Docker image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: file: docker/user/Dockerfile context: . 
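The container test added to the workflow above can in principle also be reproduced locally before pushing. A rough sketch, where the local image name and the `KARABO_TAG` value are placeholders rather than values taken from the workflow run:

```shell
# Build the user image from the repository root and run the packaged tests inside it
docker build -f docker/user/Dockerfile --build-arg KARABO_TAG=v0.19.6 -t karabo-pipeline:test .
docker run --rm karabo-pipeline:test pytest /opt/conda/lib/python3.9/site-packages/karabo/test
```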
From 53254ddda7d2b746acbc9099da1483bdbe1c8320 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 10:26:00 +0200 Subject: [PATCH 007/207] updated dockerfile-dev :santa: --- docker/dev/Dockerfile | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 5f1144af..b37e9dae 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -1,20 +1,22 @@ -# LEGACY-FILE, has to be checked before usage -# Create build container to not have copied filed in real container afterwards -FROM --platform=amd64 continuumio/miniconda3:4.12.0 as build -COPY environment.yaml environment.yaml -COPY requirements.txt requirements.txt +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as build +RUN apt-get update && apt-get install -y git +RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git -FROM --platform=amd64 continuumio/miniconda3:4.12.0 +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 +RUN apt-get update && apt-get install -y libarchive13 wget curl nano +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda +ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda +RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -RUN apt-get update && apt-get install -y curl && apt-get autoclean && rm -rf /var/lib/apt/lists/* -COPY --from=build environment.yaml environment.yaml -COPY --from=build requirements.txt requirements.txt RUN conda update -y conda && \ - conda clean --all --yes && \ conda install mamba -y -c conda-forge -RUN mamba env update --file environment.yaml -RUN pip install -r requirements.txt -RUN rm environment.yaml requirements.txt -RUN pip install unittest-xml-reporting +COPY --from=build environment.yaml environment.yaml +COPY --from=build requirements.txt requirements.txt +RUN mamba env update --file environment.yaml && \ + pip install -r requirements.txt && \ + pip install jupyterlab ipykernel && \ + python -m ipykernel install --user --name=karabo && \ + rm environment.yaml requirements.txt RUN mkdir /workspace WORKDIR /workspace \ No newline at end of file From a828dfd67ac4cbd476fb5137e2ae3a2f278addea Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 10:29:52 +0200 Subject: [PATCH 008/207] removed test-user-package because testing is now happening before pushing the image :high_heel: --- .github/workflows/test-user-package.yml | 35 ------------------------- 1 file changed, 35 deletions(-) delete mode 100644 .github/workflows/test-user-package.yml diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml deleted file mode 100644 index 103afb95..00000000 --- a/.github/workflows/test-user-package.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Test User Package - -on: - workflow_run: - workflows: ["Conda Build"] - types: - - completed - -jobs: - conda-build: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} - steps: - - name: Install Conda - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - - name: Set variables, Install Package & Dependencies - shell: bash -l {0} - run: | - export IS_GITHUB_RUNNER=true - export RUN_NOTEBOOK_TESTS=false - conda install -y -n base conda-libmamba-solver - conda config --set solver libmamba - conda create -y -n karabo-env python=3.9 - conda activate karabo-env - conda install -y -c 
nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=${{ env.KARABO_VERSION }} - pip install ipykernel - python -m ipykernel install --user --name python3 - - name: Test Package - shell: bash -l {0} - run: | - conda activate karabo-env - pytest --pyargs karabo.test - From a8916500010c9b0d216110058c487587180b912e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 10:56:05 +0200 Subject: [PATCH 009/207] adapted docker-dev :tropical_drink: --- docker/dev/Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index b37e9dae..bc5ecc10 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -1,6 +1,5 @@ -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as build -RUN apt-get update && apt-get install -y git -RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git +# This Dockerfile is for developing purpose only +# Make sure to call `docker build` at the root of the repo to have access to the required files FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 RUN apt-get update && apt-get install -y libarchive13 wget curl nano @@ -11,8 +10,7 @@ RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda update -y conda && \ conda install mamba -y -c conda-forge -COPY --from=build environment.yaml environment.yaml -COPY --from=build requirements.txt requirements.txt +COPY environment.yaml requirements.txt ./ RUN mamba env update --file environment.yaml && \ pip install -r requirements.txt && \ pip install jupyterlab ipykernel && \ From 2c943c7cb288608f49891ea5eb823a3609ff00a3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 11:59:35 +0200 Subject: [PATCH 010/207] Revert "removed test-user-package because testing is now happening before pushing the image :high_heel:" This reverts commit a828dfd67ac4cbd476fb5137e2ae3a2f278addea. 
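As noted in the dev Dockerfile comment above, that image is expected to be built from the repository root so that `environment.yaml` and `requirements.txt` are part of the build context. A minimal sketch, with the image name `karabo-dev:local` chosen only as an example:

```shell
# Build the development image from the repository root
docker build -f docker/dev/Dockerfile -t karabo-dev:local .
```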
--- .github/workflows/test-user-package.yml | 35 +++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/test-user-package.yml diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml new file mode 100644 index 00000000..103afb95 --- /dev/null +++ b/.github/workflows/test-user-package.yml @@ -0,0 +1,35 @@ +name: Test User Package + +on: + workflow_run: + workflows: ["Conda Build"] + types: + - completed + +jobs: + conda-build: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' }} + steps: + - name: Install Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + - name: Set variables, Install Package & Dependencies + shell: bash -l {0} + run: | + export IS_GITHUB_RUNNER=true + export RUN_NOTEBOOK_TESTS=false + conda install -y -n base conda-libmamba-solver + conda config --set solver libmamba + conda create -y -n karabo-env python=3.9 + conda activate karabo-env + conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=${{ env.KARABO_VERSION }} + pip install ipykernel + python -m ipykernel install --user --name python3 + - name: Test Package + shell: bash -l {0} + run: | + conda activate karabo-env + pytest --pyargs karabo.test + From 90d9230128371ff32e9000946db2e0e54cd3f941 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 13:43:48 +0200 Subject: [PATCH 011/207] updated dockerfile-dev :electric_plug: --- .github/workflows/build-dev-image.yml | 49 ++++++++++++++++----------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build-dev-image.yml b/.github/workflows/build-dev-image.yml index 58026190..1d1609a8 100644 --- a/.github/workflows/build-dev-image.yml +++ b/.github/workflows/build-dev-image.yml @@ -1,18 +1,19 @@ name: Build Dev Image on: - workflow_call: + workflow_dispatch: inputs: - REGISTRY: + IMAGE_TAG: required: true type: string - IMAGE_NAME: - required: true - type: string - outputs: - imagetag: - description: "Image tag" - value: ${{ jobs.Build_Dev_Image.outputs.tag }} + workflow_run: + workflows: ["Test User Package"] + types: + - completed + +env: + REGISTRY: ghcr.io + IMAGE_NAME: karabo-dev jobs: @@ -21,19 +22,24 @@ jobs: permissions: contents: read packages: write - outputs: - tag: ${{ steps.imagetag.outputs.tag }} steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Get Previous tag + uses: actions-ecosystem/action-get-latest-tag@v1 + id: get-latest-tag - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: - registry: ${{ inputs.REGISTRY }} + registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -41,20 +47,23 @@ jobs: id: imagetag shell: bash -l {0} run: | - UUID=$(uuidgen) - IMAGE_TAG=dev-"${UUID:0:8}" + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + IMAGE_TAG=${{ inputs.IMAGE_TAG }} + else + IMAGE_TAG=${{ steps.get-latest-tag.outputs.tag }} + fi echo "tag=$IMAGE_TAG" >> $GITHUB_OUTPUT - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: - images: ${{ inputs.REGISTRY }}/${{ github.repository_owner }}/${{ inputs.IMAGE_NAME }} + images: ${{ env.REGISTRY 
}}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }} tags: | type=raw, value=${{ steps.imagetag.outputs.tag }} - name: Build and push Docker image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: file: docker/dev/Dockerfile context: . From 3265fae68fcdedb74cae9cdadb8cec861e230b68 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 13:57:41 +0200 Subject: [PATCH 012/207] changed dev-workflow for testing :skull: --- .github/workflows/dev-workflow.yml | 80 ++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dev-workflow.yml b/.github/workflows/dev-workflow.yml index bae2b17e..59d7929f 100644 --- a/.github/workflows/dev-workflow.yml +++ b/.github/workflows/dev-workflow.yml @@ -1,14 +1,88 @@ # This is a template workflow that exists at the default branch for testing purpose at another branch. # DO NOT CHANGE THIS WORKFLOW when you merge your branch to the default branch! -name: Dev Workflow +# name: Dev Workflow + +# on: +# workflow_dispatch: + +# jobs: +# job-name: +# runs-on: ubuntu-latest +# steps: +# - name: Checkout repository +# uses: actions/checkout@v4 + + +name: Build Dev Image on: workflow_dispatch: + inputs: + IMAGE_TAG: + required: true + type: string + workflow_run: + workflows: ["Test User Package"] + types: + - completed + +env: + REGISTRY: ghcr.io + IMAGE_NAME: karabo-dev + jobs: - job-name: + Build_Dev_Image: runs-on: ubuntu-latest + permissions: + contents: read + packages: write steps: - name: Checkout repository - uses: actions/checkout@v3 \ No newline at end of file + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Get Previous tag + uses: actions-ecosystem/action-get-latest-tag@v1 + id: get-latest-tag + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create image tag + id: imagetag + shell: bash -l {0} + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + IMAGE_TAG=${{ inputs.IMAGE_TAG }} + else + IMAGE_TAG=${{ steps.get-latest-tag.outputs.tag }} + fi + echo "tag=$IMAGE_TAG" >> $GITHUB_OUTPUT + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }} + tags: | + type=raw, value=${{ steps.imagetag.outputs.tag }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + file: docker/dev/Dockerfile + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From 09b33ff609e1329e74c002cdfe2db990aaee3e41 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 2 Oct 2023 14:13:55 +0200 Subject: [PATCH 013/207] removed workflow-run trigger of dev-image because out of memory :light_rail: --- .github/workflows/build-dev-image.yml | 8 +-- .github/workflows/dev-workflow.yml | 78 +-------------------------- 2 files changed, 6 insertions(+), 80 deletions(-) diff --git a/.github/workflows/build-dev-image.yml b/.github/workflows/build-dev-image.yml index 1d1609a8..2318c52c 100644 --- a/.github/workflows/build-dev-image.yml +++ b/.github/workflows/build-dev-image.yml @@ -6,10 +6,10 @@ on: IMAGE_TAG: required: true type: string - workflow_run: - workflows: ["Test User Package"] - types: - - completed + # workflow_run: + # workflows: ["Test User Package"] + # types: + # - completed env: REGISTRY: ghcr.io diff --git a/.github/workflows/dev-workflow.yml b/.github/workflows/dev-workflow.yml index 59d7929f..bac9e949 100644 --- a/.github/workflows/dev-workflow.yml +++ b/.github/workflows/dev-workflow.yml @@ -1,88 +1,14 @@ # This is a template workflow that exists at the default branch for testing purpose at another branch. # DO NOT CHANGE THIS WORKFLOW when you merge your branch to the default branch! -# name: Dev Workflow - -# on: -# workflow_dispatch: - -# jobs: -# job-name: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout repository -# uses: actions/checkout@v4 - - -name: Build Dev Image +name: Dev Workflow on: workflow_dispatch: - inputs: - IMAGE_TAG: - required: true - type: string - workflow_run: - workflows: ["Test User Package"] - types: - - completed - -env: - REGISTRY: ghcr.io - IMAGE_NAME: karabo-dev - jobs: - Build_Dev_Image: + job-name: runs-on: ubuntu-latest - permissions: - contents: read - packages: write steps: - name: Checkout repository uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Get Previous tag - uses: actions-ecosystem/action-get-latest-tag@v1 - id: get-latest-tag - - - name: Log in to the Container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Create image tag - id: imagetag - shell: bash -l {0} - run: | - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - IMAGE_TAG=${{ inputs.IMAGE_TAG }} - else - IMAGE_TAG=${{ steps.get-latest-tag.outputs.tag }} - fi - echo "tag=$IMAGE_TAG" >> $GITHUB_OUTPUT - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }} - tags: | - type=raw, value=${{ steps.imagetag.outputs.tag }} - - - name: Build and push Docker image - uses: docker/build-push-action@v5 - with: - file: docker/dev/Dockerfile - context: . 
- push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} From d9e57b2377a48be8c137669613a128d8190c08f5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 11 Oct 2023 14:29:23 +0200 Subject: [PATCH 014/207] added mpich std-loc in Dockerfile.user :bread: --- docker/user/Dockerfile | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index c62f278f..c983adf1 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -15,6 +15,25 @@ SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda update -y conda && \ conda install mamba -y -c conda-forge RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" +ARG MPICH_VERSION="4.1.2" +ARG MPICH_CONFIGURE_OPTIONS="--enable-fast=all,O3 --prefix=/usr --disable-fortran --with-cuda=/usr/local/cuda" +ARG MPICH_MAKE_OPTIONS="-j4" +# install mpich on standard location (needed for mpi-hook) +RUN mkdir -p /tmp/mpich-build \ + && cd /tmp/mpich-build \ + && wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz \ + && tar xvzf mpich-${MPICH_VERSION}.tar.gz \ + && cd mpich-${MPICH_VERSION} \ + && ./configure ${MPICH_CONFIGURE_OPTIONS} \ + && make ${MPICH_MAKE_OPTIONS} \ + && make install \ + && ldconfig \ + && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ + && cd / \ + && rm -rf /tmp/mpich-build +# replace openmpi with mpich +RUN conda remove --force-remove -y openmpi mpi && \ + conda install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace From 34701813415cd3bad172ff2d281f45f8fdf0391f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 11 Oct 2023 16:55:12 +0200 Subject: [PATCH 015/207] adapted docker-user :ophiuchus: --- docker/user/Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 23640021..36f0949e 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,23 +1,22 @@ # Create build container to not have copied filed in real container afterwards FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as build ARG KARABO_TAG -ARG IS_DOCKER_CONTAINER=true RUN apt-get update && apt-get install -y git RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 ARG KARABO_TAG -RUN apt-get update && apt-get install -y libarchive13 wget curl nano +ENV IS_DOCKER_CONTAINER=true +RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p /opt/conda ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -RUN conda update -y conda && \ - conda install mamba -y -c conda-forge +RUN conda install mamba -y -c conda-forge RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" ARG MPICH_VERSION="4.1.2" -ARG MPICH_CONFIGURE_OPTIONS="--enable-fast=all,O3 --prefix=/usr --disable-fortran --with-cuda=/usr/local/cuda" +ARG MPICH_CONFIGURE_OPTIONS="--enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda" ARG MPICH_MAKE_OPTIONS="-j4" # install mpich on 
standard location (needed for mpi-hook) RUN mkdir -p /tmp/mpich-build \ From 3b27592c60527dd4f5a847999903452ef0608299 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 12 Oct 2023 11:44:48 +0200 Subject: [PATCH 016/207] minor bugfix dockerfile-user :inbox_tray: --- docker/user/Dockerfile | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 36f0949e..569dca2e 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -14,26 +14,24 @@ ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install mamba -y -c conda-forge -RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" ARG MPICH_VERSION="4.1.2" -ARG MPICH_CONFIGURE_OPTIONS="--enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda" -ARG MPICH_MAKE_OPTIONS="-j4" # install mpich on standard location (needed for mpi-hook) RUN mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ && wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz \ && tar xvzf mpich-${MPICH_VERSION}.tar.gz \ && cd mpich-${MPICH_VERSION} \ - && ./configure ${MPICH_CONFIGURE_OPTIONS} \ - && make ${MPICH_MAKE_OPTIONS} \ + && ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda \ + && make -j4 \ && make install \ && ldconfig \ && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ && cd / \ && rm -rf /tmp/mpich-build -# replace openmpi with mpich +RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" +# replace openmpi with mpich-dummy (see issue #512) RUN conda remove --force-remove -y openmpi mpi && \ - conda install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" + mamba install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace From bf266e8daccb1879f63737869e45cd81fed95fb7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 12 Oct 2023 14:41:09 +0200 Subject: [PATCH 017/207] updated environments-files :sparkler: --- conda/meta.yaml | 4 +++- docker/user/Dockerfile | 7 +++---- environment.yaml | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 4fd3fddf..70d40439 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -25,6 +25,7 @@ requirements: - bluebild - cuda-cudart - dask=2022.12.1 + - dask-mpi - distributed - eidos=1.1.0 - healpy @@ -33,6 +34,8 @@ requirements: - katbeam=0.1.0 - libcufft - matplotlib + - montagepy=6.0.0 + - mpi4py - nbformat - nbconvert - numpy=1.22 @@ -49,7 +52,6 @@ requirements: - ska-sdp-func-python=0.1.4 - tools21cm=2.0.2 - xarray - - montagepy=6.0.0 diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 569dca2e..eb1b1bb5 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -5,16 +5,15 @@ RUN apt-get update && apt-get install -y git RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -ARG KARABO_TAG +ARG KARABO_TAG MPICH_VERSION="4.1.*" CONDA_PATH="/opt/conda" ENV IS_DOCKER_CONTAINER=true RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ 
- /bin/bash ~/miniconda.sh -b -p /opt/conda -ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda + /bin/bash ~/miniconda.sh -b -p ${CONDA_PATH} +ENV PATH=${CONDA_PATH}/bin:$PATH CONDA_PREFIX=${CONDA_PATH} RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install mamba -y -c conda-forge -ARG MPICH_VERSION="4.1.2" # install mpich on standard location (needed for mpi-hook) RUN mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ diff --git a/environment.yaml b/environment.yaml index b7eee39a..af32ab89 100644 --- a/environment.yaml +++ b/environment.yaml @@ -11,7 +11,6 @@ dependencies: - cuda-cudart - dask=2022.12.1 - dask-mpi - - mpi4py - distributed - eidos=1.1.0 - healpy @@ -20,6 +19,8 @@ dependencies: - katbeam=0.1.0 - libcufft - matplotlib + - montagepy=6.0.0 + - mpi4py - nbformat - nbconvert - numpy=1.22 @@ -36,4 +37,3 @@ dependencies: - ska-sdp-func-python=0.1.4 - tools21cm=2.0.2 - xarray - - montagepy=6.0.0 From d6252c2ca0f1a7f52f18120d1cbe7fef787c23cc Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 12 Oct 2023 15:28:41 +0200 Subject: [PATCH 018/207] updated mpi-version in dockerfile :ab: --- docker/user/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index eb1b1bb5..37bd6876 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -5,7 +5,7 @@ RUN apt-get update && apt-get install -y git RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -ARG KARABO_TAG MPICH_VERSION="4.1.*" CONDA_PATH="/opt/conda" +ARG KARABO_TAG MPICH_VERSION="4.1.2" CONDA_PATH="/opt/conda" ENV IS_DOCKER_CONTAINER=true RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ From aef424a9e6b228a978a336d6f1a7d8e514a3366a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 13 Oct 2023 13:07:20 +0200 Subject: [PATCH 019/207] updated Dockerfile-user structure :space_invader: --- docker/user/Dockerfile | 18 ++++++++++-------- requirements.txt | 6 +++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 37bd6876..688b9225 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -5,16 +5,11 @@ RUN apt-get update && apt-get install -y git RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -ARG KARABO_TAG MPICH_VERSION="4.1.2" CONDA_PATH="/opt/conda" -ENV IS_DOCKER_CONTAINER=true +# compilers & wget needed for MPI-installation RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p ${CONDA_PATH} -ENV PATH=${CONDA_PATH}/bin:$PATH CONDA_PREFIX=${CONDA_PATH} -RUN conda init -SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -RUN conda install mamba -y -c conda-forge # install mpich on standard location (needed for mpi-hook) +# starting-layer because compilation can require hours. 
+ARG MPICH_VERSION="4.1.2" RUN mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ && wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz \ @@ -27,6 +22,13 @@ RUN mkdir -p /tmp/mpich-build \ && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ && cd / \ && rm -rf /tmp/mpich-build +ARG KARABO_TAG +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} +RUN conda init +SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] +RUN conda install mamba -y -c conda-forge RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" # replace openmpi with mpich-dummy (see issue #512) RUN conda remove --force-remove -y openmpi mpi && \ diff --git a/requirements.txt b/requirements.txt index 733e79ee..5bf21ae3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ isort==5.12.0 black[jupyter]==23.3.0 pydocstyle==6.3.0 pytest==7.3.1 +pytest-mpi==0.6 pytest-cov==4.1.0 pre-commit==3.2.2 @@ -17,4 +18,7 @@ types-requests # doc tools myst-parser sphinx -sphinx_rtd_theme \ No newline at end of file +sphinx_rtd_theme + +# other +podmena \ No newline at end of file From 962ec48e583eb9ab6b69d23ffc993750e4ae3380 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 08:40:23 +0200 Subject: [PATCH 020/207] added mpi-tests to karabo :seedling: --- karabo/test/test_mpi.py | 90 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 karabo/test/test_mpi.py diff --git a/karabo/test/test_mpi.py b/karabo/test/test_mpi.py new file mode 100644 index 00000000..70c24212 --- /dev/null +++ b/karabo/test/test_mpi.py @@ -0,0 +1,90 @@ +"""MPI tests according to `https://mpi4py.readthedocs.io/en/stable/tutorial.html`.""" +import numpy as np +import pytest +from mpi4py import MPI + + +@pytest.mark.mpi +def test_broadcast_dict(): + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + if rank == 0: + data = {"key1": [7, 2.72, 2 + 3j], "key2": ("abc", "xyz")} + else: + data = None + data = comm.bcast(data, root=0) + + +@pytest.mark.mpi +def test_scatter_obj(): + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + + if rank == 0: + data = [(i + 1) ** 2 for i in range(size)] + else: + data = None + data = comm.scatter(data, root=0) + assert data == (rank + 1) ** 2 + + +@pytest.mark.mpi +def test_gather_obj(): + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + + data = (rank + 1) ** 2 + data = comm.gather(data, root=0) + if rank == 0: + for i in range(size): + assert data[i] == (i + 1) ** 2 + else: + assert data is None + + +@pytest.mark.mpi +def test_broadcast_nparrays(): + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + if rank == 0: + data = np.arange(100, dtype="i") + else: + data = np.empty(100, dtype="i") + comm.Bcast(data, root=0) + for i in range(100): + assert data[i] == i + + +@pytest.mark.mpi +def test_scatter_nparrays(): + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + + sendbuf = None + if rank == 0: + sendbuf = np.empty([size, 100], dtype="i") + sendbuf.T[:, :] = range(size) + recvbuf = np.empty(100, dtype="i") + comm.Scatter(sendbuf, recvbuf, root=0) + assert np.allclose(recvbuf, rank) + + +@pytest.mark.mpi +def test_gather_nparrays(): + comm = MPI.COMM_WORLD + size = 
comm.Get_size() + rank = comm.Get_rank() + + sendbuf = np.zeros(100, dtype="i") + rank + recvbuf = None + if rank == 0: + recvbuf = np.empty([size, 100], dtype="i") + comm.Gather(sendbuf, recvbuf, root=0) + if rank == 0: + for i in range(size): + assert np.allclose(recvbuf[i, :], i) From 7e0e3b3adf89d801e31fdf6bb96d6c07724af41e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 08:56:40 +0200 Subject: [PATCH 021/207] updated requirements.txt with new versions :musical_score: --- requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5bf21ae3..9f8cff74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ # dev & test dependencies # formatting tools -mypy==1.2.0 +mypy==1.6.1 mypy-extensions==1.0.0 -flake8==6.0.0 +flake8==6.1.0 isort==5.12.0 -black[jupyter]==23.3.0 +black[jupyter]==23.10.0 pydocstyle==6.3.0 -pytest==7.3.1 +pytest==7.4.2 pytest-mpi==0.6 pytest-cov==4.1.0 -pre-commit==3.2.2 +pre-commit==3.5.0 # types for mypy types-requests From dff2ba0ef3da90381b6a35b2f92275bb45a84097 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 09:58:45 +0200 Subject: [PATCH 022/207] integrated mpi-tests into github workflow and codecov-report :ng: --- .github/workflows/test.yml | 4 +++- setup.cfg | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 03d9ad3c..30a0bf73 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,7 +45,9 @@ jobs: export RUN_GPU_TESTS=false export RUN_NOTEBOOK_TESTS=true pip install . --no-deps - pytest --cov=./ --cov-report=xml + mpirun -n 2 coverage run --rcfile=setup.cfg -m pytest --cov=./ --only-mpi + coverage combine + pytest --cov=./ --cov-append --cov-report=xml - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 env: diff --git a/setup.cfg b/setup.cfg index e6ef73f3..888e2723 100644 --- a/setup.cfg +++ b/setup.cfg @@ -96,6 +96,7 @@ warn_unused_ignores = true [coverage:run] branch = False +parallel = true [coverage:report] ; Regexes for lines to exclude from consideration From cc9cdcaf11eceefa4cf2d1691f28dc37fe30596e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 10:02:27 +0200 Subject: [PATCH 023/207] minor fix using pytest-cov instead of coverage in github workflow :ocean: --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 30a0bf73..245c0fd7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,8 +45,7 @@ jobs: export RUN_GPU_TESTS=false export RUN_NOTEBOOK_TESTS=true pip install . 
--no-deps - mpirun -n 2 coverage run --rcfile=setup.cfg -m pytest --cov=./ --only-mpi - coverage combine + mpirun -n 2 pytest --cov=./ --only-mpi pytest --cov=./ --cov-append --cov-report=xml - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 From f6dd47108ba57cef7d4d42a9f278102640468ad7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 10:04:28 +0200 Subject: [PATCH 024/207] fixed coverage-files discovery (I think) :atm: --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 245c0fd7..326da44d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,6 +53,6 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} directory: ./coverage/reports/ env_vars: OS,PYTHON - files: ./coverage1.xml,./coverage2.xml,!./cache + files: ./coverage.xml,!./cache fail_ci_if_error: false name: codecov-karabo From 6fc82bd773e3d9c86da13700062f0ca775fd7c30 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 10:15:50 +0200 Subject: [PATCH 025/207] adapted codecov workflow according to github.com/codecov/codecov-action :game_die: --- .github/workflows/test.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 326da44d..b6ab0ede 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -49,10 +49,8 @@ jobs: pytest --cov=./ --cov-append --cov-report=xml - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - directory: ./coverage/reports/ + with: + token: ${{ secrets.CODECOV_TOKEN }} env_vars: OS,PYTHON - files: ./coverage.xml,!./cache fail_ci_if_error: false name: codecov-karabo From 8e069a4fca68e8172a503f4ce50d77f31ba29f34 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 10:56:34 +0200 Subject: [PATCH 026/207] adapted container-doc :poultry_leg: --- doc/src/container.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 81bb38c5..f11a3a13 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -2,29 +2,29 @@ ## Docker Images -We provide for the karabo-pipeline [Docker images](https://www.docker.com/resources/what-container/#:~:text=A%20Docker%20container%20image%20is,tools%2C%20system%20libraries%20and%20settings.) which are hosted by the [ghcr.io](https://github.com/features/packages) registry. An overview of all available images is [here](https://github.com/i4ds/Karabo-Pipeline/pkgs/container/karabo-pipeline), if a specific version and not simply `latest` is desired. Starting from `karabo@v0.15.0`, all versions should be available. Provided you have docker, the image can be installed as follows: +We provide for the Karabo-pipeline [Docker images](https://www.docker.com/resources/what-container/#:~:text=A%20Docker%20container%20image%20is,tools%2C%20system%20libraries%20and%20settings.) which are hosted by the [ghcr.io](https://github.com/features/packages) registry. An overview of all available images is [here](https://github.com/i4ds/Karabo-Pipeline/pkgs/container/karabo-pipeline), if a specific version and not simply `latest` is desired. Starting from `karabo@v0.15.0`, all versions should be available. 
Provided you have docker, the image can be pulled as follows: ```shell docker pull ghcr.io/i4ds/karabo-pipeline:latest ``` -Docker images have the advantage that the packages needed for karabo-pipeline are already pre-installed and you can usually run them on other operating systems. In addition, Docker images can easily create singularity containers (see [Singularity Container](#singularity-container)), which are often used in HPC clusters. +Docker images have the advantage that the packages needed for Karabo-pipeline are already pre-installed and you can usually run them on other operating systems. So in case the dependency resolvement of older Karabo installations is not up to date anymore, with Docker images you don't have to worry as the installation process has already been performed. In addition, Docker images can easily transform into other containers like Singularity or Sarus, which are often used in HPC-clusters. -## Docker Container +## Launch a Docker Container -What is possible with Docker is far too extensive to describe here. We refer to the official [Docker reference](https://docs.docker.com/reference/) for this. We only show here a minimal example of how Docker could be used, so you can use a [Jupyter Notebook](https://jupyter.org/) with sample code and working Karabo environment. +What the possibilities using Docker are is far too extensive to describe here. We refer to the official [Docker reference](https://docs.docker.com/reference/) for this. We only show here a minimal example of how Docker could be used, so you can use e.g. a [Jupyter Notebook](https://jupyter.org/) with sample code and an existing Karabo environment. ```shell docker run -it --rm -p 8888:8888 ghcr.io/i4ds/karabo-pipeline:latest ``` -This starts the Docker container of the image interactively, where we have port 8888 forwarded here. After that, we start the jupyter service in the container with the following command: +This starts the Docker container of the image interactively, where we forward port 8888. After that, we start the jupyter service in the container with the following command: ```shell jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root ``` -This will start the server on the same port we forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in the browser. +This will start a server on the same port as forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in a browser. ## Singularity Container @@ -35,11 +35,11 @@ We do not provide ready-made [Singularity containers](https://sylabs.io/). Howev singularity pull docker://ghcr.io/i4ds/karabo-pipeline:latest ``` -How to use Singularity containers can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). +How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). ## Sarus Container -On CSCS it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. 
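Such an interactive session can, for example, be requested as sketched below before following the setup steps that come next (the `-C gpu` constraint is only an example and should be adapted to your allocation):

```shell
srun -C gpu --pty bash   # interactive shell on a compute node; account/partition flags depend on your CSCS project
```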
+On CSCS, it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. **Setup** @@ -50,7 +50,7 @@ module load daint-gpu \# or daint-mc module load sarus ``` -Then you can pull a docker image to a sarus image as follows: +Then you can pull a Docker image to a sarus image as follows: ```shell sarus pull ghcr.io/i4ds/karabo-pipeline:latest @@ -58,10 +58,10 @@ sarus pull ghcr.io/i4ds/karabo-pipeline:latest **Native MPI support (MPICH-based)** -In order to access the high-speed Cray Aries interconnect, the container application must be dynamically linked to an MPI implementation that is [ABI-compatible](https://www.mpich.org/abi/) with the compute node's MPI on Piz Daint, CSCS recommends one of the following MPI implementations: +Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). Our containers provide native MPI by hooking CSCS MPI into the container as follows: -[MPICH v3.1.4](http://www.mpich.org/static/downloads/3.1.4/mpich-3.1.4.tar.gz) (Feburary 2015) -[MVAPICH2 2.2](http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.2.tar.gz) (September 2016) -Intel MPI Library 2017 Update 1 +```shell +srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest +``` -How to use: TODO \ No newline at end of file +Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. \ No newline at end of file From 10dc96dd4111c681c8af181e1509369056feae2c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 13:17:46 +0200 Subject: [PATCH 027/207] bugfix Dockerfile mpi-compilation needs python :rice: --- doc/src/container.md | 2 +- docker/user/Dockerfile | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index f11a3a13..7944a4db 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -58,7 +58,7 @@ sarus pull ghcr.io/i4ds/karabo-pipeline:latest **Native MPI support (MPICH-based)** -Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). Our containers provide native MPI by hooking CSCS MPI into the container as follows: +Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). 
Our containers provide native MPI by hooking CSCS MPI into the container using the `--mpi` flag as follows: ```shell srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 688b9225..f4865e4a 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -5,10 +5,14 @@ RUN apt-get update && apt-get install -y git RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -# compilers & wget needed for MPI-installation RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano +ARG KARABO_TAG +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} +RUN conda init +SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] # install mpich on standard location (needed for mpi-hook) -# starting-layer because compilation can require hours. ARG MPICH_VERSION="4.1.2" RUN mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ @@ -22,12 +26,6 @@ RUN mkdir -p /tmp/mpich-build \ && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ && cd / \ && rm -rf /tmp/mpich-build -ARG KARABO_TAG -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} -RUN conda init -SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install mamba -y -c conda-forge RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" # replace openmpi with mpich-dummy (see issue #512) From 4b93af71e5b7239cd3829c0361bfddc1ca393eac Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 18 Oct 2023 16:36:54 +0200 Subject: [PATCH 028/207] minor adjustments in Dockerfile.user :wink: --- docker/user/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index f4865e4a..ee301839 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -26,11 +26,12 @@ RUN mkdir -p /tmp/mpich-build \ && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ && cd / \ && rm -rf /tmp/mpich-build -RUN conda install mamba -y -c conda-forge -RUN mamba install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" +RUN conda install -n base conda-libmamba-solver && \ + conda config --set solver libmamba +RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" # replace openmpi with mpich-dummy (see issue #512) -RUN conda remove --force-remove -y openmpi mpi && \ - mamba install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" +# RUN conda remove --force-remove -y openmpi mpi && \ +# conda install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace From 5a147028b332c73bee19c2e97b8b1922d117134d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 19 Oct 2023 16:54:24 +0200 Subject: [PATCH 029/207] added mpi-supported h5py wheel as karabo 
dependency :fries: --- conda/meta.yaml | 2 +- docker/user/Dockerfile | 1 + environment.yaml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 70d40439..0f5f6842 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -29,7 +29,7 @@ requirements: - distributed - eidos=1.1.0 - healpy - - h5py + - h5py=*=mpi* - ipython - katbeam=0.1.0 - libcufft diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index ee301839..2038a662 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -13,6 +13,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Li RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] # install mpich on standard location (needed for mpi-hook) +# be sure that it's version is consistent with the latest-builds of the Karabo-Feedstock ARG MPICH_VERSION="4.1.2" RUN mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ diff --git a/environment.yaml b/environment.yaml index af32ab89..6a3a45a5 100644 --- a/environment.yaml +++ b/environment.yaml @@ -14,7 +14,7 @@ dependencies: - distributed - eidos=1.1.0 - healpy - - h5py + - h5py=*=mpi* - ipython - katbeam=0.1.0 - libcufft From ad335690fcef45aa21e436dd4126cb25517ec6bd Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 26 Oct 2023 16:19:00 +0200 Subject: [PATCH 030/207] removed pinocchio from environment.yaml :radio_button: --- environment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yaml b/environment.yaml index 6a3a45a5..7c390c9b 100644 --- a/environment.yaml +++ b/environment.yaml @@ -26,7 +26,6 @@ dependencies: - numpy=1.22 - oskarpy=2.8.3 - pandas - - pinocchio=5.0.0 - psutil - rascil=1.0.0 - reproject From 716d5945b23760bdb25015831083a0f01df4478a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 26 Oct 2023 16:29:41 +0200 Subject: [PATCH 031/207] updated environment.yaml with mpich-deps :u5408: --- environment.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/environment.yaml b/environment.yaml index 7c390c9b..ff12e020 100644 --- a/environment.yaml +++ b/environment.yaml @@ -14,13 +14,14 @@ dependencies: - distributed - eidos=1.1.0 - healpy - - h5py=*=mpi* + - h5py=*=mpi_mpich* - ipython - katbeam=0.1.0 - libcufft - matplotlib - montagepy=6.0.0 - mpi4py + - mpich - nbformat - nbconvert - numpy=1.22 @@ -36,3 +37,5 @@ dependencies: - ska-sdp-func-python=0.1.4 - tools21cm=2.0.2 - xarray + # transversal dependencies which we need to reference to get mpi-wheels + - fftw=*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) From fda66512a4b63082d1a91c0590f1ea2bac9275c8 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 31 Oct 2023 13:49:06 +0100 Subject: [PATCH 032/207] updated conda-build files :suspension_railway: --- conda/conda_build_config.yaml | 2 ++ conda/meta.yaml | 35 ++++++++++++++++++----------------- 2 files changed, 20 insertions(+), 17 deletions(-) create mode 100644 conda/conda_build_config.yaml diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml new file mode 100644 index 00000000..cb6373a5 --- /dev/null +++ b/conda/conda_build_config.yaml @@ -0,0 +1,2 @@ +python: + - 3.9 diff --git a/conda/meta.yaml b/conda/meta.yaml index 0f5f6842..cf2a5f90 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -12,46 +12,47 @@ build: requirements: build: - - python=3.9 + - python {{ python }} - pip host: - - python=3.9 + - python {{ python }} - pip run: - - python=3.9 - - aratmospy=1.0.0 + - python {{ python }} + - 
aratmospy =1.0.0 - astropy - bdsf - bluebild - cuda-cudart - - dask=2022.12.1 + - dask >=2022.12.1 - dask-mpi - distributed - eidos=1.1.0 - healpy - - h5py=*=mpi* + - h5py =*=mpi_mpich* - ipython - - katbeam=0.1.0 + - katbeam =0.1.0 - libcufft - matplotlib - - montagepy=6.0.0 + - montagepy =6.0.0 - mpi4py - nbformat - nbconvert - - numpy=1.22 - - oskarpy=2.8.3 + - numpy >=1.21, !=1.24.0 + - oskarpy =2.8.3 - pandas - - pinocchio=5.0.0 - psutil - - rascil=1.0.0 + - rascil =1.0.0 - reproject - requests - - scipy=1.10.1 - - ska-gridder-nifty-cuda=0.3.0 - - ska-sdp-datamodels=0.1.3 - - ska-sdp-func-python=0.1.4 - - tools21cm=2.0.2 + - scipy >=1.10.1 + - ska-gridder-nifty-cuda =0.3.0 + - ska-sdp-datamodels =0.1.3 + - ska-sdp-func-python =0.1.4 + - tools21cm =2.0.2 - xarray + # transversal dependencies which we need to reference to get mpi-wheels + - conda-forge::fftw =*=mpi_mpich* From 2de07470cebbd176afa73869397456183b3ea664 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 31 Oct 2023 16:46:52 +0100 Subject: [PATCH 033/207] replaced np.object0 with according non-deprecated alias :whale: --- karabo/simulation/sky_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 44cf25ce..47b05503 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -85,7 +85,7 @@ List[str], List[int], List[float], - NDArray[np.object0], + NDArray[np.object_], NDArray[np.int_], NDArray[np.float_], DataArrayCoordinates[xr.DataArray], From bd971ad33d281ebf0e06af653db0e3646c3c4bd6 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 08:03:19 +0100 Subject: [PATCH 034/207] minor bugfix in pyproject.toml :bow: --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index dfb56f3a..11644bd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,4 +4,8 @@ requires = [ "wheel" ] build-backend = "setuptools.build_meta" + [tool.pytest.ini_options] +markers = [ + "mpi: mpi-tests (launch with: 'mpirun -n pytest .')", +] \ No newline at end of file From dca983b18ef871b1feadfd8c357cc97dfced80cb Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 08:08:00 +0100 Subject: [PATCH 035/207] deleted test-pinocchio (wrongly merged) :lollipop: --- karabo/test/test_pinocchio.py | 63 ----------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 karabo/test/test_pinocchio.py diff --git a/karabo/test/test_pinocchio.py b/karabo/test/test_pinocchio.py deleted file mode 100644 index 1f737fd8..00000000 --- a/karabo/test/test_pinocchio.py +++ /dev/null @@ -1,63 +0,0 @@ -import tempfile -from pathlib import Path - -import numpy as np -import pytest - -from karabo.simulation.pinocchio import Pinocchio - - -@pytest.mark.skip(reason="`pinocchio.test.plc.out` not found on CSCS Sarus container") -def test_pinocchio_run(): - """Validate a simple PINOCCHIO run. - - Verify that PINOCCHIO can run successfully, - and check physical outputs against values from - previous successful runs. 
- """ - with tempfile.TemporaryDirectory() as tmpdir: - p = Pinocchio(working_dir=tmpdir) - - # Store output data for this redshift, in addition to z = 0 - p.addRedShift("0.3") - - # Load PINOCCHIO parameters from test parameter file - pwd = Path(__file__).parent - config = p.loadPinocchioConfig(pwd / "pinocchio_params.txt") - p.setConfig(config) - p.printConfig() - p.printRedShiftRequest() - - # Configure run planner, then execute run and save output files - p.runPlanner( - gbPerNode=16, - tasksPerNode=1, - ) - p.run(mpiThreads=2) - p.save(tmpdir) - - # Sanity check: number of halos saved at z = 0 - # This count will always be the same, as long as the random seed stays the same - halo_masses = np.loadtxt( - Path(tmpdir) / "pinocchio.test.plc.out", unpack=True, usecols=(8,) - ) - - assert len(halo_masses) == 45711 # Count found from previous run of PINOCCHIO - - # Physical check: verify that Mass Function is close to analytical fit - (m, nm, fit) = np.loadtxt( - Path(tmpdir) / "pinocchio.0.0000.test.mf.out", - unpack=True, - usecols=(0, 1, 5), - ) - - errors = m * nm - m * fit - # For this test, we use a small PINOCCHIO run, - # in which the mass function diverges for halo masses - # above 5 * 10**14 Msun - errors = errors[m < 5e14] - - # Threshold chosen to be a small error margin, - # in order to verify that PINOCCHIO successfully - # reproduces analytical fit to the halo mass function - assert np.sum(errors**2) < 1e-6 From b2deaca9b7655795f3ef24f12d4f5c780e5abe5d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 08:58:22 +0100 Subject: [PATCH 036/207] updated environment files :racehorse: --- conda/meta.yaml | 14 ++++++++------ environment.yaml | 39 ++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index cf2a5f90..abdb78fc 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -21,24 +21,25 @@ requirements: - python {{ python }} - aratmospy =1.0.0 - astropy - - bdsf + - bdsf =1.10.2 - bluebild - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos=1.1.0 + - eidos =1.1.0 - healpy - - h5py =*=mpi_mpich* + - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings - ipython - katbeam =0.1.0 - libcufft - matplotlib - montagepy =6.0.0 - - mpi4py + - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels + - mpich # explicit mpich as dep for `mpi4py`. 
mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - - numpy >=1.21, !=1.24.0 + - numpy >=1.21, !=1.24.0 # 1.24.0 is a buggy release - oskarpy =2.8.3 - pandas - psutil @@ -52,7 +53,8 @@ requirements: - tools21cm =2.0.2 - xarray # transversal dependencies which we need to reference to get mpi-wheels - - conda-forge::fftw =*=mpi_mpich* + - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge + # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels diff --git a/environment.yaml b/environment.yaml index ff12e020..5c078814 100644 --- a/environment.yaml +++ b/environment.yaml @@ -3,39 +3,40 @@ channels: - nvidia/label/cuda-11.7.0 - conda-forge dependencies: - - python=3.9 - - aratmospy=1.0.0 + - python =3.9 + - aratmospy =1.0.0 - astropy - - bdsf + - bdsf =1.10.2 - bluebild - cuda-cudart - - dask=2022.12.1 + - dask >=2022.12.1 - dask-mpi - distributed - - eidos=1.1.0 + - eidos =1.1.0 - healpy - - h5py=*=mpi_mpich* + - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings - ipython - - katbeam=0.1.0 + - katbeam =0.1.0 - libcufft - matplotlib - - montagepy=6.0.0 - - mpi4py - - mpich + - montagepy =6.0.0 + - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels + - mpich # explicit mpich as dep for `mpi4py`. mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - - numpy=1.22 - - oskarpy=2.8.3 + - numpy >=1.21, !=1.24.0 # 1.24.0 is a buggy release + - oskarpy =2.8.3 - pandas - psutil - - rascil=1.0.0 + - rascil =1.0.0 - reproject - requests - - scipy=1.10.1 - - ska-gridder-nifty-cuda=0.3.0 - - ska-sdp-datamodels=0.1.3 - - ska-sdp-func-python=0.1.4 - - tools21cm=2.0.2 + - scipy >=1.10.1 + - ska-gridder-nifty-cuda =0.3.0 + - ska-sdp-datamodels =0.1.3 + - ska-sdp-func-python =0.1.4 + - tools21cm =2.0.2 - xarray # transversal dependencies which we need to reference to get mpi-wheels - - fftw=*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) + - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge + # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels From 673442237645eceb23ebb20dcf46ce350a866d9a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 09:23:40 +0100 Subject: [PATCH 037/207] imporved dev-setup :sound: --- .pre-commit-config.yaml | 2 +- doc/src/development.md | 21 ++++++--------------- pyproject.toml | 22 +++++++++++++++++++++- setup.py | 3 ++- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f2990e8c..8832ebc6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 diff --git a/doc/src/development.md b/doc/src/development.md index 6f42c0c7..f5c44965 100644 --- a/doc/src/development.md +++ b/doc/src/development.md @@ -28,26 +28,18 @@ Then create a local development environment with the provided `environment.yaml` conda env create -n -f environment.yaml ``` -Then install the development dependencies using `requirements.txt`. +Then install karabo as a package and the development dependencies. 
```shell conda activate -pip install -r requirements.txt +pip install -e ".[dev]" ``` -NOTE: With these commands, only the dependencies but not the current version of karabo will be installed into a conda environment. To tell Python to treat the reposity as a package, run the following (note that using `conda develop` is not recommended, see [this issue](https://github.com/conda/conda-build/issues/1992)): +Afterwards, activating you dev-tools in your IDE and SHELL is recommended. For the setup of your IDE of choice you have to do it yourself. For the SHELL setup, we recommend to do the following in the repo-root: ```shell -pip install -e . -``` - -(Optional) For your developer experience, the following link might be useful: [Setup Python Interpreter in PyCharm](https://www.jetbrains.com/help/pycharm/conda-support-creating-conda-virtual-environment.html). - -You are done! If everything worked as expected, you can start an interactive Python session and test the import: - -```shell -python ->>> import karabo +pre-commit install +podmena add local ``` ## Formatting @@ -156,10 +148,9 @@ So an md file can reference like ``[some file](path/to/some/file)``. When adding new submodules or modules. You need to update the modules.rst file accordingly and add new files similiar to the karabo.simulation.rst. To enable the automatic generation of the documentation via the python docstrings. There is also the command ```sphinx-apidoc``` from sphinx (our doc engine), that can automate this. -If you want to work this sphinx locally on your machine, for example to use this sphinx-apidoc command. Thus, use the following commands to generate the documentation: +If you want to work this sphinx locally on your machine, for example to use this sphinx-apidoc command. Thus, assuming you've installed the dev-dependencies from pyproject.toml, use the following commands to generate the documentation: ```shell -pip install -r requirements.txt make html ``` diff --git a/pyproject.toml b/pyproject.toml index 11644bd3..9c478408 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,4 +8,24 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] markers = [ "mpi: mpi-tests (launch with: 'mpirun -n pytest .')", -] \ No newline at end of file +] + +[project.optional-dependencies] + dev = [ + 'mypy==1.6.1', + 'mypy-extensions==1.0.0', + 'flake8==6.1.0', + 'isort==5.12.0', + 'black[jupyter]==23.10.0', + 'pydocstyle==6.3.0', + 'pytest==7.4.2', + 'pytest-mpi==0.6', + 'pytest-cov==4.1.0', + 'pre-commit==3.5.0', + 'nest_asyncio', # for notebook test runs + 'types-requests', # types for mypy + 'myst-parser', + 'sphinx', + 'sphinx_rtd_theme', + 'podmena', # commit emojis + ] \ No newline at end of file diff --git a/setup.py b/setup.py index ee229bb2..6d0ee85f 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import re -from distutils.core import setup + +from setuptools import setup with open(os.path.join("karabo", "version.py"), mode="r") as file: version_txt = file.readline() From 9172806e96f89452ce62e0b02e1e4d392f4f9c46 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 09:24:54 +0100 Subject: [PATCH 038/207] removed requirements.txt :godmode: --- requirements.txt | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 148cf111..00000000 --- a/requirements.txt +++ /dev/null @@ -1,27 +0,0 @@ -# dev & test dependencies - -# formatting tools -mypy==1.6.1 
-mypy-extensions==1.0.0 -flake8==6.1.0 -isort==5.12.0 -black[jupyter]==23.10.0 -pydocstyle==6.3.0 -pytest==7.4.2 -pytest-mpi==0.6 -pytest-cov==4.1.0 -pre-commit==3.5.0 - -# for notebook test runs -nest_asyncio - -# types for mypy -types-requests - -# doc tools -myst-parser -sphinx -sphinx_rtd_theme - -# other -podmena From 2e232ad3cc3f392c3fae6550144075ca60b1b77a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 09:40:56 +0100 Subject: [PATCH 039/207] added ipykernel to dev-deps :blue_car: --- pyproject.toml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9c478408..ff7fbad9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,20 +12,21 @@ markers = [ [project.optional-dependencies] dev = [ - 'mypy==1.6.1', - 'mypy-extensions==1.0.0', + 'black[jupyter]==23.10.0', 'flake8==6.1.0', + 'ipykernel', 'isort==5.12.0', - 'black[jupyter]==23.10.0', + 'pre-commit==3.5.0', 'pydocstyle==6.3.0', 'pytest==7.4.2', - 'pytest-mpi==0.6', 'pytest-cov==4.1.0', - 'pre-commit==3.5.0', - 'nest_asyncio', # for notebook test runs - 'types-requests', # types for mypy + 'pytest-mpi==0.6', + 'mypy==1.6.1', + 'mypy-extensions==1.0.0', 'myst-parser', + 'nest_asyncio', # for notebook test runs + 'podmena', # commit emojis 'sphinx', 'sphinx_rtd_theme', - 'podmena', # commit emojis + 'types-requests', # types for mypy ] \ No newline at end of file From f3fda217c12c2c2a701fc30abc589fc13c3ce73f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 10:46:23 +0100 Subject: [PATCH 040/207] bugfix filter-sources for xarray>2023.2 :flashlight: --- karabo/simulation/sky_model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 47b05503..3f854b18 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -604,11 +604,13 @@ def filter_by_radius_euclidean_flat_approximation( distances_sq = np.add(np.square(x), np.square(y)) # Filter sources based on inner and outer radius - filter_mask = (distances_sq >= np.square(inner_radius_deg)) & ( - distances_sq <= np.square(outer_radius_deg) + filter_mask = cast( # distances_sq actually an xr.DataArray because x & y are + xr.DataArray, + (distances_sq >= np.square(inner_radius_deg)) + & (distances_sq <= np.square(outer_radius_deg)), ) - copied_sky.sources = copied_sky.sources[filter_mask] + copied_sky.sources = copied_sky.sources[filter_mask.compute()] copied_sky.sources = self.rechunk_array_based_on_self(copied_sky.sources) From 3936695c5f03fb71d1256968c4816749b94793fd Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 11:29:38 +0100 Subject: [PATCH 041/207] updated build-procedure :church: --- .github/workflows/conda-build.yml | 29 ++++++++++++++++++++++++++++- conda/meta.yaml | 5 +++-- environment.yaml | 2 +- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 0acc197e..3bba4240 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -4,6 +4,16 @@ on: release: types: [published] workflow_dispatch: + inputs: + buildNumber: + type: string + required: false + default: "0" + workflow_call: + inputs: + buildNumber: + type: string + required: true jobs: @@ -13,7 +23,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get Previous tag uses: 
actions-ecosystem/action-get-latest-tag@v1 id: get-latest-tag @@ -25,6 +35,23 @@ jobs: conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 conda config --append channels conda-forge + - name: Export Build Number + id: buildnr + shell: bash -l {0} + run: | + if [[ ${{ github.event_name }} == "release" ]] + then + BUILD_NR="0" + fi + if [[ ${{ github.event_name }} == "workflow_dispatch" ]] + then + BUILD_NR=${{ inputs.buildNumber }} + fi + if [[ ${{ github.event_name }} == "workflow_call" ]] + then + BUILD_NR=${{ inputs.buildNumber }} + fi + echo "BUILD_NUMBER=$BUILD_NR" >> "$GITHUB_ENV" - name: Build Conda shell: bash -l {0} run: | diff --git a/conda/meta.yaml b/conda/meta.yaml index abdb78fc..d949f1b8 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -8,7 +8,8 @@ source: path: ../ build: - string: {{ KARABO_VERSION }} + number: {{ BUILD_NUMBER }} + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ BUILD_NUMBER }} requirements: build: @@ -51,7 +52,7 @@ requirements: - ska-sdp-datamodels =0.1.3 - ska-sdp-func-python =0.1.4 - tools21cm =2.0.2 - - xarray + - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels diff --git a/environment.yaml b/environment.yaml index 5c078814..ccdfd5ae 100644 --- a/environment.yaml +++ b/environment.yaml @@ -36,7 +36,7 @@ dependencies: - ska-sdp-datamodels =0.1.3 - ska-sdp-func-python =0.1.4 - tools21cm =2.0.2 - - xarray + - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels From 59c71d5bdef06b1c3dc209a835319b4b2a246e15 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 14:59:52 +0100 Subject: [PATCH 042/207] implemented dynamic versioneering :pensive: --- .gitattributes | 1 + .github/workflows/conda-build.yml | 1 - doc/src/development.md | 4 +- environment.yaml | 33 +- karabo/__init__.py | 18 +- karabo/_version.py | 716 ++++++++++++++++++++++++++++++ karabo/util/__init__.py | 2 +- karabo/util/jupyter.py | 16 - karabo/util/setup_pkg.py | 19 + pyproject.toml | 35 +- setup.cfg | 33 -- setup.py | 16 +- 12 files changed, 804 insertions(+), 90 deletions(-) create mode 100644 .gitattributes create mode 100644 karabo/_version.py create mode 100644 karabo/util/setup_pkg.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..d33ab2a2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +karabo/_version.py export-subst diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 3bba4240..13fb7b91 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -58,7 +58,6 @@ jobs: cd conda KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} export KARABO_VERSION="${KARABO_TAG:1}" - echo "__version__ = \"${KARABO_VERSION}\"" > karabo/version.py conda mambabuild . 
- name: Publish to Conda shell: bash -l {0} diff --git a/doc/src/development.md b/doc/src/development.md index f5c44965..81aaf277 100644 --- a/doc/src/development.md +++ b/doc/src/development.md @@ -167,8 +167,8 @@ If you validate your code manually, consider just writing a method in a test cla When everything is merged which should be merged, a new Release can be deployed on `conda-forge` as following: - [Karabo-Pipeline | Releases](https://github.com/i4Ds/Karabo-Pipeline/releases) - Click on `Draft a new release` -- Define a Version by clicking `Choose a tag`. Currently we increment the second number by 1. -- Update version in `karabo/version.py` +- Define a Version by clicking `Choose a tag`. Currently we increment the minor version by 1. +- Set the version in pyproject.toml - Check that the `Target` is set to `main`. - Describe the release (get inspired by the previous releases). - Click `Publish release`. diff --git a/environment.yaml b/environment.yaml index ccdfd5ae..6e1cc96a 100644 --- a/environment.yaml +++ b/environment.yaml @@ -4,39 +4,38 @@ channels: - conda-forge dependencies: - python =3.9 - - aratmospy =1.0.0 + - aratmospy =1.0.dev0 - astropy - - bdsf =1.10.2 + - bdsf =1.10.dev2 - bluebild - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.0 + - eidos =1.1.dev0 - healpy - - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings + - h5py =*=mpi_mpich* - ipython - - katbeam =0.1.0 + - katbeam =0.1.dev0 - libcufft - matplotlib - - montagepy =6.0.0 - - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels - - mpich # explicit mpich as dep for `mpi4py`. mpich & openmpi support linux & mac (not windows) + - montagepy =6.0.dev0 + - mpi4py + - mpich - nbformat - nbconvert - - numpy >=1.21, !=1.24.0 # 1.24.0 is a buggy release - - oskarpy =2.8.3 + - numpy >=1.21, !=1.24.0 + - oskarpy =2.8.dev3 - pandas - psutil - - rascil =1.0.0 + - rascil =1.0.dev0 - reproject - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.0 - - ska-sdp-datamodels =0.1.3 - - ska-sdp-func-python =0.1.4 - - tools21cm =2.0.2 + - ska-gridder-nifty-cuda =0.3.dev0 + - ska-sdp-datamodels =0.1.dev3 + - ska-sdp-func-python =0.1.dev4 + - tools21cm =2.0.dev2 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge - # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels + - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) \ No newline at end of file diff --git a/karabo/__init__.py b/karabo/__init__.py index 5a9ffbbb..b4bc8844 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -1,11 +1,15 @@ -# set shared library if WSL to detect GPU drivers +"""This file is executed during build-time and when karabo gets imported. +Hence, you ONLY have deps available here which are available during build-time and +in karabo. If you don't know what that means, don't touch anything here. 
+""" import os import platform import sys -from karabo.version import __version__ +from ._version import get_versions -__version__ = __version__ +__version__ = get_versions()["version"] +del get_versions if "WSL" in platform.release() and ( os.environ.get("LD_LIBRARY_PATH") is None @@ -23,12 +27,14 @@ os.execv(sys.executable, ["python"] + sys.argv) # Setup dask for slurm -from karabo.util.dask import prepare_slurm_nodes_for_dask +if "SLURM_JOB_ID" in os.environ: + # ugly workaraound to not import stuff not available at build-time, but on import. + from karabo.util.dask import prepare_slurm_nodes_for_dask -prepare_slurm_nodes_for_dask() + prepare_slurm_nodes_for_dask() # set rascil data directory environment variable # see https://ska-telescope.gitlab.io/external/rascil/RASCIL_install.html -from karabo.util.jupyter import set_rascil_data_directory_env # noqa: E402 +from karabo.util.setup_pkg import set_rascil_data_directory_env # noqa: E402 set_rascil_data_directory_env() diff --git a/karabo/_version.py b/karabo/_version.py new file mode 100644 index 00000000..3556603e --- /dev/null +++ b/karabo/_version.py @@ -0,0 +1,716 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import functools +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "karabo/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". 
+ tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). 
+ + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions() -> Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for _ in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/karabo/util/__init__.py b/karabo/util/__init__.py index 9196f547..18ce9040 100644 --- a/karabo/util/__init__.py +++ b/karabo/util/__init__.py @@ -1,3 +1,3 @@ -from karabo.util.jupyter import set_rascil_data_directory_env +from karabo.util.setup_pkg import set_rascil_data_directory_env set_rascil_data_directory_env() diff --git a/karabo/util/jupyter.py b/karabo/util/jupyter.py index c9515aec..659f2c64 100644 --- a/karabo/util/jupyter.py +++ b/karabo/util/jupyter.py @@ -1,22 +1,6 @@ -import os -from distutils.sysconfig import get_python_lib - from IPython.core.getipython import get_ipython -def set_rascil_data_directory_env() -> None: - """ - Sets specific environment variables - that the jupyter kernel is not loading by default. - - This function is idempotent (running it more than once brings no side effects). - - """ - - data_folder = f"{get_python_lib()}/../../../data" - os.environ["RASCIL_DATA"] = data_folder - - def isNotebook() -> bool: # based on this.: # https://stackoverflow.com/questions/15411967/how-can-i-check-if-code-is-executed-in-the-ipython-notebook diff --git a/karabo/util/setup_pkg.py b/karabo/util/setup_pkg.py new file mode 100644 index 00000000..2a208bf5 --- /dev/null +++ b/karabo/util/setup_pkg.py @@ -0,0 +1,19 @@ +"""This .py file is ONLY for setup-specific util-functions. +Thus, ONLY deps at building-time are allowed here. +If you don't know what that means, don't touch anything here. +""" +import os +from sysconfig import get_path + + +def set_rascil_data_directory_env() -> None: + """ + Sets specific environment variables + that the jupyter kernel is not loading by default. + + This function is idempotent (running it more than once brings no side effects). + + """ + lib_dir = os.path.dirname(os.path.dirname(os.path.dirname(get_path("platlib")))) + data_folder = os.path.join(lib_dir, "data") + os.environ["RASCIL_DATA"] = data_folder diff --git a/pyproject.toml b/pyproject.toml index ff7fbad9..393385d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,41 @@ +[project] +name = "Karabo-Pipeline" +description = "A data-driven pipeline for Radio Astronomy from i4ds for the SKA Telescope." 
+authors = [ + { name = "Simon Felix", email = "simon.felix@fhnw.ch" }, +] +readme = "README.md" +license = {text = "MIT"} +requires-python = ">=3.9" +dynamic = ["version"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Topic :: Software Development :: Libraries :: Python Modules", +] + [build-system] requires = [ - "setuptools>=42", - "wheel" + "setuptools>=56.0", + "wheel", + "versioneer[toml]", ] build-backend = "setuptools.build_meta" +[tool.setuptools.dynamic] +version = { attr = "karabo.__version__" } + +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "karabo/_version.py" +versionfile_build = "karabo/_version.py" +tag_prefix = "v" + [tool.pytest.ini_options] markers = [ "mpi: mpi-tests (launch with: 'mpirun -n pytest .')", diff --git a/setup.cfg b/setup.cfg index 888e2723..a45e367e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,36 +1,3 @@ - -[metadata] -name = Karabo-Pipeline -author = Simon Felix -author_email = simon.felix@fhnw.ch -url = https://github.com/i4Ds/Karabo-Pipeline -description = A data-driven pipeline for Radio Astronomy from i4ds for the SKA Telescope -long_description = file: README.md -long_description_content_type = text/markdown -license = MIT -license_files = LICENSE -platform = any -keywords = {keywords} -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3.9 - Topic :: Software Development :: Libraries :: Python Modules -project_urls = - Bug Tracker = https://github.com/i4Ds/Karabo-Pipeline/issues - -[options] -zip_safe = false -include_package_data = true -python_requires = >=3.9 -packages = find: -test_suite = tests -setup_requires = - setuptools - [bdist_wheel] universal = true diff --git a/setup.py b/setup.py index 6d0ee85f..51fc6e1c 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,7 @@ -import os -import re +import versioneer +from setuptools import find_packages, setup -from setuptools import setup - -with open(os.path.join("karabo", "version.py"), mode="r") as file: - version_txt = file.readline() - -canonical_pattern = r"([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?" 
# noqa: E501 -karabo_version = re.search(canonical_pattern, version_txt).group() - -# implicitly takes config from setup.cfg setup( - version=karabo_version, + version=versioneer.get_version(), + packages=find_packages(), ) From b9de446eb5b9bf633e116b0ee49201e0f9cef095 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 15:04:17 +0100 Subject: [PATCH 043/207] removed remaining requirements.txt stuff :handbag: --- .github/workflows/build-docs.yml | 2 +- .github/workflows/test.yml | 3 +-- doc/src/development.md | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index c4553e50..6549f8e5 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -20,7 +20,7 @@ jobs: run: | conda env create -n karabo_dev_env -f environment.yaml conda activate karabo_dev_env - pip install -r requirements.txt + pip install ".[dev]" - name: Build Docs shell: bash -l {0} run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b6ab0ede..6b9ff424 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,8 +26,7 @@ jobs: conda create -y -n test_karabo python=3.9 conda activate test_karabo mamba env update --file environment.yaml - pip install -r requirements.txt - pip install ipykernel + pip install ".[dev]" python -m ipykernel install --user --name python3 - name: Test Dev-Tools shell: bash -l {0} diff --git a/doc/src/development.md b/doc/src/development.md index 81aaf277..68651d78 100644 --- a/doc/src/development.md +++ b/doc/src/development.md @@ -44,7 +44,7 @@ podmena add local ## Formatting -To increase the readability of the code and to better detect potential errors already during development, a number of tools are used. These tools must first be installed in the virtual environment using `pip install -r requirements.txt`. If possible use the versions defined in `requirements.txt`, so that all developers work with the same versions. The configurations of the tools are handled in `setup.cfg`. If changes to the configurations are desired, the team members should agree to this (e.g. via a meeting). +To increase the readability of the code and to better detect potential errors already during development, a number of tools are used. The configurations of the tools are handled in `setup.cfg` or `pyproject.toml`. If changes to the configurations are desired, the team members should agree to this (e.g. via a meeting). It is possible that certain tools complain about something that is not easy or even impossible to fix. ONLY then, there are options to ignore certain lines of code or even whole files for the checker. E.g. `# noqa` ignores inline flake8 complaints. But be careful not to accidentally ignore the whole file (e.g. with `# flake8: noqa`). Please refer to the documentation of the respective tool to learn how to ignore the errors. 
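
A quick way to sanity-check the versioneer-based versioning introduced in the patches above is to import the package after an editable install. The snippet below is a minimal sketch rather than part of the patch series; it assumes the conda environment from `environment.yaml` is active and that Karabo was installed from a git checkout that still carries its `v*` tags (e.g. via `pip install -e ".[dev]"`):

```python
# Minimal check of the dynamically derived version (sketch, not part of the patches).
# Assumes an editable install from a tagged git checkout of Karabo-Pipeline.
import karabo

# With versioneer's style = "pep440" and tag_prefix = "v", a checkout exactly on
# tag v0.19.6 reports "0.19.6"; commits past the tag render as
# "0.19.6+<N>.g<short-hash>", with ".dirty" appended for uncommitted changes.
print(karabo.__version__)
```

The `v` prefix configured as `tag_prefix` is stripped from the reported version, which matches how the conda build workflow and the user Dockerfile derive the package version from a release tag via `${KARABO_TAG:1}`.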
From d486251c4c909df33b819cac5d6e6516aca4654d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 2 Nov 2023 17:08:05 +0100 Subject: [PATCH 044/207] updated Dockerfile-user mpi-installation to be dependent on karabo-user installation version :bath: --- docker/user/Dockerfile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 2038a662..a240780f 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -13,9 +13,13 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Li RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] # install mpich on standard location (needed for mpi-hook) -# be sure that it's version is consistent with the latest-builds of the Karabo-Feedstock -ARG MPICH_VERSION="4.1.2" -RUN mkdir -p /tmp/mpich-build \ +RUN conda install -n base conda-libmamba-solver && \ + conda config --set solver libmamba +RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" +# fetch mpich-version to have it consistent with it's installation from karabo +ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +RUN MPICH_VERSION=$(eval $MPICH_EVAL) \ + mkdir -p /tmp/mpich-build \ && cd /tmp/mpich-build \ && wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz \ && tar xvzf mpich-${MPICH_VERSION}.tar.gz \ @@ -27,12 +31,9 @@ RUN mkdir -p /tmp/mpich-build \ && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ && cd / \ && rm -rf /tmp/mpich-build -RUN conda install -n base conda-libmamba-solver && \ - conda config --set solver libmamba -RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" # replace openmpi with mpich-dummy (see issue #512) -# RUN conda remove --force-remove -y openmpi mpi && \ -# conda install -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" +RUN MPICH_VERSION=$(eval $MPICH_EVAL) \ + conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace From 0bdb4c99910e24d1121141ff5f102f7adb98d4fa Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 09:47:02 +0100 Subject: [PATCH 045/207] refactored dockerfiles :mouse: --- docker/dev/Dockerfile | 43 ++++++++++++++++++++++++++---------------- docker/user/Dockerfile | 32 +++++++++++++++---------------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index bc5ecc10..91d95575 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -1,20 +1,31 @@ -# This Dockerfile is for developing purpose only -# Make sure to call `docker build` at the root of the repo to have access to the required files +# This Dockerfile is designed for CI/CD purpose (not mounting your repo and work with the container) +# If you want to mount your repo, you more or less just need the first stage, then install your own +# deps from the mounted repo, and don't kill the container once you've installed your environment. 
-FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -RUN apt-get update && apt-get install -y libarchive13 wget curl nano +# first stage is to have a more or less consistent base-image +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base +RUN apt-get update && apt-get install -y git libarchive13 wget curl nano +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p /opt/conda -ENV PATH=/opt/conda/bin:$PATH CONDA_PREFIX=/opt/conda + /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -RUN conda update -y conda && \ - conda install mamba -y -c conda-forge -COPY environment.yaml requirements.txt ./ -RUN mamba env update --file environment.yaml && \ - pip install -r requirements.txt && \ - pip install jupyterlab ipykernel && \ - python -m ipykernel install --user --name=karabo && \ - rm environment.yaml requirements.txt -RUN mkdir /workspace -WORKDIR /workspace \ No newline at end of file +RUN conda install -n base conda-libmamba-solver && \ + conda config --set solver libmamba +WORKDIR /workspace + +#second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) +# because dev-image is used for ci-purpose and it's mpi-implementation and version is not known prior, +# we don't compile the mpi from source here because it takes just too long. This makes an mpi-hook not possible. +FROM karabo-base +# redefine envs because they're just scoped per build-stage +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +# KARABO_TAG can be a branch, tag or commit (set from CI-job to get specific repo checkout) +ARG KARABO_TAG +# keep ADD instead of RUN because ADD always evaluates it's content to decide whether to use the cache or not +ADD https://github.com/i4Ds/Karabo-Pipeline.git#${KARABO_TAG} Karabo-Pipeline/ +WORKDIR /workspace/Karabo-Pipeline +# note that installation like this has several assumptions about the used files like: +# conda-channel definition & not naming env in `environment.yaml`, dev-optional dep in pyproject.toml +RUN conda env update -f=environment.yaml && \ + pip install -e ".[dev]" \ No newline at end of file diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index a240780f..2920a835 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -2,7 +2,7 @@ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as build ARG KARABO_TAG RUN apt-get update && apt-get install -y git -RUN git clone --branch ${KARABO_TAG} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git +ADD https://github.com/i4Ds/Karabo-Pipeline.git#${KARABO_TAG} Karabo-Pipeline/ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano @@ -18,21 +18,21 @@ RUN conda install -n base conda-libmamba-solver && \ RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" # fetch mpich-version to have it consistent with it's installation from karabo ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -RUN MPICH_VERSION=$(eval $MPICH_EVAL) \ - mkdir -p /tmp/mpich-build \ - && cd /tmp/mpich-build \ - && wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz \ - && tar xvzf 
mpich-${MPICH_VERSION}.tar.gz \ - && cd mpich-${MPICH_VERSION} \ - && ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda \ - && make -j4 \ - && make install \ - && ldconfig \ - && cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ \ - && cd / \ - && rm -rf /tmp/mpich-build -# replace openmpi with mpich-dummy (see issue #512) -RUN MPICH_VERSION=$(eval $MPICH_EVAL) \ +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ + mkdir -p /tmp/mpich-build && \ + cd /tmp/mpich-build && \ + wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \ + tar xvzf mpich-${MPICH_VERSION}.tar.gz && \ + cd mpich-${MPICH_VERSION} && \ + ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda && \ + make -j4 && \ + make install && \ + ldconfig && \ + cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ && \ + cd / && \ + rm -rf /tmp/mpich-build +# replace mpi with dummy-install (see issue #512) +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ From e6038cece37709d9f2dda5372197d057c7f8cd69 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 10:04:44 +0100 Subject: [PATCH 046/207] added versioneer to meta.yaml build-stage :books: --- conda/meta.yaml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index d949f1b8..ed0aefb5 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -20,38 +20,38 @@ requirements: - pip run: - python {{ python }} - - aratmospy =1.0.0 + - aratmospy =1.0.dev0 - astropy - - bdsf =1.10.2 + - bdsf =1.10.dev2 - bluebild - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.0 + - eidos =1.1.dev0 - healpy - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings - ipython - - katbeam =0.1.0 + - katbeam =0.1.dev0 - libcufft - matplotlib - - montagepy =6.0.0 + - montagepy =6.0.dev0 - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels - mpich # explicit mpich as dep for `mpi4py`. 
mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - numpy >=1.21, !=1.24.0 # 1.24.0 is a buggy release - - oskarpy =2.8.3 + - oskarpy =2.8.dev3 - pandas - psutil - - rascil =1.0.0 + - rascil =1.0.dev0 - reproject - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.0 - - ska-sdp-datamodels =0.1.3 - - ska-sdp-func-python =0.1.4 - - tools21cm =2.0.2 + - ska-gridder-nifty-cuda =0.3.dev0 + - ska-sdp-datamodels =0.1.dev3 + - ska-sdp-func-python =0.1.dev4 + - tools21cm =2.0.dev2 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge From f91ee97fdf2374c468ba0ce9443542f3d62e5f7c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 10:05:43 +0100 Subject: [PATCH 047/207] added versioneer to meta.yaml build-stage :taxi: --- conda/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/meta.yaml b/conda/meta.yaml index ed0aefb5..f3fe6dc9 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -15,6 +15,7 @@ requirements: build: - python {{ python }} - pip + - versioneer host: - python {{ python }} - pip From 6a07123be6b6d4fafdebf50188e5f43d07c0f7e3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 11:34:28 +0100 Subject: [PATCH 048/207] bugfix conda-build meta.yaml for versioneer :poop: --- .github/workflows/test.yml | 1 - conda/build.sh | 3 ++- conda/meta.yaml | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b9ff424..5a8997cb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,7 +43,6 @@ jobs: export IS_GITHUB_RUNNER=true export RUN_GPU_TESTS=false export RUN_NOTEBOOK_TESTS=true - pip install . --no-deps mpirun -n 2 pytest --cov=./ --only-mpi pytest --cov=./ --cov-append --cov-report=xml - name: Upload coverage reports to Codecov diff --git a/conda/build.sh b/conda/build.sh index 281491fa..161aa1e1 100644 --- a/conda/build.sh +++ b/conda/build.sh @@ -1 +1,2 @@ -$PYTHON -m pip install . \ No newline at end of file + +$PYTHON -m pip install --no-deps . 
\ No newline at end of file diff --git a/conda/meta.yaml b/conda/meta.yaml index f3fe6dc9..d355240e 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -16,9 +16,12 @@ requirements: - python {{ python }} - pip - versioneer + - tomli host: - python {{ python }} - pip + - versioneer + - tomli run: - python {{ python }} - aratmospy =1.0.dev0 From 93719e65756ee88e744b513e019706f807703fc7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 13:30:22 +0100 Subject: [PATCH 049/207] adapted conda-build to custom pkg-version :triumph: --- .github/workflows/conda-build.yml | 37 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 13fb7b91..63ef9df3 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -9,11 +9,17 @@ on: type: string required: false default: "0" + version: + type: string + required: true workflow_call: inputs: buildNumber: type: string required: true + version: + type: string + required: true jobs: @@ -27,37 +33,28 @@ jobs: - name: Get Previous tag uses: actions-ecosystem/action-get-latest-tag@v1 id: get-latest-tag - - name: Install conda build - shell: bash -l {0} - id: channels - run: | - KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} - conda config --append channels i4ds - conda config --append channels nvidia/label/cuda-11.7.0 - conda config --append channels conda-forge - - name: Export Build Number - id: buildnr + - name: Build Conda shell: bash -l {0} run: | if [[ ${{ github.event_name }} == "release" ]] then BUILD_NR="0" + KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} + KARABO_VER="${KARABO_TAG:1}" fi - if [[ ${{ github.event_name }} == "workflow_dispatch" ]] - then - BUILD_NR=${{ inputs.buildNumber }} - fi - if [[ ${{ github.event_name }} == "workflow_call" ]] + if [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] then BUILD_NR=${{ inputs.buildNumber }} + KARABO_VER=${{ inputs.version }} fi + echo "KARABO_VERSION=$KARABO_VER" >> "$GITHUB_ENV" echo "BUILD_NUMBER=$BUILD_NR" >> "$GITHUB_ENV" - - name: Build Conda - shell: bash -l {0} - run: | + + conda config --append channels i4ds + conda config --append channels nvidia/label/cuda-11.7.0 + conda config --append channels conda-forge + cd conda - KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} - export KARABO_VERSION="${KARABO_TAG:1}" conda mambabuild . 
- name: Publish to Conda shell: bash -l {0} From 5da123b654898ca6437cfae8a8197801f58927f7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 17:07:41 +0100 Subject: [PATCH 050/207] adapted Dockerfile.user to a testable (before release) setup :princess: --- .github/workflows/build-user-image.yml | 45 +++++++++++++++++++++++--- docker/dev/Dockerfile | 13 ++++---- docker/user/Dockerfile | 38 +++++++++++++++------- 3 files changed, 74 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index c036b4a5..1fadc97b 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -7,6 +7,21 @@ on: - completed workflow_dispatch: + inputs: + gitrev: + type: string + required: true + verstag: + type: string + required: true + latest: + type: boolean + required: false + default: false + test: + type: boolean + required: false + default: false env: REGISTRY: ghcr.io @@ -41,21 +56,43 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Setup metadata img-name & img-tag + shell: bash -l {0} + run: | + if [[ ${{ github.event_name }} == "workflow_run" ]] + then + echo "latest=true" >> "$GITHUB_ENV" + echo "version=${{ steps.get-latest-tag.outputs.tag }}" >> "$GITHUB_ENV" + echo "gitrev=${{ steps.get-latest-tag.outputs.tag }}" >> "$GITHUB_ENV" + echo "build=user" >> "$GITHUB_ENV" + fi + if [[ ${{ github.event_name }} == "workflow_dispatch" ]] + then + echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" + echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" + echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" + if [[ ${{ inputs.test }} == "true" ]] + then + echo "build=test" >> "$GITHUB_ENV" + else + echo "build=user" >> "$GITHUB_ENV" + fi + fi - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ github.repository }} tags: | - type=raw, value=latest - type=pep440, pattern={{version}}, value=${{ steps.get-latest-tag.outputs.tag }} + type=raw, enable=${{ env.latest }}, value=latest + type=pep440, pattern={{version}}, value=${{ env.version }} - name: Build and export to Docker uses: docker/build-push-action@v5 with: context: . push: false - build-args: KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} + build-args: KARABO_TAG=${{ env.version }} load: true tags: ${{ steps.meta.outputs.tags }} labels: test @@ -69,6 +106,6 @@ jobs: file: docker/user/Dockerfile context: . 
push: true - build-args: KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} + build-args: GIT_REV=${{ env.gitrev }} build=${{ env.build }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 91d95575..b253f1df 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -5,7 +5,7 @@ # first stage is to have a more or less consistent base-image FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base RUN apt-get update && apt-get install -y git libarchive13 wget curl nano -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} RUN conda init @@ -14,18 +14,17 @@ RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba WORKDIR /workspace -#second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) +# second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) # because dev-image is used for ci-purpose and it's mpi-implementation and version is not known prior, # we don't compile the mpi from source here because it takes just too long. This makes an mpi-hook not possible. FROM karabo-base # redefine envs because they're just scoped per build-stage ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" -# KARABO_TAG can be a branch, tag or commit (set from CI-job to get specific repo checkout) -ARG KARABO_TAG +ARG GIT_REV # keep ADD instead of RUN because ADD always evaluates it's content to decide whether to use the cache or not -ADD https://github.com/i4Ds/Karabo-Pipeline.git#${KARABO_TAG} Karabo-Pipeline/ -WORKDIR /workspace/Karabo-Pipeline +ADD https://github.com/i4Ds/Karabo-Pipeline.git#${GIT_REV} Karabo-Pipeline/ # note that installation like this has several assumptions about the used files like: # conda-channel definition & not naming env in `environment.yaml`, dev-optional dep in pyproject.toml -RUN conda env update -f=environment.yaml && \ +RUN cd Karabo-Pipeline && \ + conda env update -f=environment.yaml && \ pip install -e ".[dev]" \ No newline at end of file diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 2920a835..e2531506 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,23 +1,39 @@ -# Create build container to not have copied filed in real container afterwards -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as build -ARG KARABO_TAG + +# build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` +ARG build=user GIT_REV + +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo RUN apt-get update && apt-get install -y git -ADD https://github.com/i4Ds/Karabo-Pipeline.git#${KARABO_TAG} Karabo-Pipeline/ +ADD https://github.com/i4Ds/Karabo-Pipeline.git#${GIT_REV} Karabo-Pipeline/ -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -RUN apt-get update && apt-get install -y gcc gfortran libarchive13 wget curl nano -ARG KARABO_TAG -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base +RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" 
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -# install mpich on standard location (needed for mpi-hook) RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba -RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${KARABO_TAG:1}" + +FROM karabo-base as build-user +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" +RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}" + +FROM karabo-base as build-test +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" +COPY --from=karabo-repo Karabo-Pipeline/ repo/ +RUN cd repo && \ + conda env update -f=environment.yaml && \ + pip install --no-deps . && \ + cd .. && \ + rm -rf repo/ + +FROM build-${build} +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" # fetch mpich-version to have it consistent with it's installation from karabo ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +# install mpich on standard location to enable mpi-hook (may take a while) RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ mkdir -p /tmp/mpich-build && \ cd /tmp/mpich-build && \ @@ -38,6 +54,6 @@ RUN pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace WORKDIR /workspace -COPY --from=build Karabo-Pipeline/ repo/ +COPY --from=karabo-repo Karabo-Pipeline/ repo/ RUN cp -r repo/karabo/examples /workspace/examples/ && \ rm -rf repo/ From 67d22dc326efec05b9e2afa7db2ec58a2da586d0 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 17:08:30 +0100 Subject: [PATCH 051/207] bugfix build-user-image.yml :loop: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 1fadc97b..c9549f4f 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -92,7 +92,7 @@ jobs: with: context: . push: false - build-args: KARABO_TAG=${{ env.version }} + build-args: GIT_REV=${{ env.gitrev }} load: true tags: ${{ steps.meta.outputs.tags }} labels: test From 827be883c3e49c6c35736e681bc99caa823fdd58 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 3 Nov 2023 17:09:21 +0100 Subject: [PATCH 052/207] bugfix build-user-image.yml :construction: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index c9549f4f..3db3c613 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -92,7 +92,7 @@ jobs: with: context: . 
push: false - build-args: GIT_REV=${{ env.gitrev }} + build-args: GIT_REV=${{ env.gitrev }} build=${{ env.build }} load: true tags: ${{ steps.meta.outputs.tags }} labels: test From c4470559e5b9f166bf9c4935a1c712cc276ae2ac Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 6 Nov 2023 14:36:32 +0100 Subject: [PATCH 053/207] updated Dockerfile-user :clap: --- docker/user/Dockerfile | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index e2531506..c373cbf7 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -6,31 +6,29 @@ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo RUN apt-get update && apt-get install -y git ADD https://github.com/i4Ds/Karabo-Pipeline.git#${GIT_REV} Karabo-Pipeline/ -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" +ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba - -FROM karabo-base as build-user -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" -RUN conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}" - -FROM karabo-base as build-test -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" COPY --from=karabo-repo Karabo-Pipeline/ repo/ -RUN cd repo && \ +RUN if [[ ${build} = "user" ]] && \ + then && \ + conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}" && \ + fi && \ + if [[ ${build} = "test" ]] && \ + then && \ + cd repo && \ conda env update -f=environment.yaml && \ pip install --no-deps . && \ cd .. 
&& \ + fi && \ + cp -r repo/karabo/examples /workspace/examples/ && \ rm -rf repo/ - -FROM build-${build} -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" # fetch mpich-version to have it consistent with it's installation from karabo ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' # install mpich on standard location to enable mpi-hook (may take a while) @@ -49,11 +47,8 @@ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ rm -rf /tmp/mpich-build # replace mpi with dummy-install (see issue #512) RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ - conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" -RUN pip install jupyterlab ipykernel pytest && \ + conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" && \ + pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo && \ mkdir /workspace -WORKDIR /workspace -COPY --from=karabo-repo Karabo-Pipeline/ repo/ -RUN cp -r repo/karabo/examples /workspace/examples/ && \ - rm -rf repo/ +WORKDIR /workspace \ No newline at end of file From 5c19eb9061c60e96ed7e97243a884802ae0ae220 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 6 Nov 2023 15:57:03 +0100 Subject: [PATCH 054/207] bugfix user-dockerfile :clock9: --- docker/user/Dockerfile | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index c373cbf7..ae4a5724 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,12 +1,12 @@ - -# build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` -ARG build=user GIT_REV - FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo -RUN apt-get update && apt-get install -y git -ADD https://github.com/i4Ds/Karabo-Pipeline.git#${GIT_REV} Karabo-Pipeline/ +ARG GIT_REV +RUN apt-get update && apt-get install -y git && \ + git clone --branch ${GIT_REV} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 +# build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` +ARG GIT_REV \ + BUILD=user RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ @@ -16,19 +16,20 @@ SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba COPY --from=karabo-repo Karabo-Pipeline/ repo/ -RUN if [[ ${build} = "user" ]] && \ - then && \ - conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}" && \ - fi && \ - if [[ ${build} = "test" ]] && \ - then && \ - cd repo && \ - conda env update -f=environment.yaml && \ - pip install --no-deps . && \ - cd .. && \ +RUN if [ "$BUILD" = "user" ] ; then \ + conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}"; \ + elif [ "$BUILD" = "test" ] ; then \ + cd "repo"; \ + conda env update -f="environment.yaml"; \ + pip install --no-deps "."; \ + cd ".." 
; \ + else \ + echo "Invalid build $BUILD"; \ fi && \ - cp -r repo/karabo/examples /workspace/examples/ && \ - rm -rf repo/ + mkdir /workspace && \ + cp -r "repo/karabo/examples" "/workspace/examples/" && \ + rm -rf "repo/" +WORKDIR /workspace # fetch mpich-version to have it consistent with it's installation from karabo ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' # install mpich on standard location to enable mpi-hook (may take a while) @@ -49,6 +50,4 @@ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" && \ pip install jupyterlab ipykernel pytest && \ - python -m ipykernel install --user --name=karabo && \ - mkdir /workspace -WORKDIR /workspace \ No newline at end of file + python -m ipykernel install --user --name=karabo \ No newline at end of file From 99a134f2dd858aff3a9e9016b61a28ad34c76849 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 6 Nov 2023 15:59:17 +0100 Subject: [PATCH 055/207] bugfix Dockerfile-dev :bookmark: --- docker/dev/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index b253f1df..93d01836 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -21,10 +21,9 @@ FROM karabo-base # redefine envs because they're just scoped per build-stage ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" ARG GIT_REV -# keep ADD instead of RUN because ADD always evaluates it's content to decide whether to use the cache or not -ADD https://github.com/i4Ds/Karabo-Pipeline.git#${GIT_REV} Karabo-Pipeline/ # note that installation like this has several assumptions about the used files like: # conda-channel definition & not naming env in `environment.yaml`, dev-optional dep in pyproject.toml -RUN cd Karabo-Pipeline && \ +RUN git clone --branch ${GIT_REV} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git && \ + cd Karabo-Pipeline && \ conda env update -f=environment.yaml && \ pip install -e ".[dev]" \ No newline at end of file From e1a4c368500f0887ad256a062e260f99dfc828ec Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 6 Nov 2023 16:03:47 +0100 Subject: [PATCH 056/207] minor doc-update in dockerfile-dev :children_crossing: --- docker/dev/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 93d01836..5416522e 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -15,6 +15,7 @@ RUN conda install -n base conda-libmamba-solver && \ WORKDIR /workspace # second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) +# please ALWAYS pass the git-commit-rev (NOT the branch) as build-arg to ensure that not a cached layer is used. # because dev-image is used for ci-purpose and it's mpi-implementation and version is not known prior, # we don't compile the mpi from source here because it takes just too long. This makes an mpi-hook not possible. 
FROM karabo-base From a5f22acf8dab7093934fee7cffb2ad3e3a7dc59f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 13:48:19 +0100 Subject: [PATCH 057/207] updated description of build-user-image workflow inputs :gun: --- .github/workflows/build-user-image.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 3db3c613..d5298521 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -11,17 +11,21 @@ on: gitrev: type: string required: true + description: "commit, tag or branch (careful with branch if docker-build environment is caching)" verstag: type: string required: true + description: "PEP440 conform version-tag of Karabo (incl. leading 'v')" latest: type: boolean required: false default: false + description: "Tag image as 'latest'?" test: type: boolean required: false default: false + description: "Test build? If yes, the environment gets installed from the according `gitrev` environment.yaml instead" env: REGISTRY: ghcr.io From 9fc12710c1dbc54d8ca210f2d7575a3d39b3de4d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 13:59:50 +0100 Subject: [PATCH 058/207] introduced venv in dockerfile-user to not f*** up base env :relieved: --- docker/user/Dockerfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index ae4a5724..55f317e2 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -10,11 +10,14 @@ ARG GIT_REV \ RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} -RUN conda init + /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} && \ + conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ - conda config --set solver libmamba + conda config --set solver libmamba && \ + conda create -y -n karabo +# change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` +SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] COPY --from=karabo-repo Karabo-Pipeline/ repo/ RUN if [ "$BUILD" = "user" ] ; then \ conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}"; \ @@ -26,6 +29,7 @@ RUN if [ "$BUILD" = "user" ] ; then \ else \ echo "Invalid build $BUILD"; \ fi && \ + echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ cp -r "repo/karabo/examples" "/workspace/examples/" && \ rm -rf "repo/" From 1875003978505931cd55f30964adc9c112220eb7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 14:03:08 +0100 Subject: [PATCH 059/207] updated dev-img with venv :sweat: --- docker/dev/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 5416522e..c1f25e7b 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -11,7 +11,10 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Li RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ - conda config --set solver libmamba + conda config 
--set solver libmamba && \ + conda create -y -n karabo +SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] +RUN echo "conda activate karabo" >> ~/.bashrc WORKDIR /workspace # second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) From a963a506a02ccbe4e2ca192860f340d96f6183f6 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 14:22:06 +0100 Subject: [PATCH 060/207] updated build-user-image workflow to be able to run on workflow-dispatch :dolphin: --- .github/workflows/build-user-image.yml | 2 +- docker/dev/Dockerfile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index d5298521..02a6f693 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -33,7 +33,7 @@ env: jobs: build-and-push-image: runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} + if: ${{ (github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch') }} permissions: contents: read packages: write diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index c1f25e7b..b79dc57e 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -13,6 +13,7 @@ SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ conda create -y -n karabo +# create venv to not f*** up base-env in which libmamba solver lives SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] RUN echo "conda activate karabo" >> ~/.bashrc WORKDIR /workspace From e45f0cdd963fe72dadc1ba22e24a9260c419080b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 14:28:03 +0100 Subject: [PATCH 061/207] added file to build-and-export-docker action :pushpin: --- .github/workflows/build-user-image.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 02a6f693..43ddb2fa 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -94,6 +94,7 @@ jobs: - name: Build and export to Docker uses: docker/build-push-action@v5 with: + file: docker/user/Dockerfile context: . push: false build-args: GIT_REV=${{ env.gitrev }} build=${{ env.build }} From b3b62bd42db07836cbb89a8b0ae017dff9a6b418 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 14:39:10 +0100 Subject: [PATCH 062/207] bugfix build-args in build-user-image workflow :man: --- .github/workflows/build-user-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 43ddb2fa..c4554d0d 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -97,7 +97,7 @@ jobs: file: docker/user/Dockerfile context: . push: false - build-args: GIT_REV=${{ env.gitrev }} build=${{ env.build }} + build-args: GIT_REV=${{ env.gitrev }},BUILD=${{ env.build }} load: true tags: ${{ steps.meta.outputs.tags }} labels: test @@ -111,6 +111,6 @@ jobs: file: docker/user/Dockerfile context: . 
push: true - build-args: GIT_REV=${{ env.gitrev }} build=${{ env.build }} + build-args: GIT_REV=${{ env.gitrev }},BUILD=${{ env.build }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From f48017e79fb08307c2a02a07cfc2516ff07a61fc Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 15:09:36 +0100 Subject: [PATCH 063/207] bugfix get-shallow git-repo from git-rev (not only branches or tags) :camera: --- .github/workflows/build-user-image.yml | 4 ++-- .github/workflows/conda-build.yml | 2 ++ docker/user/Dockerfile | 7 ++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index c4554d0d..8d0ea246 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -11,7 +11,7 @@ on: gitrev: type: string required: true - description: "commit, tag or branch (careful with branch if docker-build environment is caching)" + description: "commit (full), tag or branch (branch is not unique and therefore step could get cached)" verstag: type: string required: true @@ -25,7 +25,7 @@ on: type: boolean required: false default: false - description: "Test build? If yes, the environment gets installed from the according `gitrev` environment.yaml instead" + description: "Install from `gitrev` environment.yaml instead?" env: REGISTRY: ghcr.io diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 63ef9df3..62f11f6c 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -9,9 +9,11 @@ on: type: string required: false default: "0" + description: "Conda build-number on anaconda.org" version: type: string required: true + description: "PEP440 package version (incl. 
leading 'v')" workflow_call: inputs: buildNumber: diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 55f317e2..2263a5cb 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,7 +1,12 @@ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo ARG GIT_REV RUN apt-get update && apt-get install -y git && \ - git clone --branch ${GIT_REV} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git + mkdir Karabo-Pipeline && \ + cd Karabo-Pipeline && \ + git init && \ + git remote add origin git@github.com:i4Ds/Karabo-Pipeline.git && \ + git fetch origin ${GIT_REV} && \ + git reset --hard ${GIT_REV} FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 # build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` From 2f5fdf6fb04bde4890b16c9f7faab8bacb645f0c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 15:25:22 +0100 Subject: [PATCH 064/207] bugfix remote from https and not from ssh :video_game: --- docker/user/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 2263a5cb..ebe40bf0 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -4,14 +4,13 @@ RUN apt-get update && apt-get install -y git && \ mkdir Karabo-Pipeline && \ cd Karabo-Pipeline && \ git init && \ - git remote add origin git@github.com:i4Ds/Karabo-Pipeline.git && \ + git remote add origin https://github.com/i4Ds/Karabo-Pipeline.git && \ git fetch origin ${GIT_REV} && \ git reset --hard ${GIT_REV} FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 # build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` -ARG GIT_REV \ - BUILD=user +ARG BUILD=user RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ From 50cc5fc7643d68874a585176669464c8c05328c5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 15:28:46 +0100 Subject: [PATCH 065/207] updated workflow-dispatch descriptions :bangbang: --- .github/workflows/build-user-image.yml | 4 ++-- .github/workflows/conda-build.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 8d0ea246..6cd14616 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -11,11 +11,11 @@ on: gitrev: type: string required: true - description: "commit (full), tag or branch (branch is not unique and therefore step could get cached)" + description: "gitrev: commit (full), tag or branch (branch is not unique and therefore step could get cached)" verstag: type: string required: true - description: "PEP440 conform version-tag of Karabo (incl. leading 'v')" + description: "version: PEP440 version-tag of Karabo (incl. leading 'v')" latest: type: boolean required: false diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 62f11f6c..9ae77446 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -9,11 +9,11 @@ on: type: string required: false default: "0" - description: "Conda build-number on anaconda.org" + description: "build-nr: conda build-nr of anaconda.org" version: type: string required: true - description: "PEP440 package version (incl. 
leading 'v')" + description: "version: PEP440 package-version (incl. leading 'v')" workflow_call: inputs: buildNumber: From 329263f7667283d5db6e3fb8c17f31184ca23aaa Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 15:40:56 +0100 Subject: [PATCH 066/207] bugfix build-args passing to docker-build-push action :sos: --- .github/workflows/build-user-image.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 6cd14616..6c753066 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -97,7 +97,9 @@ jobs: file: docker/user/Dockerfile context: . push: false - build-args: GIT_REV=${{ env.gitrev }},BUILD=${{ env.build }} + build-args: | + GIT_REV=${{ env.gitrev }} + BUILD=${{ env.build }} load: true tags: ${{ steps.meta.outputs.tags }} labels: test @@ -111,6 +113,8 @@ jobs: file: docker/user/Dockerfile context: . push: true - build-args: GIT_REV=${{ env.gitrev }},BUILD=${{ env.build }} + build-args: | + GIT_REV=${{ env.gitrev }} + BUILD=${{ env.build }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From 1c1347181328211ac0dc854e31b69ac936c67280 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 15:55:42 +0100 Subject: [PATCH 067/207] throw exit-code in dockerfile-user if build not set correctly :musical_note: --- docker/user/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index ebe40bf0..08b276ed 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -31,7 +31,7 @@ RUN if [ "$BUILD" = "user" ] ; then \ pip install --no-deps "."; \ cd ".." ; \ else \ - echo "Invalid build $BUILD"; \ + exit 2; \ fi && \ echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ From 06a204f33f3efcf5e0941970c54b8dc6eff4bb47 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 16:09:53 +0100 Subject: [PATCH 068/207] added dev-flag to conda-build :rose: --- .github/workflows/conda-build.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 9ae77446..660657be 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -14,6 +14,10 @@ on: type: string required: true description: "version: PEP440 package-version (incl. leading 'v')" + dev: + required: true + type: boolean + description: "dev-build? 
If yes, it sets an offset to build-nr" workflow_call: inputs: buildNumber: @@ -22,6 +26,10 @@ on: version: type: string required: true + dev: + required: false + default: false + type: boolean jobs: @@ -49,6 +57,10 @@ jobs: BUILD_NR=${{ inputs.buildNumber }} KARABO_VER=${{ inputs.version }} fi + if [[ ${{ inputs.dev }} == "true" ]] + then + BUILD_NR="$(($BUILD_NR + 999))" + fi echo "KARABO_VERSION=$KARABO_VER" >> "$GITHUB_ENV" echo "BUILD_NUMBER=$BUILD_NR" >> "$GITHUB_ENV" From 81ff6197658177e011effc6de3b7411ac37b2069 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 16:18:11 +0100 Subject: [PATCH 069/207] added security to dev-builds in conda-build workflow :baggage_claim: --- .github/workflows/conda-build.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 660657be..ead22415 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -59,7 +59,16 @@ jobs: fi if [[ ${{ inputs.dev }} == "true" ]] then + if [[ "dev" != *"$KARABO_VER"* ]] + then + exit 2 + fi BUILD_NR="$(($BUILD_NR + 999))" + else + if [[ "dev" == *"$KARABO_VER"* ]] + then + exit 2 + fi fi echo "KARABO_VERSION=$KARABO_VER" >> "$GITHUB_ENV" echo "BUILD_NUMBER=$BUILD_NR" >> "$GITHUB_ENV" From 8a315340e28ae55b546c4347c372a900fdbc0d21 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 16:49:08 +0100 Subject: [PATCH 070/207] bugfix dev-evaluation in conda-build.yml :sa: --- .github/workflows/conda-build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index ead22415..710fa7ab 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -57,15 +57,16 @@ jobs: BUILD_NR=${{ inputs.buildNumber }} KARABO_VER=${{ inputs.version }} fi + DEV_STR="dev" if [[ ${{ inputs.dev }} == "true" ]] then - if [[ "dev" != *"$KARABO_VER"* ]] + if [[ "$KARABO_VER" != *"$DEV_STR"* ]] then exit 2 fi BUILD_NR="$(($BUILD_NR + 999))" else - if [[ "dev" == *"$KARABO_VER"* ]] + if [[ "$KARABO_VER" == *"$DEV_STR"* ]] then exit 2 fi From e01ba5950d3003f81499a6d8dab173c66e8b6695 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 16:57:04 +0100 Subject: [PATCH 071/207] outcommented mpich-compilation in Dockerfile :disappointed_relieved: --- docker/user/Dockerfile | 49 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 08b276ed..a9a242d9 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -36,26 +36,31 @@ RUN if [ "$BUILD" = "user" ] ; then \ echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ cp -r "repo/karabo/examples" "/workspace/examples/" && \ - rm -rf "repo/" -WORKDIR /workspace -# fetch mpich-version to have it consistent with it's installation from karabo -ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -# install mpich on standard location to enable mpi-hook (may take a while) -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ - mkdir -p /tmp/mpich-build && \ - cd /tmp/mpich-build && \ - wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \ - tar xvzf mpich-${MPICH_VERSION}.tar.gz && \ - cd mpich-${MPICH_VERSION} && \ - ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda && \ - make -j4 && \ - make install && \ - ldconfig 
&& \ - cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ && \ - cd / && \ - rm -rf /tmp/mpich-build -# replace mpi with dummy-install (see issue #512) -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ - conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" && \ + rm -rf "repo/" && \ pip install jupyterlab ipykernel pytest && \ - python -m ipykernel install --user --name=karabo \ No newline at end of file + python -m ipykernel install --user --name=karabo +WORKDIR /workspace + +# The following steps are needed if an mpi-hook through sarus must be allowed. +# However, because this steps takes more than 2 hours, and the github-runners run out +# of memory, these steps are outcommented at the time-beeing. + +# # fetch mpich-version to have it consistent with it's installation from karabo +# ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +# # install mpich on standard location to enable mpi-hook (may take a while) +# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ +# mkdir -p /tmp/mpich-build && \ +# cd /tmp/mpich-build && \ +# wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \ +# tar xvzf mpich-${MPICH_VERSION}.tar.gz && \ +# cd mpich-${MPICH_VERSION} && \ +# ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda && \ +# make -j4 && \ +# make install && \ +# ldconfig && \ +# cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ && \ +# cd / && \ +# rm -rf /tmp/mpich-build +# # replace mpi with dummy-install (see issue #512) +# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ +# conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file From 31b0723deb6c70f3de8e274a63b0f07c9564238d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 7 Nov 2023 17:01:03 +0100 Subject: [PATCH 072/207] bugfix conda-build export of env-vars in same step :tent: --- .github/workflows/conda-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 710fa7ab..e95c9eec 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -71,8 +71,8 @@ jobs: exit 2 fi fi - echo "KARABO_VERSION=$KARABO_VER" >> "$GITHUB_ENV" - echo "BUILD_NUMBER=$BUILD_NR" >> "$GITHUB_ENV" + export KARABO_VERSION=$KARABO_VER + export BUILD_NUMBER=$BUILD_NR conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 From 9cfa55a01335d89ce875ed3ef84bf356d5e7defe Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 8 Nov 2023 08:38:43 +0100 Subject: [PATCH 073/207] added failing tests if PR is draft :ng: --- .github/workflows/test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5a8997cb..4dd55349 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,13 +4,19 @@ on: schedule: # Every night at 5am - cron: '0 5 * * *' pull_request: - types: [opened, synchronize, reopened] + types: [opened, synchronize, reopened, ready_for_review] workflow_dispatch: push: branches: - main # is activated once a PR gets merged jobs: + Fail-if-PR-is-Draft: + if: ${{ (github.event_name == 'pull_request') && (github.event.pull_request.draft == true) }} + runs-on: ubuntu-latest + steps: + - name: Fail with exit 1 if PR is draft + run: exit 1 Test_Karabo: runs-on: ubuntu-latest steps: 
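The dev-build guard enforced in the conda-build workflow above reduces to a few lines of shell. A standalone sketch with made-up input values (variable names follow the workflow; the concrete tag, build number and `IS_DEV` flag are illustrative only):

```shell
KARABO_TAG="v0.20.0.dev3"   # illustrative input; the real value comes from the workflow inputs
BUILD_NUMBER="1"
IS_DEV="true"               # stand-in for the boolean `dev` workflow input

KARABO_VERSION="${KARABO_TAG:1}"    # strip the leading "v" -> 0.20.0.dev3
if [[ "$IS_DEV" == "true" ]]; then
    # a dev build must carry a "dev" version and gets a fixed build-number offset of 999
    [[ "$KARABO_VERSION" == *dev* ]] || exit 2
    BUILD_NUMBER=$((BUILD_NUMBER + 999))
else
    # a release build must not carry a "dev" version
    [[ "$KARABO_VERSION" != *dev* ]] || exit 2
fi
echo "$KARABO_VERSION $BUILD_NUMBER"    # -> "0.20.0.dev3 1000"
```

Because the guard and the subsequent `conda mambabuild` call run in the same workflow step, values needed within that step have to be exported directly; entries appended to `$GITHUB_ENV` only become visible in subsequent steps, which is what the export bugfix above addresses.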
From 74033708486c48b5b101a928df8df1b25633f243 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 8 Nov 2023 11:16:47 +0100 Subject: [PATCH 074/207] setup build-user-img, conda-build & test-user-package for dev-testing :mag: --- .github/workflows/build-user-image.yml | 48 +++++++++---------- .github/workflows/conda-build.yml | 61 ++++++++++++++++++++----- .github/workflows/test-user-package.yml | 54 +++++++++++++++++++--- 3 files changed, 123 insertions(+), 40 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 6c753066..b2ea7b67 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -1,17 +1,22 @@ name: Build User Image on: - workflow_run: - workflows: ["Test User Package"] - types: - - completed + workflow_call: + inputs: + verstag: + type: string + required: true + latest: + type: boolean + required: false + default: false workflow_dispatch: inputs: gitrev: type: string required: true - description: "gitrev: commit (full), tag or branch (branch is not unique and therefore step could get cached)" + description: "gitrev: commit-hash (full), tag or branch (branch is not unique and therefore checkout-step could get cached)" verstag: type: string required: true @@ -31,13 +36,8 @@ env: REGISTRY: ghcr.io jobs: - build-and-push-image: + build-test-and-push-image: runs-on: ubuntu-latest - if: ${{ (github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch') }} - permissions: - contents: read - packages: write - steps: - name: Checkout repository uses: actions/checkout@v4 @@ -63,24 +63,26 @@ jobs: - name: Setup metadata img-name & img-tag shell: bash -l {0} run: | - if [[ ${{ github.event_name }} == "workflow_run" ]] - then - echo "latest=true" >> "$GITHUB_ENV" - echo "version=${{ steps.get-latest-tag.outputs.tag }}" >> "$GITHUB_ENV" - echo "gitrev=${{ steps.get-latest-tag.outputs.tag }}" >> "$GITHUB_ENV" + if [[ ${{ github.event_name }} == "workflow_call" ]]; then + echo "gitrev=${{ inputs.verstag }}" >> "$GITHUB_ENV" echo "build=user" >> "$GITHUB_ENV" - fi - if [[ ${{ github.event_name }} == "workflow_dispatch" ]] - then - echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" - echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" + elif [[ ${{ github.event_name }} == "workflow_dispatch" ]]; then echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" - if [[ ${{ inputs.test }} == "true" ]] - then + if [[ ${{ inputs.test }} == "true" ]]; then echo "build=test" >> "$GITHUB_ENV" else echo "build=user" >> "$GITHUB_ENV" fi + else + echo "Invalid github-event!" + exit 2 + fi + echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" + echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" + DEV_STR="dev" + if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ $LATEST_DOCKER == 'true' ]]; then + echo "Invalid configuration of workflow-inputs!" + exit 2 fi - name: Extract metadata (tags, labels) for Docker id: meta diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index e95c9eec..9a747f4c 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -15,9 +15,19 @@ on: required: true description: "version: PEP440 package-version (incl. leading 'v')" dev: - required: true type: boolean + required: true description: "dev-build? If yes, it sets an offset to build-nr" + buildDocker: + type: boolean + required: false + default: false + description: "build docker-img if conda-build & tests succeeded?" 
+ latestDocker: + type: boolean + required: false + default: false + description: "tag docker-img as latest (if `buildDocker` is enabled)" workflow_call: inputs: buildNumber: @@ -27,16 +37,27 @@ on: type: string required: true dev: + type: boolean + required: false + default: false + buildDocker: + type: boolean required: false default: false + latestDocker: type: boolean + required: false + default: false jobs: conda-build: runs-on: ubuntu-latest container: ghcr.io/i4ds/mambabuild-docker:latest - + outputs: + karabo-tag: ${{ steps.bcs.outputs.karabo_tag }} + build-docker: ${{ steps.bcs.outputs.build_docker }} + latest-docker: ${{ steps.bcs.outputs.latest_docker }} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -44,35 +65,45 @@ jobs: uses: actions-ecosystem/action-get-latest-tag@v1 id: get-latest-tag - name: Build Conda + id: bcs shell: bash -l {0} run: | if [[ ${{ github.event_name }} == "release" ]] then - BUILD_NR="0" + BUILD_NUMBER="0" KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} - KARABO_VER="${KARABO_TAG:1}" + BUILD_DOCKER=true + LATEST_DOCKER=true fi if [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] then - BUILD_NR=${{ inputs.buildNumber }} - KARABO_VER=${{ inputs.version }} + BUILD_NUMBER=${{ inputs.buildNumber }} + KARABO_TAG=${{ inputs.version }} + BUILD_DOCKER=${{ inputs.buildDocker }} + LATEST_DOCKER=${{ inputs.latestDocker }} fi + KARABO_VERSION="${KARABO_TAG:1}" DEV_STR="dev" if [[ ${{ inputs.dev }} == "true" ]] then - if [[ "$KARABO_VER" != *"$DEV_STR"* ]] + if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]] || [[ $LATEST_DOCKER == 'true' ]] then + echo "Invalid configuration of workflow-inputs!" exit 2 fi - BUILD_NR="$(($BUILD_NR + 999))" + BUILD_NUMBER="$(($BUILD_NUMBER + 999))" else - if [[ "$KARABO_VER" == *"$DEV_STR"* ]] + if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] then + echo "Invalid configuration of workflow-inputs!" exit 2 fi fi - export KARABO_VERSION=$KARABO_VER - export BUILD_NUMBER=$BUILD_NR + echo "KARABO_VERSION=$KARABO_VERSION" >> "$GITHUB_ENV" + echo "BUILD_NUMBER=$BUILD_NUMBER" >> "$GITHUB_ENV" + echo "karabo_tag=$KARABO_TAG" >> "$GITHUB_OUTPUT" + echo "build_docker=$BUILD_DOCKER" >> "$"$GITHUB_OUTPUT"" + echo "latest_docker=$LATEST_DOCKER" >> "$GITHUB_OUTPUT" conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 @@ -85,3 +116,11 @@ jobs: run: | conda activate base anaconda -t ${{ secrets.ANACONDA_SECRET }} upload /opt/conda/conda-bld/linux-64/karabo-pipeline-*.tar.bz2 --force + + test-build: + needs: conda-build + uses: ./.github/workflows/test-user-package.yml + with: + version: ${{ needs.conda-build.outputs.karabo-tag }} + buildDocker: ${{ needs.conda-build.outputs.build-docker }} + latestDocker: ${{ needs.conda-build.outputs.latest-docker }} diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 103afb95..84d3c399 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -1,13 +1,38 @@ name: Test User Package on: - workflow_run: - workflows: ["Conda Build"] - types: - - completed + workflow_dispatch: + inputs: + version: + type: string + required: true + description: "version: PEP440 package-version (incl. leading 'v')" + buildDocker: + type: boolean + required: false + default: false + description: "build docker-img if tests succeeded?" 
+ latestDocker: + type: boolean + required: false + default: false + description: "tag docker-img as latest (if `buildDocker` is enabled)" + workflow_call: + inputs: + version: + type: string + required: true + buildDocker: + type: boolean + required: false + default: false + latestDocker: + type: boolean + required: false + default: false jobs: - conda-build: + test-conda-build: runs-on: ubuntu-latest if: ${{ github.event.workflow_run.conclusion == 'success' }} steps: @@ -18,13 +43,20 @@ jobs: - name: Set variables, Install Package & Dependencies shell: bash -l {0} run: | + KARABO_TAG=${{ inputs.version }} + KARABO_VERSION="${KARABO_TAG:1}" + DEV_STR="dev" + if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] && [[ "${{ inputs.latestDocker }}" == 'true' ]]; then + echo "Invalid configuration of workflow-inputs!" + exit 2 + fi export IS_GITHUB_RUNNER=true export RUN_NOTEBOOK_TESTS=false conda install -y -n base conda-libmamba-solver conda config --set solver libmamba conda create -y -n karabo-env python=3.9 conda activate karabo-env - conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=${{ env.KARABO_VERSION }} + conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=$KARABO_VERSION pip install ipykernel python -m ipykernel install --user --name python3 - name: Test Package @@ -32,4 +64,14 @@ jobs: run: | conda activate karabo-env pytest --pyargs karabo.test + + build-docker: + needs: test-conda-build + if: ${{ inputs.buildDocker == 'true' }} + uses: ./.github/workflows/build-user-image.yml + with: + verstag: ${{ inputs.version }} + latest: ${{ inputs.latestDocker }} + + From 4637372ccb87420b1e1d76072944dece3fbde5d0 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 09:57:44 +0100 Subject: [PATCH 075/207] added df-h to build-user-image workflow :jack_o_lantern: --- .github/workflows/build-user-image.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index b2ea7b67..a0ff2732 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -93,6 +93,11 @@ jobs: type=raw, enable=${{ env.latest }}, value=latest type=pep440, pattern={{version}}, value=${{ env.version }} + - name: Monitor-disk-sapce-1 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Build and export to Docker uses: docker/build-push-action@v5 with: @@ -105,10 +110,21 @@ jobs: load: true tags: ${{ steps.meta.outputs.tags }} labels: test + + - name: Monitor-disk-sapce-2 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Test container run: | docker run --rm ${{ steps.meta.outputs.tags }}:test pytest /opt/conda/lib/python3.9/site-packages/karabo/test + - name: Monitor-disk-sapce-3 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Build and push Docker image uses: docker/build-push-action@v5 with: @@ -120,3 +136,8 @@ jobs: BUILD=${{ env.build }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + + - name: Monitor-disk-sapce-4 + shell: bash -l {0} + run: | + echo $(df -h) \ No newline at end of file From efdff3a8d685fb54f24a7cee18f61abe31a8f3d7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 10:36:55 +0100 Subject: [PATCH 076/207] made docker-build by myself :scissors: --- .github/workflows/build-user-image.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml 
index a0ff2732..de2ce13e 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -93,11 +93,12 @@ jobs: type=raw, enable=${{ env.latest }}, value=latest type=pep440, pattern={{version}}, value=${{ env.version }} - - name: Monitor-disk-sapce-1 + - name: Docker build & monitor shell: bash -l {0} run: | echo $(df -h) - + docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t "karabo-pipeline":${{ env.version }} . + echo $(df -h) - name: Build and export to Docker uses: docker/build-push-action@v5 with: From 369f1eef5dd59eadab9a2bbae65d133b5cbeda25 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 11:40:02 +0100 Subject: [PATCH 077/207] made docker-build by myself entirely :traffic_light: --- .github/workflows/build-user-image.yml | 55 +++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index de2ce13e..8d0b8156 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -39,15 +39,35 @@ jobs: build-test-and-push-image: runs-on: ubuntu-latest steps: + - name: Monitor-disk-sapce-1 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Checkout repository uses: actions/checkout@v4 + - name: Monitor-disk-sapce-2 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Set up QEMU uses: docker/setup-qemu-action@v3 + - name: Monitor-disk-sapce-3 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + - name: Monitor-disk-sapce-4 + shell: bash -l {0} + run: | + echo $(df -h) + # Aussumes that current repo-tag matches karabo:latest on anaconda.org - name: Get Previous tag uses: actions-ecosystem/action-get-latest-tag@v1 @@ -60,6 +80,11 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Monitor-disk-sapce-5 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Setup metadata img-name & img-tag shell: bash -l {0} run: | @@ -84,6 +109,12 @@ jobs: echo "Invalid configuration of workflow-inputs!" exit 2 fi + + - name: Monitor-disk-sapce-6 + shell: bash -l {0} + run: | + echo $(df -h) + - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 @@ -93,12 +124,26 @@ jobs: type=raw, enable=${{ env.latest }}, value=latest type=pep440, pattern={{version}}, value=${{ env.version }} - - name: Docker build & monitor + - name: Monitor-disk-sapce-7 + shell: bash -l {0} + run: | + echo $(df -h) + + - name: Docker build manually shell: bash -l {0} run: | + docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t karabo-pipeline:${{ env.version }} . + if [[ ${{ env.latest }} == "true" ]]; then + docker tag karabo-pipeline:${{ env.version }} karabo-pipeline:latest + fi echo $(df -h) - docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t "karabo-pipeline":${{ env.version }} . 
+ + - name: push docker-image + shell: bash -l {0} + run: | + docker push karabo-pipeline --all-tags echo $(df -h) + - name: Build and export to Docker uses: docker/build-push-action@v5 with: @@ -112,7 +157,7 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: test - - name: Monitor-disk-sapce-2 + - name: Monitor-disk-sapce-8 shell: bash -l {0} run: | echo $(df -h) @@ -121,7 +166,7 @@ jobs: run: | docker run --rm ${{ steps.meta.outputs.tags }}:test pytest /opt/conda/lib/python3.9/site-packages/karabo/test - - name: Monitor-disk-sapce-3 + - name: Monitor-disk-sapce-9 shell: bash -l {0} run: | echo $(df -h) @@ -138,7 +183,7 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - name: Monitor-disk-sapce-4 + - name: Monitor-disk-sapce-10 shell: bash -l {0} run: | echo $(df -h) \ No newline at end of file From 0190fa5cee5c891c344e4a531f4e6726ee53ed5d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 12:04:55 +0100 Subject: [PATCH 078/207] ensured to push on ghcr.io :airplane: --- .github/workflows/build-user-image.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 8d0b8156..ce37a3ce 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -141,7 +141,8 @@ jobs: - name: push docker-image shell: bash -l {0} run: | - docker push karabo-pipeline --all-tags + docker logout docker.io + docker push ${{ env.REGISTRY }}/${{ github.actor }}/karabo-pipeline --all-tags echo $(df -h) - name: Build and export to Docker From a7ddb89e997162165b38352b76cb3b4cbd7bdbbe Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 13:28:30 +0100 Subject: [PATCH 079/207] bugfix docker-push user-image workflow :cupid: --- .github/workflows/build-user-image.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index ce37a3ce..b434affc 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -34,6 +34,7 @@ on: env: REGISTRY: ghcr.io + IMG_NAME: karabo-pipeline jobs: build-test-and-push-image: @@ -132,9 +133,9 @@ jobs: - name: Docker build manually shell: bash -l {0} run: | - docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t karabo-pipeline:${{ env.version }} . + docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t ${{ env.IMG_NAME }}:${{ env.version }} . 
if [[ ${{ env.latest }} == "true" ]]; then - docker tag karabo-pipeline:${{ env.version }} karabo-pipeline:latest + docker tag ${{ env.IMG_NAME }}:${{ env.version }} ${{ env.IMG_NAME }}:latest fi echo $(df -h) @@ -142,7 +143,7 @@ jobs: shell: bash -l {0} run: | docker logout docker.io - docker push ${{ env.REGISTRY }}/${{ github.actor }}/karabo-pipeline --all-tags + docker push ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMG_NAME }} --all-tags echo $(df -h) - name: Build and export to Docker From 4ec3b1179abd99ccd7dcf54582a52b7de5c891f5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 13:40:49 +0100 Subject: [PATCH 080/207] bugfix docker-push user-image workflow :loudspeaker: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index b434affc..bf0a1299 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -143,7 +143,7 @@ jobs: shell: bash -l {0} run: | docker logout docker.io - docker push ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMG_NAME }} --all-tags + docker push --all-tags echo $(df -h) - name: Build and export to Docker From 62470b816b23d6d0a0c1c1a1d8d904b8811d9659 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 13:58:59 +0100 Subject: [PATCH 081/207] bugfix added img-name to push image :children_crossing: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index bf0a1299..c25c3a88 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -143,7 +143,7 @@ jobs: shell: bash -l {0} run: | docker logout docker.io - docker push --all-tags + docker push --all-tags ${{ env.IMG_NAME }} echo $(df -h) - name: Build and export to Docker From 92dac2ef2067df173ba16fe1683bb19b4424b97b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 15:19:36 +0100 Subject: [PATCH 082/207] bugfix registry docker push :apple: --- .github/workflows/build-user-image.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index c25c3a88..35338cff 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -142,8 +142,11 @@ jobs: - name: push docker-image shell: bash -l {0} run: | + REPO_OWNER=${{ github.repository_owner }} + echo ${{ env.REGISTRY }} + echo ${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} docker logout docker.io - docker push --all-tags ${{ env.IMG_NAME }} + docker push --all-tags ${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} echo $(df -h) - name: Build and export to Docker From 8d4735b2a7111b2ae0381dc150a54c6116b27f1e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 15:39:54 +0100 Subject: [PATCH 083/207] adapted docker-img address accordingly to ghcr.io :tophat: --- .github/workflows/build-user-image.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 35338cff..f032211b 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -105,6 +105,8 @@ jobs: fi echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" echo "version=${{ 
inputs.verstag }}" >> "$GITHUB_ENV" + REPO_OWNER=${{ github.repository_owner }} + echo "REPO_OWNER=${REPO_OWNER@L}" >> "$GITHUB_ENV" DEV_STR="dev" if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" @@ -133,20 +135,23 @@ jobs: - name: Docker build manually shell: bash -l {0} run: | - docker build --build-arg GIT_REV=${{ env.gitrev }} --build-arg BUILD=${{ env.build }} -f docker/user/Dockerfile -t ${{ env.IMG_NAME }}:${{ env.version }} . + docker build \ + --build-arg GIT_REV=${{ env.gitrev }} \ + --build-arg BUILD=${{ env.build }} \ + -f docker/user/Dockerfile \ + -t ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:${{ env.version }} \ + . if [[ ${{ env.latest }} == "true" ]]; then - docker tag ${{ env.IMG_NAME }}:${{ env.version }} ${{ env.IMG_NAME }}:latest + docker tag ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:${{ env.version }} \ + ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:latest fi echo $(df -h) - name: push docker-image shell: bash -l {0} run: | - REPO_OWNER=${{ github.repository_owner }} - echo ${{ env.REGISTRY }} - echo ${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} docker logout docker.io - docker push --all-tags ${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} + docker push --all-tags ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }} echo $(df -h) - name: Build and export to Docker From 8a67f6ec18ea2aca5e8248e3b40e573cbfbe83b1 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 16:01:27 +0100 Subject: [PATCH 084/207] adapted build-user-image to standard :boar: --- .github/workflows/build-user-image.yml | 96 +++----------------------- 1 file changed, 8 insertions(+), 88 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index f032211b..f48cf7f0 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -40,36 +40,15 @@ jobs: build-test-and-push-image: runs-on: ubuntu-latest steps: - - name: Monitor-disk-sapce-1 - shell: bash -l {0} - run: | - echo $(df -h) - - name: Checkout repository uses: actions/checkout@v4 - - name: Monitor-disk-sapce-2 - shell: bash -l {0} - run: | - echo $(df -h) - - name: Set up QEMU uses: docker/setup-qemu-action@v3 - - name: Monitor-disk-sapce-3 - shell: bash -l {0} - run: | - echo $(df -h) - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Monitor-disk-sapce-4 - shell: bash -l {0} - run: | - echo $(df -h) - - # Aussumes that current repo-tag matches karabo:latest on anaconda.org - name: Get Previous tag uses: actions-ecosystem/action-get-latest-tag@v1 id: get-latest-tag @@ -81,11 +60,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Monitor-disk-sapce-5 - shell: bash -l {0} - run: | - echo $(df -h) - - name: Setup metadata img-name & img-tag shell: bash -l {0} run: | @@ -106,18 +80,13 @@ jobs: echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" REPO_OWNER=${{ github.repository_owner }} - echo "REPO_OWNER=${REPO_OWNER@L}" >> "$GITHUB_ENV" + echo "IMG_ADDR=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }}" >> "$GITHUB_ENV" DEV_STR="dev" if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" 
exit 2 fi - - name: Monitor-disk-sapce-6 - shell: bash -l {0} - run: | - echo $(df -h) - - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 @@ -127,73 +96,24 @@ jobs: type=raw, enable=${{ env.latest }}, value=latest type=pep440, pattern={{version}}, value=${{ env.version }} - - name: Monitor-disk-sapce-7 - shell: bash -l {0} - run: | - echo $(df -h) - - - name: Docker build manually + - name: Docker build shell: bash -l {0} run: | docker build \ --build-arg GIT_REV=${{ env.gitrev }} \ --build-arg BUILD=${{ env.build }} \ -f docker/user/Dockerfile \ - -t ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:${{ env.version }} \ + -t ${{ env.IMG_ADDR }}:${{ env.version }} \ . if [[ ${{ env.latest }} == "true" ]]; then - docker tag ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:${{ env.version }} \ - ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }}:latest + docker tag ${{ env.IMG_ADDR }}:${{ env.version }} ${{ env.IMG_ADDR }}:latest fi - echo $(df -h) - - name: push docker-image - shell: bash -l {0} + - name: Test image run: | - docker logout docker.io - docker push --all-tags ${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMG_NAME }} - echo $(df -h) - - - name: Build and export to Docker - uses: docker/build-push-action@v5 - with: - file: docker/user/Dockerfile - context: . - push: false - build-args: | - GIT_REV=${{ env.gitrev }} - BUILD=${{ env.build }} - load: true - tags: ${{ steps.meta.outputs.tags }} - labels: test - - - name: Monitor-disk-sapce-8 - shell: bash -l {0} - run: | - echo $(df -h) - - - name: Test container - run: | - docker run --rm ${{ steps.meta.outputs.tags }}:test pytest /opt/conda/lib/python3.9/site-packages/karabo/test - - - name: Monitor-disk-sapce-9 - shell: bash -l {0} - run: | - echo $(df -h) - - - name: Build and push Docker image - uses: docker/build-push-action@v5 - with: - file: docker/user/Dockerfile - context: . 
- push: true - build-args: | - GIT_REV=${{ env.gitrev }} - BUILD=${{ env.build }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} pytest /opt/conda/lib/python3.9/site-packages/karabo/test - - name: Monitor-disk-sapce-10 + - name: Docker push shell: bash -l {0} run: | - echo $(df -h) \ No newline at end of file + docker push --all-tags ${{ env.IMG_ADDR }} \ No newline at end of file From ccefa183074ccdd2c01f85ed010449e49f612f05 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 16:16:46 +0100 Subject: [PATCH 085/207] added pytest installation to docker-user image testing workflow :roller_coaster: --- .github/workflows/build-user-image.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index f48cf7f0..96615409 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -111,7 +111,8 @@ jobs: - name: Test image run: | - docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} pytest /opt/conda/lib/python3.9/site-packages/karabo/test + docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} \ + "pip install pytest && pytest /opt/conda/lib/python3.9/site-packages/karabo/test" - name: Docker push shell: bash -l {0} From d726ec87bd01a366635ed62272f1ea38004aee60 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 16:30:23 +0100 Subject: [PATCH 086/207] bugfix pytest-call in user-image :yen: --- .github/workflows/build-user-image.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 96615409..ca0d28ed 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,8 @@ jobs: - name: Test image run: | docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} \ - "pip install pytest && pytest /opt/conda/lib/python3.9/site-packages/karabo/test" + pip install pytest && \ + pytest /opt/conda/lib/python3.9/site-packages/karabo/test - name: Docker push shell: bash -l {0} From e520020e478cb24995ecd7efd71dfcbc4bf3a0dc Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 17 Nov 2023 16:42:55 +0100 Subject: [PATCH 087/207] added bash shell in docker-run in build-test-user-image :busts_in_silhouette: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index ca0d28ed..6b6e38f0 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -111,7 +111,7 @@ jobs: - name: Test image run: | - docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} \ + docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash && \ pip install pytest && \ pytest /opt/conda/lib/python3.9/site-packages/karabo/test From 9eb7904967d3523f5de5e17b55a52487248bf1ae Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 10:18:03 +0100 Subject: [PATCH 088/207] adapted ld-library-path to base-image :two: --- docker/user/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index a9a242d9..fee0df72 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -12,7 +12,10 @@ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 # build: user|test, GIT_REV: in case of "user", 
ONLY `v{major}.{minor}.{patch}` ARG BUILD=user RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ + PATH="/opt/conda/bin:${PATH}" \ + CONDA_PREFIX="/opt/conda" \ + IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} && \ conda init From 5cc3b87fca2b167cec5ba2c4077bf0559823d96c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 10:18:39 +0100 Subject: [PATCH 089/207] made site-package-location identification in build-user-image more robust :no_mobile_phones: --- .github/workflows/build-user-image.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 6b6e38f0..61e6127b 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -110,10 +110,11 @@ jobs: fi - name: Test image - run: | + run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash && \ pip install pytest && \ - pytest /opt/conda/lib/python3.9/site-packages/karabo/test + SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed 's/.*\(\/opt\/conda.*\).*/\1/') && \ + pytest $SITE_PKGS/karabo/test - name: Docker push shell: bash -l {0} From 8c9f3cd3a4f847bddcedfc8dd217d6f31a2bfc0e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 11:45:58 +0100 Subject: [PATCH 090/207] defined entrypoint to Dockerfile-user :arrow_right: --- .github/workflows/build-user-image.yml | 7 +++---- docker/user/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 61e6127b..d22373df 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -111,10 +111,9 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile - docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash && \ - pip install pytest && \ - SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed 's/.*\(\/opt\/conda.*\).*/\1/') && \ - pytest $SITE_PKGS/karabo/test + docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ + SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed 's/.*\(\/opt\/conda.*\).*/\1/'); \ + pytest $SITE_PKGS/karabo/test" - name: Docker push shell: bash -l {0} diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index fee0df72..88679119 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -36,13 +36,13 @@ RUN if [ "$BUILD" = "user" ] ; then \ else \ exit 2; \ fi && \ - echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ cp -r "repo/karabo/examples" "/workspace/examples/" && \ rm -rf "repo/" && \ pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo WORKDIR /workspace +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] # The following steps are needed if an mpi-hook through sarus must be allowed. 
# However, because this steps takes more than 2 hours, and the github-runners run out From f4a2859e03b7ee5e2bc617e42553d38d55542ae4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 11:49:07 +0100 Subject: [PATCH 091/207] bugfix calling tests in build-user-image :bear: --- .github/workflows/build-user-image.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index d22373df..21950f8a 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,8 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed 's/.*\(\/opt\/conda.*\).*/\1/'); \ - pytest $SITE_PKGS/karabo/test" + 'SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test'" - name: Docker push shell: bash -l {0} From f48e669452c7fb7d8445b34fef4f5d13f1aa7fa4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 11:54:03 +0100 Subject: [PATCH 092/207] readded conda-activate-karabo to .bashrc for interactive mode :heartbeat: --- docker/user/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 88679119..0d5703cb 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -36,6 +36,7 @@ RUN if [ "$BUILD" = "user" ] ; then \ else \ exit 2; \ fi && \ + echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ cp -r "repo/karabo/examples" "/workspace/examples/" && \ rm -rf "repo/" && \ From a37f2f4db7a9700e052d2de68a29cadba0842960 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 11:59:45 +0100 Subject: [PATCH 093/207] bugfix removed unnecessary " at the end of docker-run :bullettrain_front: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 21950f8a..2bfaae1b 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test'" + 'SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} From 01ac74d45dc6c106ed2e8a3ce8f0807025ca343c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 13:04:39 +0100 Subject: [PATCH 094/207] set env-vars in test-user-image :pisces: --- .github/workflows/build-user-image.yml | 2 +- .github/workflows/test.yml | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 2bfaae1b..227b6927 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 
'SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4dd55349..6c3a5173 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -46,9 +46,7 @@ jobs: shell: bash -l {0} run: | conda activate test_karabo - export IS_GITHUB_RUNNER=true - export RUN_GPU_TESTS=false - export RUN_NOTEBOOK_TESTS=true + export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true mpirun -n 2 pytest --cov=./ --only-mpi pytest --cov=./ --cov-append --cov-report=xml - name: Upload coverage reports to Codecov From e23dbb877da422a785d02bc9f33e757e2c359b03 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 13:09:31 +0100 Subject: [PATCH 095/207] minor changes :musical_note: --- .github/workflows/build-user-image.yml | 4 ++-- .github/workflows/purge-image.yml | 23 ----------------------- 2 files changed, 2 insertions(+), 25 deletions(-) delete mode 100644 .github/workflows/purge-image.yml diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 227b6927..7058f488 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -25,12 +25,12 @@ on: type: boolean required: false default: false - description: "Tag image as 'latest'?" + description: "tag image as 'latest'?" test: type: boolean required: false default: false - description: "Install from `gitrev` environment.yaml instead?" + description: "install from `gitrev` environment.yaml instead?" 
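# Hypothetical example (an assumption, not part of this workflow file): a manual
# dispatch with the inputs defined above, using the GitHub CLI, might look like
#   gh workflow run build-user-image.yml -f gitrev=<full-commit-sha> -f verstag=v0.21.0 -f latest=false -f test=false
# where the input values are placeholders for illustration only.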
env: REGISTRY: ghcr.io diff --git a/.github/workflows/purge-image.yml b/.github/workflows/purge-image.yml deleted file mode 100644 index 15af751d..00000000 --- a/.github/workflows/purge-image.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Purge Image - -on: - workflow_call: - inputs: - IMAGE_NAME: - required: true - type: string - IMAGE_TAG: - required: true - type: string - -jobs: - Purge_Image: - runs-on: ubuntu-latest - steps: - - name: Purge image - uses: bots-house/ghcr-delete-image-action@v1.1.0 - with: - owner: ${{ github.repository_owner }} - name: ${{ inputs.IMAGE_NAME }} - token: ${{ secrets.GITHUB_TOKEN }} - tag: ${{ inputs.IMAGE_TAG }} From bb971a8523c8288c7a1026be15b9a3311cc7d688 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 13:12:21 +0100 Subject: [PATCH 096/207] adapted test-workflow to main-setup :bulb: --- .github/workflows/test.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a50a7dc..afb676b8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,12 +11,6 @@ on: - main # is activated once a PR gets merged jobs: - Fail-if-PR-is-Draft: - if: ${{ (github.event_name == 'pull_request') && (github.event.pull_request.draft == true) }} - runs-on: ubuntu-latest - steps: - - name: Fail with exit 1 if PR is draft - run: exit 1 Test_Karabo: if: github.event.pull_request.draft == false runs-on: ubuntu-latest From bc9cb1ce8c1c56761b028b002741c1143e0a475f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 13:43:31 +0100 Subject: [PATCH 097/207] adapted docker-img dev to karabo-venv :page_facing_up: --- docker/dev/Dockerfile | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index b79dc57e..ec89e060 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -5,17 +5,20 @@ # first stage is to have a more or less consistent base-image FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base RUN apt-get update && apt-get install -y git libarchive13 wget curl nano -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" +ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ + PATH="/opt/conda/bin:${PATH}" \ + CONDA_PREFIX="/opt/conda" \ + IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} RUN conda init SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ - conda create -y -n karabo + conda create -y -n karabo && \ + echo "conda activate karabo" >> ~/.bashrc # create venv to not f*** up base-env in which libmamba solver lives SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] -RUN echo "conda activate karabo" >> ~/.bashrc WORKDIR /workspace # second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) @@ -24,11 +27,15 @@ WORKDIR /workspace # we don't compile the mpi from source here because it takes just too long. This makes an mpi-hook not possible. 
FROM karabo-base # redefine envs because they're just scoped per build-stage -ENV PATH="/opt/conda/bin:${PATH}" CONDA_PREFIX="/opt/conda" IS_DOCKER_CONTAINER="true" +ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ + PATH="/opt/conda/bin:${PATH}" \ + CONDA_PREFIX="/opt/conda" \ + IS_DOCKER_CONTAINER="true" ARG GIT_REV # note that installation like this has several assumptions about the used files like: # conda-channel definition & not naming env in `environment.yaml`, dev-optional dep in pyproject.toml RUN git clone --branch ${GIT_REV} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git && \ cd Karabo-Pipeline && \ conda env update -f=environment.yaml && \ - pip install -e ".[dev]" \ No newline at end of file + pip install -e ".[dev]" +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From 84880b8850656663574903fcee25b7d2bdaf9bc5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 14:04:30 +0100 Subject: [PATCH 098/207] adapted mpi-doc :sparkle: --- doc/src/container.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 7944a4db..26401261 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -56,12 +56,18 @@ Then you can pull a Docker image to a sarus image as follows: sarus pull ghcr.io/i4ds/karabo-pipeline:latest ``` -**Native MPI support (MPICH-based)** +**MPI (MPICH) Support** -Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). Our containers provide native MPI by hooking CSCS MPI into the container using the `--mpi` flag as follows: +Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). ```shell -srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest +srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest ``` -Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. \ No newline at end of file +Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. + +Currently, native-mpi-hook is NOT enabled, because the mpi-installation needs to live at a standard-location, which is not the case here since mpich lives in a conda-venv. If this is a feature you need, don't hesitate to contact us. In the container, you would have to install mpich from source (the same version which lives in the conda-venv), and replace the mpich in the conda-env with a dummy-installation. 
Then, you're able to use to use native MPI by hooking CSCS MPI into the container adding the `--mpi` flag as follows: + +```shell +srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest +``` \ No newline at end of file From 2ab9eb3bd978b3a49edd36e3b09267fcaf378eea Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 15:33:14 +0100 Subject: [PATCH 099/207] addressed mypy-issues :six: --- karabo/_version.py | 1 + karabo/imaging/image.py | 3 ++- karabo/simulation/beam.py | 4 ++-- karabo/simulation/sky_model.py | 6 +++--- karabo/sourcedetection/evaluation.py | 7 ++++--- karabo/test/conftest.py | 8 ++++---- setup.cfg | 7 ++++--- 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/karabo/_version.py b/karabo/_version.py index 3556603e..be52b71a 100644 --- a/karabo/_version.py +++ b/karabo/_version.py @@ -7,6 +7,7 @@ # This file is released into the public domain. # Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer +# mypy: ignore-errors """Git implementation of _version.py.""" diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py index eb37206c..aa6342ea 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -323,7 +323,8 @@ def plot_power_spectrum( plt.gca().set_ylabel("Brightness temperature [K]") plt.gca().set_xscale("log") plt.gca().set_yscale("log") - plt.gca().set_ylim(1e-6 * np.max(profile), 2.0 * np.max(profile)) + max_profile = float(np.max(profile)) + plt.gca().set_ylim(1e-6 * max_profile, 2.0 * max_profile) plt.tight_layout() if save_png: diff --git a/karabo/simulation/beam.py b/karabo/simulation/beam.py index 591e019a..98ab344b 100644 --- a/karabo/simulation/beam.py +++ b/karabo/simulation/beam.py @@ -333,7 +333,7 @@ def show_kat_beam( """ plt.imshow( beampixels, - extent=[-beamextent / 2, beamextent / 2, -beamextent / 2, beamextent / 2], + extent=(-beamextent / 2, beamextent / 2, -beamextent / 2, beamextent / 2), ) plt.title("%s pol beam\nfor %s at %dMHz" % (pol, "", freq)) plt.xlabel("deg") @@ -359,7 +359,7 @@ def plot_beam( :return: polar plot """ fig = plt.figure() - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True) + ax = fig.add_axes((0.1, 0.1, 0.8, 0.8), polar=True) ax.pcolormesh( phi, theta, absdir ) # TODO (Add check for this) X,Y & data2D must all be same dimensions diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 3f854b18..9834cba6 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -1160,7 +1160,7 @@ def get_sky_model_from_h5_to_xarray( sky = xr.concat(data_arrays, dim=XARRAY_DIM_1_DEFAULT) sky = sky.T sky = sky.chunk( - {XARRAY_DIM_0_DEFAULT: chunksize, XARRAY_DIM_1_DEFAULT: sky.shape[1]} # type: ignore [dict-item] # noqa: E501 + {XARRAY_DIM_0_DEFAULT: chunksize, XARRAY_DIM_1_DEFAULT: sky.shape[1]} ) return SkyModel(sky, h5_file_connection=f) @@ -1354,11 +1354,11 @@ def get_sky_model_from_fits( data_arrays.append(data_array) for freq_dataset in data_arrays: - freq_dataset.chunk({XARRAY_DIM_0_DEFAULT: chunksize}) # type: ignore [dict-item] # noqa: E501 + freq_dataset.chunk({XARRAY_DIM_0_DEFAULT: chunksize}) result_dataset = ( xr.concat(data_arrays, dim=XARRAY_DIM_0_DEFAULT) - .chunk({XARRAY_DIM_0_DEFAULT: chunksize}) # type: ignore [dict-item] + .chunk({XARRAY_DIM_0_DEFAULT: chunksize}) .T ) diff --git a/karabo/sourcedetection/evaluation.py b/karabo/sourcedetection/evaluation.py index 26320df4..78f042e2 100644 --- a/karabo/sourcedetection/evaluation.py 
+++ b/karabo/sourcedetection/evaluation.py @@ -187,7 +187,7 @@ def automatic_assignment_of_ground_truth_and_prediction( idx_assigment_pred[distance == np.inf] = -1 # Check if a ground truth point is assigned to more # than one predicted point - pred_multiple_assignments = SourceDetectionEvaluation.__return_multiple_assigned_detected_points( # noqa + pred_multiple_assignments = SourceDetectionEvaluation.__return_multiple_assigned_detected_points( # noqa: E501 idx_assigment_pred ) @@ -329,14 +329,15 @@ def plot_confusion_matrix( filename: Optional[str] = None, ) -> None: conf_matrix = self.get_confusion_matrix() + ax: Axes _, ax = plt.subplots() - ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) + ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) # type: ignore[attr-defined] # noqa: E501 for i in range(conf_matrix.shape[0]): for j in range(conf_matrix.shape[1]): ax.text( x=j, y=i, - s=int(conf_matrix[i, j]), + s=str(conf_matrix[i, j]), va="center", ha="center", size="x-large", diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index 1e8c3aa4..6f237a6c 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -1,6 +1,6 @@ """Pytest global fixtures needs to be here!""" import os -from collections.abc import Callable +from collections.abc import Callable, Generator from dataclasses import dataclass import matplotlib.pyplot as plt @@ -66,7 +66,7 @@ def tobject() -> TFiles: @pytest.fixture(scope="function", autouse=True) -def clean_disk(): +def clean_disk() -> Generator[None, None, None]: """Automatically clears FileHandler.root after each test. Needed in some cases where the underlying functions do use FileHanlder @@ -100,11 +100,11 @@ def sky_data(sky_data_with_ids: NDArray[np.object_]) -> NDArray[np.float64]: def normalized_norm_diff() -> NNImageDiffCallable: """Compare two images.""" - def _normalized_norm_diff(img_path_1, img_path_2): + def _normalized_norm_diff(img_path_1: str, img_path_2: str) -> float: img1 = plt.imread(img_path_1) img2 = plt.imread(img_path_2) assert img1.shape == img2.shape # Calculate the error between the two images - return np.linalg.norm(img1 - img2) / (img1.shape[0] * img1.shape[1]) + return float(np.linalg.norm(img1 - img2) / (img1.shape[0] * img1.shape[1])) return _normalized_norm_diff diff --git a/setup.cfg b/setup.cfg index a45e367e..e2bf4368 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,8 +41,8 @@ exclude = .git, .eggs, __pycache__, tests/, docs/, build/, dist/ [mypy] exclude = (?x)( - karabo/test/* - | setup.py + ^.*test_.*\.py$ | + ^/setup\.py$ ) # mypy-strict configs check_untyped_defs = true @@ -52,9 +52,9 @@ disallow_subclassing_any = true disallow_untyped_calls = false disallow_untyped_decorators = true disallow_untyped_defs = true +extra_checks = true ignore_missing_imports = true implicit_reexport = false -strict_concatenate = true strict_equality = true warn_redundant_casts = true warn_return_any = true @@ -87,3 +87,4 @@ omit = */.experiments/* */examples/* setup.py + */_version.py From 0e30e2bfb4c14629cb028b45beb97f65960783f4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 15:58:57 +0100 Subject: [PATCH 100/207] added type-ignore to __init__.py because mypy can't handle that :fish_cake: --- karabo/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/karabo/__init__.py b/karabo/__init__.py index b4bc8844..ea2e535c 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -29,12 +29,14 @@ # Setup dask for slurm if "SLURM_JOB_ID" in os.environ: # ugly workaraound to 
not import stuff not available at build-time, but on import. - from karabo.util.dask import prepare_slurm_nodes_for_dask + from karabo.util.dask import prepare_slurm_nodes_for_dask # type: ignore prepare_slurm_nodes_for_dask() # set rascil data directory environment variable # see https://ska-telescope.gitlab.io/external/rascil/RASCIL_install.html -from karabo.util.setup_pkg import set_rascil_data_directory_env # noqa: E402 +from karabo.util.setup_pkg import ( # type: ignore # noqa: E402 + set_rascil_data_directory_env, +) set_rascil_data_directory_env() From a0fcba9e1f4003d3ffff6368381d1616b9ab5316 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 16:22:11 +0100 Subject: [PATCH 101/207] adapted exclude-option in setup.cfg to hopefully ignore __init__.py on the runners :pig_nose: --- karabo/__init__.py | 6 ++---- setup.cfg | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/karabo/__init__.py b/karabo/__init__.py index ea2e535c..b4bc8844 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -29,14 +29,12 @@ # Setup dask for slurm if "SLURM_JOB_ID" in os.environ: # ugly workaraound to not import stuff not available at build-time, but on import. - from karabo.util.dask import prepare_slurm_nodes_for_dask # type: ignore + from karabo.util.dask import prepare_slurm_nodes_for_dask prepare_slurm_nodes_for_dask() # set rascil data directory environment variable # see https://ska-telescope.gitlab.io/external/rascil/RASCIL_install.html -from karabo.util.setup_pkg import ( # type: ignore # noqa: E402 - set_rascil_data_directory_env, -) +from karabo.util.setup_pkg import set_rascil_data_directory_env # noqa: E402 set_rascil_data_directory_env() diff --git a/setup.cfg b/setup.cfg index e2bf4368..345c3a4a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,7 +42,8 @@ exclude = .git, .eggs, __pycache__, tests/, docs/, build/, dist/ [mypy] exclude = (?x)( ^.*test_.*\.py$ | - ^/setup\.py$ + /setup\.py$ | + /karabo/__init__\.py$ ) # mypy-strict configs check_untyped_defs = true From cd38303c4a43d90285f1f736e53233a62071476d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 16:44:31 +0100 Subject: [PATCH 102/207] adapted mypy-exclude-regex to exclude all __init__.py :alarm_clock: --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 345c3a4a..5acc8d81 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,7 +43,7 @@ exclude = .git, .eggs, __pycache__, tests/, docs/, build/, dist/ exclude = (?x)( ^.*test_.*\.py$ | /setup\.py$ | - /karabo/__init__\.py$ + __init__\.py$ ) # mypy-strict configs check_untyped_defs = true From f9d04671c1e97d2586aca5df892f3939c9d21771 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 16:50:59 +0100 Subject: [PATCH 103/207] hopefully bugfix to ignore __init__.py by mypy :fries: --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 5acc8d81..bd3f609a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,7 +43,7 @@ exclude = .git, .eggs, __pycache__, tests/, docs/, build/, dist/ exclude = (?x)( ^.*test_.*\.py$ | /setup\.py$ | - __init__\.py$ + ^.*__init__\.py$ ) # mypy-strict configs check_untyped_defs = true From 282ad3817544e0b122870166baa085026f2d2ec3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 16:58:00 +0100 Subject: [PATCH 104/207] trying editable install to avoid duplicate modules :sweat: --- .github/workflows/test.yml | 2 +- setup.cfg | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) 
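Why the editable install helps (a sketch under assumptions; the local commands below, including `mypy karabo` and the reuse of the `test_karabo` env name from the workflow, are illustrative rather than part of this patch): with `pip install -e`, the `karabo` package resolves to the repository checkout instead of a second copy under site-packages, so the type checker sees a single module tree.

```shell
# Hypothetical local reproduction, assuming a clone of the repository as the
# working directory and the conda env created in the test workflow.
conda activate test_karabo
pip install -e ".[dev]"
# The import should now resolve to the checkout, not site-packages:
python -c "import karabo; print(karabo.__file__)"
# With only one copy of the package on the path, mypy should not report
# duplicate modules anymore:
mypy karabo
```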
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index afb676b8..cc3e9278 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: conda create -y -n test_karabo python=3.9 conda activate test_karabo mamba env update --file environment.yaml - pip install ".[dev]" + pip install -e ".[dev]" python -m ipykernel install --user --name python3 - name: Test Dev-Tools shell: bash -l {0} diff --git a/setup.cfg b/setup.cfg index bd3f609a..aeaa7471 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,8 +42,7 @@ exclude = .git, .eggs, __pycache__, tests/, docs/, build/, dist/ [mypy] exclude = (?x)( ^.*test_.*\.py$ | - /setup\.py$ | - ^.*__init__\.py$ + /setup\.py$ ) # mypy-strict configs check_untyped_defs = true From 37d8b18b3836e749f2b3568a92566fc765bbf6b7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 20 Nov 2023 17:04:22 +0100 Subject: [PATCH 105/207] added verbose-flag to pytest-testing of docker-image :mailbox: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 7058f488..fac70ce2 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest $SITE_PKGS/karabo/test' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest --verbose $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} From 243889017d0575253fe95ff604b4882bdabbbfa8 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 09:31:19 +0100 Subject: [PATCH 106/207] removed pytest-mpi as dep and added --only-mpi flag handling in conftest.py :family: --- karabo/test/conftest.py | 42 ++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 - 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index 6f237a6c..a56aa402 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -1,12 +1,13 @@ """Pytest global fixtures needs to be here!""" import os -from collections.abc import Callable, Generator +from collections.abc import Callable, Generator, Iterable from dataclasses import dataclass import matplotlib.pyplot as plt import numpy as np import pytest from numpy.typing import NDArray +from pytest import Config, Item, Parser from karabo.test import data_path from karabo.util.file_handler import FileHandler @@ -108,3 +109,42 @@ def _normalized_norm_diff(img_path_1: str, img_path_2: str) -> float: return float(np.linalg.norm(img1 - img2) / (img1.shape[0] * img1.shape[1])) return _normalized_norm_diff + + +def pytest_addoption(parser: Parser) -> None: + """Pytest custom argparse hook. + + Add custom argparse options here. + + Args: + parser: pytest.Parser + """ + parser.addoption( + "--only-mpi", + action="store_true", + default=False, + help="Run only mpi tests", + ) + + +def pytest_collection_modifyitems(config: Config, items: Iterable[Item]) -> None: + """Pytest modify-items hook. 
+ + Change pytest-behavior dependent on parsed input. + + See https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option + + Args: + config: pytest.Config + items: iterable of pytest.Item + """ # noqa: E501 + if not config.getoption("--only-mpi"): + skipper = pytest.mark.skip(reason="Only run when --only-mpi is given") + for item in items: + if "mpi" in item.keywords: + item.add_marker(skipper) + else: + skipper = pytest.mark.skip(reason="Don't run when --only-mpi is given") + for item in items: + if "mpi" not in item.keywords: + item.add_marker(skipper) diff --git a/pyproject.toml b/pyproject.toml index 393385d7..770d3bf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,6 @@ markers = [ 'pydocstyle==6.3.0', 'pytest==7.4.2', 'pytest-cov==4.1.0', - 'pytest-mpi==0.6', 'mypy==1.6.1', 'mypy-extensions==1.0.0', 'myst-parser', From 8c0b76263e50c221c02724ca92c1e299df04ba17 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 10:29:54 +0100 Subject: [PATCH 107/207] minor changes in pytest test-discovery :gift: --- karabo/test/conftest.py | 93 ++++++++++++++++++++++++----------------- pyproject.toml | 4 +- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index a56aa402..40c1e45c 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -19,6 +19,60 @@ file_handler_test_dir = os.path.join(os.path.dirname(__file__), "karabo_test") +def pytest_addoption(parser: Parser) -> None: + """Pytest custom argparse hook. + + Add custom argparse options here. + + Pytest argparse-options have to be declared in the root conftest.py. + For some reason, the root conftest.py has to live near the project-root, even if + only a single conftest.py exists. However, this prevents using `pytest .` with + custom argparse-coptions from the root. Instead, either specify the test-dir + or leave it out entirely. + + Args: + parser: pytest.Parser + """ + parser.addoption( + "--only-mpi", + action="store_true", + default=False, + help="run only mpi tests", + ) + + +def pytest_configure(config: Config) -> None: + """Pytest add ini-values. + + Args: + config: pytest.Config + """ + config.addinivalue_line("markers", "mpi: mark mpi-tests as mpi") + + +def pytest_collection_modifyitems(config: Config, items: Iterable[Item]) -> None: + """Pytest modify-items hook. + + Change pytest-behavior dependent on parsed input. + + See https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option + + Args: + config: pytest.Config + items: iterable of pytest.Item + """ # noqa: E501 + if not config.getoption("--only-mpi"): + skipper = pytest.mark.skip(reason="Only run when --only-mpi is given") + for item in items: + if "mpi" in item.keywords: + item.add_marker(skipper) + else: + skipper = pytest.mark.skip(reason="Don't run when --only-mpi is given") + for item in items: + if "mpi" not in item.keywords: + item.add_marker(skipper) + + @dataclass class TFiles: """Read-only repo-artifact paths. @@ -109,42 +163,3 @@ def _normalized_norm_diff(img_path_1: str, img_path_2: str) -> float: return float(np.linalg.norm(img1 - img2) / (img1.shape[0] * img1.shape[1])) return _normalized_norm_diff - - -def pytest_addoption(parser: Parser) -> None: - """Pytest custom argparse hook. - - Add custom argparse options here. 
- - Args: - parser: pytest.Parser - """ - parser.addoption( - "--only-mpi", - action="store_true", - default=False, - help="Run only mpi tests", - ) - - -def pytest_collection_modifyitems(config: Config, items: Iterable[Item]) -> None: - """Pytest modify-items hook. - - Change pytest-behavior dependent on parsed input. - - See https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option - - Args: - config: pytest.Config - items: iterable of pytest.Item - """ # noqa: E501 - if not config.getoption("--only-mpi"): - skipper = pytest.mark.skip(reason="Only run when --only-mpi is given") - for item in items: - if "mpi" in item.keywords: - item.add_marker(skipper) - else: - skipper = pytest.mark.skip(reason="Don't run when --only-mpi is given") - for item in items: - if "mpi" not in item.keywords: - item.add_marker(skipper) diff --git a/pyproject.toml b/pyproject.toml index 770d3bf9..a7e5d188 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,7 @@ versionfile_build = "karabo/_version.py" tag_prefix = "v" [tool.pytest.ini_options] -markers = [ - "mpi: mpi-tests (launch with: 'mpirun -n pytest .')", -] +testpaths = "karabo/test" [project.optional-dependencies] dev = [ From b0a993e764dd910a66026266c810861bba258412 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 10:51:34 +0100 Subject: [PATCH 108/207] removed mpi-pytest from codecov because of race-condition issue which doesn't seem to be solvable atm :two_men_holding_hands: --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cc3e9278..732901a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,8 +42,8 @@ jobs: run: | conda activate test_karabo export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true - mpirun -n 2 pytest --cov=./ --only-mpi - pytest --cov=./ --cov-append --cov-report=xml + mpirun -n 2 pytest --only-mpi + pytest --cov=./ --cov-report=xml - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 with: From bfc103c0b3438ec13717110bba57a611f0177f0f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 11:30:12 +0100 Subject: [PATCH 109/207] bugfix compute filter-mask in filter-sky :couple: --- karabo/simulation/sky_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 9834cba6..2c1029dd 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -608,9 +608,9 @@ def filter_by_radius_euclidean_flat_approximation( xr.DataArray, (distances_sq >= np.square(inner_radius_deg)) & (distances_sq <= np.square(outer_radius_deg)), - ) + ).compute() - copied_sky.sources = copied_sky.sources[filter_mask.compute()] + copied_sky.sources = copied_sky.sources[filter_mask] copied_sky.sources = self.rechunk_array_based_on_self(copied_sky.sources) @@ -644,7 +644,7 @@ def filter_by_column( filter_mask = (copied_sky.sources[:, col_idx] >= min_val) & ( copied_sky.sources[:, col_idx] <= max_val ) - filter_mask = self.rechunk_array_based_on_self(filter_mask) + filter_mask = self.rechunk_array_based_on_self(filter_mask).compute() # Apply the filter mask and drop the unmatched rows copied_sky.sources = copied_sky.sources.where(filter_mask, drop=True) From 08ec44ed8afd92d524d8887438b1dae43d5433d4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 12:00:38 +0100 
Subject: [PATCH 110/207] ugly hotfix to initialize dask-mpi inside docker-container :japanese_castle: --- karabo/util/dask.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 775b5892..570abd99 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -104,7 +104,13 @@ def get_dask_client() -> Client: from dask_mpi import initialize from mpi4py import MPI - initialize(nthreads=DaskHandler.n_threads_per_worker, comm=MPI.COMM_WORLD) + # mpi4py.MPI.Intracomm + n_threads_per_worker = DaskHandler.n_threads_per_worker + if n_threads_per_worker is None: # ugly hotfix to be able to initialize + initialize(comm=MPI.COMM_WORLD) + else: + initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) + initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) DaskHandler.dask_client = Client() elif DaskHandler.dask_client is None: if ( From 9e4cc0aac53d67ab981e4be89d71f5cd5e5636e8 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 14:15:42 +0100 Subject: [PATCH 111/207] added mpi-tests to docker-user tests :skull: --- .github/workflows/build-user-image.yml | 2 +- karabo/util/dask.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index fac70ce2..04a9ae7f 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); pytest --verbose $SITE_PKGS/karabo/test' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest --verbose $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 570abd99..6d64c292 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -110,7 +110,6 @@ def get_dask_client() -> Client: initialize(comm=MPI.COMM_WORLD) else: initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) - initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) DaskHandler.dask_client = Client() elif DaskHandler.dask_client is None: if ( From ffb208221e4c8e1f36462ec4bbe9cc0e5ed602f1 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 15:01:29 +0100 Subject: [PATCH 112/207] bugfix only enter dask-initialize with mpi if mpirun :shell: --- karabo/util/dask.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 6d64c292..4bd6fce8 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -12,6 +12,8 @@ import psutil from dask import compute, delayed # type: ignore[attr-defined] from dask.distributed import Client, LocalCluster, Nanny, Worker +from dask_mpi import initialize +from mpi4py import MPI from karabo.error import KaraboDaskError from karabo.util._types import IntFloat @@ -98,15 +100,9 @@ def setup() -> None: @staticmethod def get_dask_client() -> Client: - # Get IS_DOCKER_CONTAINER variable - if os.environ.get("IS_DOCKER_CONTAINER", "false").lower() == "true": - from dask.distributed import 
Client - from dask_mpi import initialize - from mpi4py import MPI - - # mpi4py.MPI.Intracomm + if MPI.COMM_WORLD.Get_size() > 1: n_threads_per_worker = DaskHandler.n_threads_per_worker - if n_threads_per_worker is None: # ugly hotfix to be able to initialize + if n_threads_per_worker is None: initialize(comm=MPI.COMM_WORLD) else: initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) From ecf20a97796640a389bd216157eb05883fc41fb3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 15:40:56 +0100 Subject: [PATCH 113/207] bugfix conda-build inherited env-vars [skip ci] :womans_clothes: --- .github/workflows/conda-build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 9a747f4c..9cf0c793 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -99,8 +99,7 @@ jobs: exit 2 fi fi - echo "KARABO_VERSION=$KARABO_VERSION" >> "$GITHUB_ENV" - echo "BUILD_NUMBER=$BUILD_NUMBER" >> "$GITHUB_ENV" + export $KARABO_VERSION $BUILD_NUMBER echo "karabo_tag=$KARABO_TAG" >> "$GITHUB_OUTPUT" echo "build_docker=$BUILD_DOCKER" >> "$"$GITHUB_OUTPUT"" echo "latest_docker=$LATEST_DOCKER" >> "$GITHUB_OUTPUT" From a215d0ad70410a01b64f223a71497ecbca144713 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 15:45:02 +0100 Subject: [PATCH 114/207] bugfix conda-build inherited env-vars [skip ci] :ambulance: --- .github/workflows/conda-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 9cf0c793..b89cdae1 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -99,7 +99,7 @@ jobs: exit 2 fi fi - export $KARABO_VERSION $BUILD_NUMBER + export KARABO_VERSION=$KARABO_VERSION BUILD_NUMBER=$BUILD_NUMBER echo "karabo_tag=$KARABO_TAG" >> "$GITHUB_OUTPUT" echo "build_docker=$BUILD_DOCKER" >> "$"$GITHUB_OUTPUT"" echo "latest_docker=$LATEST_DOCKER" >> "$GITHUB_OUTPUT" From 06a222b0ecc35d6d32cfbe7fc483f0a1f81781e7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 16:23:46 +0100 Subject: [PATCH 115/207] bugfix conda-build set output-vars [skip ci] :rainbow: --- .github/workflows/conda-build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index b89cdae1..2337a938 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -100,9 +100,9 @@ jobs: fi fi export KARABO_VERSION=$KARABO_VERSION BUILD_NUMBER=$BUILD_NUMBER - echo "karabo_tag=$KARABO_TAG" >> "$GITHUB_OUTPUT" - echo "build_docker=$BUILD_DOCKER" >> "$"$GITHUB_OUTPUT"" - echo "latest_docker=$LATEST_DOCKER" >> "$GITHUB_OUTPUT" + echo "karabo_tag=$KARABO_TAG" >> $GITHUB_OUTPUT + echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT + echo "latest_docker=$LATEST_DOCKER" >> $GITHUB_OUTPUT conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 From 6738f678418e6ad8b5195164709cba7135531562 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 16:58:04 +0100 Subject: [PATCH 116/207] removed notebook-test from user-img test because data-dirs are not part of package and therefore would cause an error [skip ci] :clock530: --- .github/workflows/build-user-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-user-image.yml 
b/.github/workflows/build-user-image.yml index 04a9ae7f..12a92c58 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -112,7 +112,7 @@ jobs: - name: Test image run: | # what is installed and the package location relies entirely on the user Dockerfile docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=true; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest --verbose $SITE_PKGS/karabo/test' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest --verbose $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} From 43b5933560c8a414997bd79707a90f25fdd970d5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 21 Nov 2023 17:12:13 +0100 Subject: [PATCH 117/207] bugfix casting to str-to-boolean for reproducable github-workflows :nut_and_bolt: --- .github/workflows/conda-build.yml | 4 ++-- .github/workflows/test-user-package.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 2337a938..7ad56c32 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -121,5 +121,5 @@ jobs: uses: ./.github/workflows/test-user-package.yml with: version: ${{ needs.conda-build.outputs.karabo-tag }} - buildDocker: ${{ needs.conda-build.outputs.build-docker }} - latestDocker: ${{ needs.conda-build.outputs.latest-docker }} + buildDocker: ${{ needs.conda-build.outputs.build-docker == 'true' }} + latestDocker: ${{ needs.conda-build.outputs.latest-docker == 'true' }} diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 84d3c399..88ac8985 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -71,7 +71,7 @@ jobs: uses: ./.github/workflows/build-user-image.yml with: verstag: ${{ inputs.version }} - latest: ${{ inputs.latestDocker }} + latest: ${{ inputs.latestDocker == 'true' }} From 9daae488c5194efd2550d0133566bf955b240620 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 22 Nov 2023 09:13:31 +0100 Subject: [PATCH 118/207] added debugging logs to github workflows [skip ci] :telephone: --- .github/workflows/build-user-image.yml | 16 ++++++++++++++++ .github/workflows/conda-build.yml | 4 ++++ .github/workflows/test-user-package.yml | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 12a92c58..0c472752 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -65,13 +65,18 @@ jobs: run: | if [[ ${{ github.event_name }} == "workflow_call" ]]; then echo "gitrev=${{ inputs.verstag }}" >> "$GITHUB_ENV" + gitrev=${{ inputs.verstag }} echo "build=user" >> "$GITHUB_ENV" + build=user elif [[ ${{ github.event_name }} == "workflow_dispatch" ]]; then echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" + gitrev=${{ inputs.gitrev }} if [[ ${{ inputs.test }} == "true" ]]; then echo "build=test" >> "$GITHUB_ENV" + build=test else echo "build=user" >> "$GITHUB_ENV" + build=user fi else echo "Invalid github-event!" 
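The workflow fixes in the patches above keep hitting two GitHub Actions pitfalls. First, lines appended to `$GITHUB_ENV` only become environment variables in subsequent steps, never in the `run:` block that writes them, which is why patches 113/114 switch to a plain `export` for the values the conda build reads later in the same step. Second, values handed between jobs or reusable workflows via outputs always arrive as strings, so a `boolean` input has to be rebuilt with an explicit `== 'true'` comparison, as patch 117 does (patch 130 further down points at the same problem, #1483 of github runners). A rough sketch of the resulting handover pattern, reusing the `bcs` step and output names from `conda-build.yml` but not copied literally from any patch:

```shell
# Sketch only, not part of the patch series.
export KARABO_VERSION=$KARABO_VERSION              # visible to the rest of this run: block
echo "karabo_tag=$KARABO_TAG" >> $GITHUB_OUTPUT    # becomes steps.bcs.outputs.karabo_tag (a string)
echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT
# A downstream job re-derives the boolean explicitly, e.g.
#   buildDocker: ${{ needs.conda-build.outputs.build-docker == 'true' }}
```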
@@ -87,6 +92,17 @@ jobs: exit 2 fi + latest=${{ inputs.latest }} + echo "latest: $latest" + version=${{ inputs.verstag }} + echo "vesion: $version" + imgaddr=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} + echo "img-addr: $imgaddr" + echo "gitrev: $gitrev" + echo "build: $build" + + + - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 7ad56c32..e3ad6654 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -104,6 +104,10 @@ jobs: echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT echo "latest_docker=$LATEST_DOCKER" >> $GITHUB_OUTPUT + echo "karabo-tag: $KARABO_TAG" + echo "build-docker: $BUILD_DOCKER" + echo "latest-docker: $LATEST_DOCKER" + conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 conda config --append channels conda-forge diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 88ac8985..5d57cd75 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -59,6 +59,11 @@ jobs: conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=$KARABO_VERSION pip install ipykernel python -m ipykernel install --user --name python3 + + version=${{ inputs.version }} + echo "inputs-version: $version" + latest=${{ inputs.latestDocker == 'true' }} + echo "latest-docker: $latest" - name: Test Package shell: bash -l {0} run: | From ce10b659ebdd27c9e4d7e455c6b48213e54abff9 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 29 Nov 2023 16:29:58 +0100 Subject: [PATCH 119/207] adapted meta.yaml to pin compatible numpy :question: --- conda/meta.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index d355240e..e19d70d4 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -14,14 +14,15 @@ build: requirements: build: - python {{ python }} - - pip - - versioneer - - tomli + - cython + - numpy host: - python {{ python }} + - cython - pip - - versioneer + - numpy - tomli + - versioneer run: - python {{ python }} - aratmospy =1.0.dev0 @@ -44,7 +45,7 @@ requirements: - mpich # explicit mpich as dep for `mpi4py`. 
mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - - numpy >=1.21, !=1.24.0 # 1.24.0 is a buggy release + - {{ pin_compatible('numpy') }} - oskarpy =2.8.dev3 - pandas - psutil From e3b0b8d5fd39b483c494a9f1e997139e68b9bd09 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 1 Dec 2023 11:33:12 +0100 Subject: [PATCH 120/207] updated conda-build to be closer to a best-practise build :crying_cat_face: --- .github/workflows/conda-build.yml | 8 ++++---- conda/meta.yaml | 8 +++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index e3ad6654..d3b4a7da 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -70,14 +70,14 @@ jobs: run: | if [[ ${{ github.event_name }} == "release" ]] then - BUILD_NUMBER="0" + build="0" KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} BUILD_DOCKER=true LATEST_DOCKER=true fi if [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] then - BUILD_NUMBER=${{ inputs.buildNumber }} + build=${{ inputs.buildNumber }} KARABO_TAG=${{ inputs.version }} BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} @@ -91,7 +91,7 @@ jobs: echo "Invalid configuration of workflow-inputs!" exit 2 fi - BUILD_NUMBER="$(($BUILD_NUMBER + 999))" + build="$(($build + 1000))" else if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] then @@ -99,7 +99,7 @@ jobs: exit 2 fi fi - export KARABO_VERSION=$KARABO_VERSION BUILD_NUMBER=$BUILD_NUMBER + export KARABO_VERSION=$KARABO_VERSION build=$build echo "karabo_tag=$KARABO_TAG" >> $GITHUB_OUTPUT echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT echo "latest_docker=$LATEST_DOCKER" >> $GITHUB_OUTPUT diff --git a/conda/meta.yaml b/conda/meta.yaml index e19d70d4..da194204 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -8,17 +8,15 @@ source: path: ../ build: - number: {{ BUILD_NUMBER }} - string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ BUILD_NUMBER }} + number: {{ build }} + string: py{{ py }}h{{ PKG_HASH }}_{{ build }} requirements: build: - - python {{ python }} - - cython + - python - numpy host: - python {{ python }} - - cython - pip - numpy - tomli From 2943e7850e4db5615698ced3e71cfc772348f4ba Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 09:43:07 +0100 Subject: [PATCH 121/207] fixed bluebild-version :sunrise: --- conda/meta.yaml | 2 +- environment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index da194204..9735c21d 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -26,7 +26,7 @@ requirements: - aratmospy =1.0.dev0 - astropy - bdsf =1.10.dev2 - - bluebild + - bluebild =0.1.dev0 - cuda-cudart - dask >=2022.12.1 - dask-mpi diff --git a/environment.yaml b/environment.yaml index 6e1cc96a..ea2c94f8 100644 --- a/environment.yaml +++ b/environment.yaml @@ -7,7 +7,7 @@ dependencies: - aratmospy =1.0.dev0 - astropy - bdsf =1.10.dev2 - - bluebild + - bluebild =0.1.dev0 - cuda-cudart - dask >=2022.12.1 - dask-mpi From 2c67eeb3d65409f29463304b2ddc0fb87e4006f2 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 11:44:48 +0100 Subject: [PATCH 122/207] removed echos and improved build-user-image workflow :small_red_triangle_down: --- .github/workflows/build-user-image.yml | 45 +++++++++++-------------- .github/workflows/conda-build.yml | 25 ++------------ .github/workflows/test-user-package.yml | 7 ---- 3 files changed, 23 insertions(+), 54 
deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 0c472752..3c0ad5e4 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -15,8 +15,9 @@ on: inputs: gitrev: type: string - required: true - description: "gitrev: commit-hash (full), tag or branch (branch is not unique and therefore checkout-step could get cached)" + required: false + default: "" + description: "gitrev: commit-hash (full) if not current commit." verstag: type: string required: true @@ -64,26 +65,32 @@ jobs: shell: bash -l {0} run: | if [[ ${{ github.event_name }} == "workflow_call" ]]; then - echo "gitrev=${{ inputs.verstag }}" >> "$GITHUB_ENV" - gitrev=${{ inputs.verstag }} + echo "gitrev=$GITHUB_SHA" >> "$GITHUB_ENV" echo "build=user" >> "$GITHUB_ENV" - build=user elif [[ ${{ github.event_name }} == "workflow_dispatch" ]]; then - echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" - gitrev=${{ inputs.gitrev }} + if [[ ${{ inputs.gitrev }} != "" ]]; then + echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" + else + echo "gitrev=$GITHUB_SHA" >> "$GITHUB_ENV" + fi if [[ ${{ inputs.test }} == "true" ]]; then echo "build=test" >> "$GITHUB_ENV" - build=test else echo "build=user" >> "$GITHUB_ENV" - build=user fi else echo "Invalid github-event!" exit 2 fi echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" - echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" + verstag=${{ inputs.verstag }}" + if [[ ${verstag:0:1} == "v" ]]; then + version=${verstag:1} + echo "version=$version" >> "$GITHUB_ENV" + else + echo "invalid verstag: has no leading v" + exit 2 + fi REPO_OWNER=${{ github.repository_owner }} echo "IMG_ADDR=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }}" >> "$GITHUB_ENV" DEV_STR="dev" @@ -92,17 +99,6 @@ jobs: exit 2 fi - latest=${{ inputs.latest }} - echo "latest: $latest" - version=${{ inputs.verstag }} - echo "vesion: $version" - imgaddr=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }} - echo "img-addr: $imgaddr" - echo "gitrev: $gitrev" - echo "build: $build" - - - - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 @@ -110,7 +106,7 @@ jobs: images: ${{ env.REGISTRY }}/${{ github.repository }} tags: | type=raw, enable=${{ env.latest }}, value=latest - type=pep440, pattern={{version}}, value=${{ env.version }} + type=raw, value=${{ env.version }} - name: Docker build shell: bash -l {0} @@ -126,10 +122,9 @@ jobs: fi - name: Test image - run: | # what is installed and the package location relies entirely on the user Dockerfile + run: | # karabo-sitepackage-location used instead of --pyargs because --only-mpi is a custom-flag of karabo which lives in the site-packages docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest --verbose $SITE_PKGS/karabo/test' - + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} run: | diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index d3b4a7da..43c1161e 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -14,10 +14,6 @@ on: 
type: string required: true description: "version: PEP440 package-version (incl. leading 'v')" - dev: - type: boolean - required: true - description: "dev-build? If yes, it sets an offset to build-nr" buildDocker: type: boolean required: false @@ -36,10 +32,6 @@ on: version: type: string required: true - dev: - type: boolean - required: false - default: false buildDocker: type: boolean required: false @@ -84,30 +76,19 @@ jobs: fi KARABO_VERSION="${KARABO_TAG:1}" DEV_STR="dev" - if [[ ${{ inputs.dev }} == "true" ]] - then - if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]] || [[ $LATEST_DOCKER == 'true' ]] - then + if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then + if [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" exit 2 fi build="$(($build + 1000))" - else - if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] - then - echo "Invalid configuration of workflow-inputs!" - exit 2 - fi fi + export KARABO_VERSION=$KARABO_VERSION build=$build echo "karabo_tag=$KARABO_TAG" >> $GITHUB_OUTPUT echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT echo "latest_docker=$LATEST_DOCKER" >> $GITHUB_OUTPUT - echo "karabo-tag: $KARABO_TAG" - echo "build-docker: $BUILD_DOCKER" - echo "latest-docker: $LATEST_DOCKER" - conda config --append channels i4ds conda config --append channels nvidia/label/cuda-11.7.0 conda config --append channels conda-forge diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 5d57cd75..423fde91 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -34,12 +34,9 @@ on: jobs: test-conda-build: runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} steps: - name: Install Conda uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - name: Set variables, Install Package & Dependencies shell: bash -l {0} run: | @@ -60,10 +57,6 @@ jobs: pip install ipykernel python -m ipykernel install --user --name python3 - version=${{ inputs.version }} - echo "inputs-version: $version" - latest=${{ inputs.latestDocker == 'true' }} - echo "latest-docker: $latest" - name: Test Package shell: bash -l {0} run: | From c0bc5f86de4eea98f55c533015e5389b84714e0c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 11:55:20 +0100 Subject: [PATCH 123/207] removed build-string default to ensure that workflow-user knows what he/she does :fish: --- .github/workflows/conda-build.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 43c1161e..de7e1ce1 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -7,9 +7,8 @@ on: inputs: buildNumber: type: string - required: false - default: "0" - description: "build-nr: conda build-nr of anaconda.org" + required: true + description: "build-nr: anaconda.org build-nr (SET CORRECTLY)" version: type: string required: true @@ -74,7 +73,12 @@ jobs: BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} fi - KARABO_VERSION="${KARABO_TAG:1}" + if [[ ${KARABO_TAG:1} == "v" ]]; then + KARABO_VERSION="${KARABO_TAG:1}" + else + echo "invalid karabo-tag: has no leading v" + exit 2 + fi DEV_STR="dev" if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then if [[ $LATEST_DOCKER == 'true' ]]; then From e46b935b1868f8c0ddf6dfc2732d72cfc5868832 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 11:58:27 +0100 Subject: [PATCH 124/207] minor 
update in container.md :fast_forward: --- doc/src/container.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/container.md b/doc/src/container.md index 26401261..68b7918e 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -58,7 +58,7 @@ sarus pull ghcr.io/i4ds/karabo-pipeline:latest **MPI (MPICH) Support** -Karabo >= `v0.21.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). +Karabo >= `v0.22.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). ```shell srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest From d40a74a892fd29309851f2bf2dc3875a403567be Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 13:02:54 +0100 Subject: [PATCH 125/207] bugfix check leading v in tag in conda-build workflow :rice_ball: --- .github/workflows/conda-build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index de7e1ce1..eb57b779 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -8,11 +8,11 @@ on: buildNumber: type: string required: true - description: "build-nr: anaconda.org build-nr (SET CORRECTLY)" + description: "build-nr: anaconda.org build-nr (DON'T trigger build if you don't know how to set it)" version: type: string required: true - description: "version: PEP440 package-version (incl. leading 'v')" + description: "version: PEP440 package-version incl. leading 'v' (DON'T trigger build if you don't know what PEP440 is)" buildDocker: type: boolean required: false @@ -73,7 +73,7 @@ jobs: BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} fi - if [[ ${KARABO_TAG:1} == "v" ]]; then + if [[ ${KARABO_TAG:0:1} == "v" ]]; then KARABO_VERSION="${KARABO_TAG:1}" else echo "invalid karabo-tag: has no leading v" From fcfb49e0c525bf51625fb0d4128950f8e3a5d14d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 14:20:06 +0100 Subject: [PATCH 126/207] bugfix set dev-string check correclty in conda-build.yml :no_pedestrians: --- .github/workflows/conda-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index eb57b779..ffba666f 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -80,7 +80,7 @@ jobs: exit 2 fi DEV_STR="dev" - if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then + if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]]; then if [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" 
exit 2 From 8eaeacfb3c6b8595fb65e05218dbedb70def6609 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 16:42:36 +0100 Subject: [PATCH 127/207] added versioneer to dev-dep :white_circle: --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a7e5d188..932f8a6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,4 +57,5 @@ testpaths = "karabo/test" 'sphinx', 'sphinx_rtd_theme', 'types-requests', # types for mypy + 'versioneer', ] \ No newline at end of file From a5d8aac45d7819f89e781910bfd3f0a361d99749 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 17:06:32 +0100 Subject: [PATCH 128/207] improved version definition security for conda-build workflows :aquarius: --- .github/workflows/conda-build.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index ffba666f..6cad19e2 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -59,19 +59,28 @@ jobs: id: bcs shell: bash -l {0} run: | + DEV_STR="dev" if [[ ${{ github.event_name }} == "release" ]] then build="0" KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} BUILD_DOCKER=true LATEST_DOCKER=true - fi - if [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] + elif [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] then build=${{ inputs.buildNumber }} - KARABO_TAG=${{ inputs.version }} BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} + KARABO_TAG=${{ inputs.version }} + pip install versioneer + VERSIONEER_VERSION=$(python -c 'import versioneer; print(versioneer.get_version())') + if [[ "$VERSIONEER_VERSION" == *"+"* ]] && [[ "$KARABO_TAG" != *"$DEV_STR"* ]]; then + echo "Dirty version needs to be a PEP440 conform dev-version" + exit 2 + fi + else + echo "Not specified github-event occured" + exit 2 fi if [[ ${KARABO_TAG:0:1} == "v" ]]; then KARABO_VERSION="${KARABO_TAG:1}" @@ -79,13 +88,14 @@ jobs: echo "invalid karabo-tag: has no leading v" exit 2 fi - DEV_STR="dev" if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]]; then if [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" 
exit 2 fi build="$(($build + 1000))" + else + fi export KARABO_VERSION=$KARABO_VERSION build=$build From 6a9ee0be157ab56938e873f01a67d850f0a98148 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 17:08:37 +0100 Subject: [PATCH 129/207] minor imporvements in conda-build :arrow_heading_down: --- .github/workflows/conda-build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 6cad19e2..5227e4b0 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -94,8 +94,6 @@ jobs: exit 2 fi build="$(($build + 1000))" - else - fi export KARABO_VERSION=$KARABO_VERSION build=$build From c041a7e2170571c6830119dde66e2c21b70e02e1 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 4 Dec 2023 17:42:59 +0100 Subject: [PATCH 130/207] hopefully bugfix of github boolean passing in reusable workflow see #1483 of github runners :balloon: --- .github/workflows/build-user-image.yml | 6 +----- .github/workflows/conda-build.yml | 4 ++-- .github/workflows/test-user-package.yml | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 3c0ad5e4..58f386ec 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -50,10 +50,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Get Previous tag - uses: actions-ecosystem/action-get-latest-tag@v1 - id: get-latest-tag - - name: Log in to the Container registry uses: docker/login-action@v3 with: @@ -117,7 +113,7 @@ jobs: -f docker/user/Dockerfile \ -t ${{ env.IMG_ADDR }}:${{ env.version }} \ . - if [[ ${{ env.latest }} == "true" ]]; then + if [[ ${{ env.latest }} == 'true' ]]; then docker tag ${{ env.IMG_ADDR }}:${{ env.version }} ${{ env.IMG_ADDR }}:latest fi diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 5227e4b0..c6798ac2 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -47,8 +47,8 @@ jobs: container: ghcr.io/i4ds/mambabuild-docker:latest outputs: karabo-tag: ${{ steps.bcs.outputs.karabo_tag }} - build-docker: ${{ steps.bcs.outputs.build_docker }} - latest-docker: ${{ steps.bcs.outputs.latest_docker }} + build-docker: ${{ steps.bcs.outputs.build_docker == 'true' }} + latest-docker: ${{ steps.bcs.outputs.latest_docker == 'true' }} steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 423fde91..8739c0b1 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -43,7 +43,7 @@ jobs: KARABO_TAG=${{ inputs.version }} KARABO_VERSION="${KARABO_TAG:1}" DEV_STR="dev" - if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] && [[ "${{ inputs.latestDocker }}" == 'true' ]]; then + if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] && [[ ${{ inputs.latestDocker }} == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" 
exit 2 fi From 28ec6912e2f57e2e7153cb87af06c280121f6934 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 5 Dec 2023 08:45:07 +0100 Subject: [PATCH 131/207] improved version definition security for conda-build workflows :arrow_right_hook: --- .github/workflows/conda-build.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index c6798ac2..78b3d6d4 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -74,9 +74,16 @@ jobs: KARABO_TAG=${{ inputs.version }} pip install versioneer VERSIONEER_VERSION=$(python -c 'import versioneer; print(versioneer.get_version())') - if [[ "$VERSIONEER_VERSION" == *"+"* ]] && [[ "$KARABO_TAG" != *"$DEV_STR"* ]]; then - echo "Dirty version needs to be a PEP440 conform dev-version" - exit 2 + if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then + if [[ "$KARABO_TAG" != *"$DEV_STR"* ]]; then + echo "Dirty version needs to be a PEP440 conform dev-version" + exit 2 + fi + else + if [[ "$KARABO_TAG" != *"$VERSIONEER_VERSION"* ]]; then + echo "Provided version doesn't match the actual version" + exit 2 + fi fi else echo "Not specified github-event occured" From c60536693fda445b710dc192d6217f90d0b12a2b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 5 Dec 2023 09:23:21 +0100 Subject: [PATCH 132/207] added little verbosity to conda-build workflow :round_pushpin: --- .github/workflows/conda-build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 78b3d6d4..558f4128 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -62,12 +62,13 @@ jobs: DEV_STR="dev" if [[ ${{ github.event_name }} == "release" ]] then + echo "Conda build from release" build="0" KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} BUILD_DOCKER=true LATEST_DOCKER=true - elif [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]] - then + elif [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]]; then + echo "Conda build from dispatch | call" build=${{ inputs.buildNumber }} BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} From 23edf24f19334b4301f161cb32035bc5aa866e39 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 5 Dec 2023 12:30:24 +0100 Subject: [PATCH 133/207] hopefully bugfix to trigger build-docker by taking boolean values directly from input :ear: --- .github/workflows/test-user-package.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 8739c0b1..ebbeccf7 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -65,11 +65,11 @@ jobs: build-docker: needs: test-conda-build - if: ${{ inputs.buildDocker == 'true' }} + if: ${{ inputs.buildDocker }} uses: ./.github/workflows/build-user-image.yml with: verstag: ${{ inputs.version }} - latest: ${{ inputs.latestDocker == 'true' }} + latest: ${{ inputs.latestDocker }} From 87997e406d41fad3f607d19edbc36f3ae2fb941b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 08:52:20 +0100 Subject: [PATCH 134/207] bugfix build-user-image.yml :left_luggage: --- .github/workflows/build-user-image.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 58f386ec..52215fd3 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -64,12 +64,12 @@ jobs: echo "gitrev=$GITHUB_SHA" >> "$GITHUB_ENV" echo "build=user" >> "$GITHUB_ENV" elif [[ ${{ github.event_name }} == "workflow_dispatch" ]]; then - if [[ ${{ inputs.gitrev }} != "" ]]; then + if [[ "${{ inputs.gitrev }}" != "" ]]; then echo "gitrev=${{ inputs.gitrev }}" >> "$GITHUB_ENV" else echo "gitrev=$GITHUB_SHA" >> "$GITHUB_ENV" fi - if [[ ${{ inputs.test }} == "true" ]]; then + if [[ "${{ inputs.test }}" == 'true' ]]; then echo "build=test" >> "$GITHUB_ENV" else echo "build=user" >> "$GITHUB_ENV" @@ -79,9 +79,9 @@ jobs: exit 2 fi echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" - verstag=${{ inputs.verstag }}" - if [[ ${verstag:0:1} == "v" ]]; then - version=${verstag:1} + verstag="${{ inputs.verstag }}" + if [[ "${verstag:0:1}" == "v" ]]; then + version="${verstag:1}" echo "version=$version" >> "$GITHUB_ENV" else echo "invalid verstag: has no leading v" @@ -90,7 +90,7 @@ jobs: REPO_OWNER=${{ github.repository_owner }} echo "IMG_ADDR=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }}" >> "$GITHUB_ENV" DEV_STR="dev" - if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ $LATEST_DOCKER == 'true' ]]; then + if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ "${{ inputs.latest }}" == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" exit 2 fi From faab290299e14f51d82e19e2afd33816ce68fe3d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 08:57:14 +0100 Subject: [PATCH 135/207] minor improvements in build-user-image.yml :ticket: --- .github/workflows/build-user-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 52215fd3..c601edc0 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -17,7 +17,7 @@ on: type: string required: false default: "" - description: "gitrev: commit-hash (full) if not current commit." + description: "gitrev: commit-hash (full) | branch | tag if not current commit." verstag: type: string required: true @@ -31,7 +31,7 @@ on: type: boolean required: false default: false - description: "install from `gitrev` environment.yaml instead?" + description: "create env from environment.yaml instead of conda-wheel?" env: REGISTRY: ghcr.io From 5a3909b70c54cd6eea342d2177e655da73b1516c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 09:47:20 +0100 Subject: [PATCH 136/207] replaced exit 2 with exit 1 in all bash scripts :three: --- .github/workflows/build-user-image.yml | 6 +++--- .github/workflows/conda-build.yml | 10 +++++----- .github/workflows/test-user-package.yml | 2 +- docker/user/Dockerfile | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index c601edc0..e5d6badb 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -76,7 +76,7 @@ jobs: fi else echo "Invalid github-event!" 
- exit 2 + exit 1 fi echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" verstag="${{ inputs.verstag }}" @@ -85,14 +85,14 @@ jobs: echo "version=$version" >> "$GITHUB_ENV" else echo "invalid verstag: has no leading v" - exit 2 + exit 1 fi REPO_OWNER=${{ github.repository_owner }} echo "IMG_ADDR=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }}" >> "$GITHUB_ENV" DEV_STR="dev" if [[ "${{ inputs.verstag }}" == *"$DEV_STR"* ]] && [[ "${{ inputs.latest }}" == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" - exit 2 + exit 1 fi - name: Extract metadata (tags, labels) for Docker diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 558f4128..3098b099 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -78,28 +78,28 @@ jobs: if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then if [[ "$KARABO_TAG" != *"$DEV_STR"* ]]; then echo "Dirty version needs to be a PEP440 conform dev-version" - exit 2 + exit 1 fi else if [[ "$KARABO_TAG" != *"$VERSIONEER_VERSION"* ]]; then echo "Provided version doesn't match the actual version" - exit 2 + exit 1 fi fi else echo "Not specified github-event occured" - exit 2 + exit 1 fi if [[ ${KARABO_TAG:0:1} == "v" ]]; then KARABO_VERSION="${KARABO_TAG:1}" else echo "invalid karabo-tag: has no leading v" - exit 2 + exit 1 fi if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]]; then if [[ $LATEST_DOCKER == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" - exit 2 + exit 1 fi build="$(($build + 1000))" fi diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index ebbeccf7..c1980708 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -45,7 +45,7 @@ jobs: DEV_STR="dev" if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] && [[ ${{ inputs.latestDocker }} == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" - exit 2 + exit 1 fi export IS_GITHUB_RUNNER=true export RUN_NOTEBOOK_TESTS=false diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 0d5703cb..df193a7c 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -34,7 +34,7 @@ RUN if [ "$BUILD" = "user" ] ; then \ pip install --no-deps "."; \ cd ".." ; \ else \ - exit 2; \ + exit 1; \ fi && \ echo "conda activate karabo" >> ~/.bashrc && \ mkdir /workspace && \ From 2f6020e655c1f40bb2c393038f662c8a915a3187 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 10:20:21 +0100 Subject: [PATCH 137/207] bugfix install environment in user Dockerfile correctly :blue_heart: --- .github/workflows/build-user-image.yml | 1 + docker/user/Dockerfile | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index e5d6badb..50b81848 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -110,6 +110,7 @@ jobs: docker build \ --build-arg GIT_REV=${{ env.gitrev }} \ --build-arg BUILD=${{ env.build }} \ + --build-arg KARABO_VERSION=${{ env.version }} \ -f docker/user/Dockerfile \ -t ${{ env.IMG_ADDR }}:${{ env.version }} \ . 
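Patch 137 threads `KARABO_VERSION` from the workflow hunk above into the user Dockerfile shown in the diff that follows. The Dockerfile serves two modes: `BUILD=user` installs the `karabo-pipeline="$KARABO_VERSION"` package from the i4ds/conda-forge/nvidia channels, while `BUILD=test` rebuilds the environment from the checked-out `environment.yaml` and installs the source with `pip install --no-deps .`; `GIT_REV` only decides which repository state the `karabo-repo` stage copies in (example notebooks, plus the environment file for the test case). As a rough local equivalent of the workflow's user build, run from a checkout of the repository (the tag and version below are placeholders, not values taken from CI):

```shell
# Sketch only: the same build the workflow issues, invoked by hand.
docker build \
  --build-arg GIT_REV=$(git rev-parse HEAD) \
  --build-arg BUILD=user \
  --build-arg KARABO_VERSION=0.19.6 \
  -f docker/user/Dockerfile \
  -t karabo-pipeline:0.19.6 \
  .
```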
diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index df193a7c..4d822a63 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,3 +1,4 @@ +# for copying example-notebooks & in case env is installed through `environment.yaml` FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo ARG GIT_REV RUN apt-get update && apt-get install -y git && \ @@ -8,9 +9,10 @@ RUN apt-get update && apt-get install -y git && \ git fetch origin ${GIT_REV} && \ git reset --hard ${GIT_REV} +# main build FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 -# build: user|test, GIT_REV: in case of "user", ONLY `v{major}.{minor}.{patch}` -ARG BUILD=user +# build: user|test, KARABO_VERSION: version to install from anaconda.org in case build=user: `{major}.{minor}.{patch}` (no leading 'v') +ARG BUILD=user KARABO_VERSION="" RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ PATH="/opt/conda/bin:${PATH}" \ @@ -27,7 +29,7 @@ RUN conda install -n base conda-libmamba-solver && \ SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] COPY --from=karabo-repo Karabo-Pipeline/ repo/ RUN if [ "$BUILD" = "user" ] ; then \ - conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="${GIT_REV:1}"; \ + conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="$KARABO_VERSION"; \ elif [ "$BUILD" = "test" ] ; then \ cd "repo"; \ conda env update -f="environment.yaml"; \ From 320740ef2ad2155033ea45ccc3abd3874a95e6c7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 10:55:21 +0100 Subject: [PATCH 138/207] removed weird leading v to github-workflows version-args :rice: --- .github/workflows/build-user-image.yml | 11 ++------ .github/workflows/conda-build.yml | 36 ++++++++++++------------- .github/workflows/test-user-package.yml | 7 +++-- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-user-image.yml index 50b81848..1074e45c 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-user-image.yml @@ -21,7 +21,7 @@ on: verstag: type: string required: true - description: "version: PEP440 version-tag of Karabo (incl. leading 'v')" + description: "version: PEP440 version-tag of Karabo. DON't trigger build if you don't know what PEP440 is!" latest: type: boolean required: false @@ -79,14 +79,7 @@ jobs: exit 1 fi echo "latest=${{ inputs.latest }}" >> "$GITHUB_ENV" - verstag="${{ inputs.verstag }}" - if [[ "${verstag:0:1}" == "v" ]]; then - version="${verstag:1}" - echo "version=$version" >> "$GITHUB_ENV" - else - echo "invalid verstag: has no leading v" - exit 1 - fi + echo "version=${{ inputs.verstag }}" >> "$GITHUB_ENV" REPO_OWNER=${{ github.repository_owner }} echo "IMG_ADDR=${{ env.REGISTRY }}/${REPO_OWNER@L}/${{ env.IMG_NAME }}" >> "$GITHUB_ENV" DEV_STR="dev" diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 3098b099..dc273872 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -12,7 +12,7 @@ on: version: type: string required: true - description: "version: PEP440 package-version incl. 
leading 'v' (DON'T trigger build if you don't know what PEP440 is)" + description: "version: PEP440 package-version (DON'T trigger build if you don't know what PEP440 is)" buildDocker: type: boolean required: false @@ -46,7 +46,7 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/i4ds/mambabuild-docker:latest outputs: - karabo-tag: ${{ steps.bcs.outputs.karabo_tag }} + karabo-version: ${{ steps.bcs.outputs.karabo_version }} build-docker: ${{ steps.bcs.outputs.build_docker == 'true' }} latest-docker: ${{ steps.bcs.outputs.latest_docker == 'true' }} steps: @@ -60,29 +60,35 @@ jobs: shell: bash -l {0} run: | DEV_STR="dev" - if [[ ${{ github.event_name }} == "release" ]] + if [[ ${{ github.event_name }} == 'release' ]] then echo "Conda build from release" build="0" KARABO_TAG=${{ steps.get-latest-tag.outputs.tag }} + if [[ ${KARABO_TAG:0:1} == "v" ]]; then + KARABO_VERSION="${KARABO_TAG:1}" + else + echo "invalid karabo-tag: has no leading v" + exit 1 + fi BUILD_DOCKER=true LATEST_DOCKER=true - elif [[ ${{ github.event_name }} == "workflow_dispatch" ]] || [[ ${{ github.event_name }} == "workflow_call" ]]; then + elif [[ ${{ github.event_name }} == 'workflow_dispatch' ]] || [[ ${{ github.event_name }} == 'workflow_call' ]]; then echo "Conda build from dispatch | call" build=${{ inputs.buildNumber }} BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} - KARABO_TAG=${{ inputs.version }} + KARABO_VERSION=${{ inputs.version }} pip install versioneer VERSIONEER_VERSION=$(python -c 'import versioneer; print(versioneer.get_version())') if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then - if [[ "$KARABO_TAG" != *"$DEV_STR"* ]]; then - echo "Dirty version needs to be a PEP440 conform dev-version" + if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then + echo "Dirty commit-version needs to be a PEP440 conform dev-version" exit 1 fi else - if [[ "$KARABO_TAG" != *"$VERSIONEER_VERSION"* ]]; then - echo "Provided version doesn't match the actual version" + if [[ "$KARABO_VERSION" != "$VERSIONEER_VERSION" ]]; then + echo "Provided version $KARABO_VERSION doesn't match the actual version $VERSIONEER_VERSION" exit 1 fi fi @@ -90,22 +96,16 @@ jobs: echo "Not specified github-event occured" exit 1 fi - if [[ ${KARABO_TAG:0:1} == "v" ]]; then - KARABO_VERSION="${KARABO_TAG:1}" - else - echo "invalid karabo-tag: has no leading v" - exit 1 - fi if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]]; then if [[ $LATEST_DOCKER == 'true' ]]; then - echo "Invalid configuration of workflow-inputs!" + echo "Dev build docker image can not be tagged as latest!" 
exit 1 fi build="$(($build + 1000))" fi export KARABO_VERSION=$KARABO_VERSION build=$build - echo "karabo_tag=$KARABO_TAG" >> $GITHUB_OUTPUT + echo "karabo_version=$KARABO_VERSION" >> $GITHUB_OUTPUT echo "build_docker=$BUILD_DOCKER" >> $GITHUB_OUTPUT echo "latest_docker=$LATEST_DOCKER" >> $GITHUB_OUTPUT @@ -125,6 +125,6 @@ jobs: needs: conda-build uses: ./.github/workflows/test-user-package.yml with: - version: ${{ needs.conda-build.outputs.karabo-tag }} + version: ${{ needs.conda-build.outputs.karabo-version }} buildDocker: ${{ needs.conda-build.outputs.build-docker == 'true' }} latestDocker: ${{ needs.conda-build.outputs.latest-docker == 'true' }} diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index c1980708..3245b2be 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -6,12 +6,12 @@ on: version: type: string required: true - description: "version: PEP440 package-version (incl. leading 'v')" + description: "version: PEP440 package-version" buildDocker: type: boolean required: false default: false - description: "build docker-img if tests succeeded?" + description: "build docker-img if tests succeeded? DON't enable if you don't know what PEP440 is!" latestDocker: type: boolean required: false @@ -40,8 +40,7 @@ jobs: - name: Set variables, Install Package & Dependencies shell: bash -l {0} run: | - KARABO_TAG=${{ inputs.version }} - KARABO_VERSION="${KARABO_TAG:1}" + KARABO_VERSION=${{ inputs.version }} DEV_STR="dev" if [[ "$KARABO_VERSION" == *"$DEV_STR"* ]] && [[ ${{ inputs.latestDocker }} == 'true' ]]; then echo "Invalid configuration of workflow-inputs!" From 4c6ab304a665fc1956f4e48299bd51988b6eb7ef Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 11:29:42 +0100 Subject: [PATCH 139/207] updated codecov.yml to not fail if below target :slot_machine: --- codecov.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codecov.yml b/codecov.yml index b6700aae..d863dd51 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,5 +1,8 @@ coverage: status: project: + default: + target: 0% # sets required coverage for a success + patch: default: target: 0% # sets required coverage for a success \ No newline at end of file From b5a982575f3ffe26bf79208dc8a5c037fdc14659 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 11:42:43 +0100 Subject: [PATCH 140/207] bugfix: added python interpreter and versioneer through conda for checking conda-build worklow inputs :ru: --- .github/workflows/conda-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index dc273872..7e1f8174 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -79,7 +79,7 @@ jobs: BUILD_DOCKER=${{ inputs.buildDocker }} LATEST_DOCKER=${{ inputs.latestDocker }} KARABO_VERSION=${{ inputs.version }} - pip install versioneer + conda install -c conda-forge python versioneer VERSIONEER_VERSION=$(python -c 'import versioneer; print(versioneer.get_version())') if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then From 712ff46abef979d3724a0ad301c8cfda2d7be1fd Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 11:53:10 +0100 Subject: [PATCH 141/207] added conda-prefix to python interpreter to hopefully get viable binary :m: --- .github/workflows/conda-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 7e1f8174..a9e214ea 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -80,7 +80,7 @@ jobs: LATEST_DOCKER=${{ inputs.latestDocker }} KARABO_VERSION=${{ inputs.version }} conda install -c conda-forge python versioneer - VERSIONEER_VERSION=$(python -c 'import versioneer; print(versioneer.get_version())') + VERSIONEER_VERSION=$($CONDA_PREFIX/bin/python -c 'import versioneer; print(versioneer.get_version())') if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then echo "Dirty commit-version needs to be a PEP440 conform dev-version" From 770be0a47c0fea5da46ba836c7a3a14883161d7d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 6 Dec 2023 11:57:20 +0100 Subject: [PATCH 142/207] added conda-prefix to python interpreter to hopefully get viable binary :bowtie: --- .github/workflows/conda-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index a9e214ea..7877603d 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -80,7 +80,7 @@ jobs: LATEST_DOCKER=${{ inputs.latestDocker }} KARABO_VERSION=${{ inputs.version }} conda install -c conda-forge python versioneer - VERSIONEER_VERSION=$($CONDA_PREFIX/bin/python -c 'import versioneer; print(versioneer.get_version())') + VERSIONEER_VERSION=$(/opt/conda/bin/python -c 'import versioneer; print(versioneer.get_version())') if [[ "$VERSIONEER_VERSION" == *"+"* ]]; then if [[ "$KARABO_VERSION" != *"$DEV_STR"* ]]; then echo "Dirty commit-version needs to be a PEP440 conform dev-version" From 7cac7f97f95564cf89c02f7693c4433451629027 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 12:58:44 +0100 Subject: [PATCH 143/207] removed dev-deps in environment.yaml and meta.yaml :hourglass: --- conda/meta.yaml | 24 ++++++++++++------------ environment.yaml | 22 +++++++++++----------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 986472a5..76f92857 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -23,38 +23,38 @@ requirements: - versioneer run: - python {{ python }} - - aratmospy =1.0.dev0 + - aratmospy =1.0.0 - astropy - - bdsf =1.10.dev2 - - bluebild =0.1.dev0 + - bdsf =1.10.2 + - bluebild =0.1.0 - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.dev0 + - eidos =1.1.0 - healpy - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings - ipython - - katbeam =0.1.dev0 + - katbeam =0.1.0 - libcufft - matplotlib - - montagepy =6.0.dev0 + - montagepy =6.0.0 - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels - mpich # explicit mpich as dep for `mpi4py`. 
mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - {{ pin_compatible('numpy') }} - - oskarpy =2.8.dev3 + - oskarpy =2.8.3 - pandas - psutil - - rascil =1.0.dev0 + - rascil =1.0.0 - reproject >=0.9,<=10.0 - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.dev0 - - ska-sdp-datamodels =0.1.dev3 - - ska-sdp-func-python =0.1.dev4 - - tools21cm =2.0.dev2 + - ska-gridder-nifty-cuda =0.3.0 + - ska-sdp-datamodels =0.1.3 + - ska-sdp-func-python =0.1.4 + - tools21cm =2.0.2 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge diff --git a/environment.yaml b/environment.yaml index 63e7f8b2..a708a10a 100644 --- a/environment.yaml +++ b/environment.yaml @@ -4,38 +4,38 @@ channels: - conda-forge dependencies: - python =3.9 - - aratmospy =1.0.dev0 + - aratmospy =1.0.0 - astropy - bdsf =1.10.dev2 - - bluebild =0.1.dev0 + - bluebild =0.1.0 - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.dev0 + - eidos =1.1.0 - healpy - h5py =*=mpi_mpich* - ipython - - katbeam =0.1.dev0 + - katbeam =0.1.0 - libcufft - matplotlib - - montagepy =6.0.dev0 + - montagepy =6.0.0 - mpi4py - mpich - nbformat - nbconvert - numpy >=1.21, !=1.24.0 - - oskarpy =2.8.dev3 + - oskarpy =2.8.3 - pandas - psutil - - rascil =1.0.dev0 + - rascil =1.0.0 - reproject >=0.9,<=10.0 - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.dev0 - - ska-sdp-datamodels =0.1.dev3 - - ska-sdp-func-python =0.1.dev4 - - tools21cm =2.0.dev2 + - ska-gridder-nifty-cuda =0.3.0 + - ska-sdp-datamodels =0.1.3 + - ska-sdp-func-python =0.1.4 + - tools21cm =2.0.2 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) \ No newline at end of file From eb9c1a6909e0a2da2df03c749b35b9306dab6a43 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 13:05:58 +0100 Subject: [PATCH 144/207] bugfix: removed bdsf dev-dep :wedding: --- environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yaml b/environment.yaml index a708a10a..d4284f92 100644 --- a/environment.yaml +++ b/environment.yaml @@ -6,7 +6,7 @@ dependencies: - python =3.9 - aratmospy =1.0.0 - astropy - - bdsf =1.10.dev2 + - bdsf =1.10.2 - bluebild =0.1.0 - cuda-cudart - dask >=2022.12.1 From 63d1a9ae02efc197ca31502b556f8abf66bb014e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 14:24:42 +0100 Subject: [PATCH 145/207] addressed mypy-issues :monorail: --- karabo/simulation/sky_model.py | 15 ++++++++------- karabo/sourcedetection/evaluation.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 5b4c285b..6dadcdfd 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -1187,9 +1187,11 @@ def get_sky_model_from_h5_to_xarray( data_arrays = [x.compute() for x in data_arrays] sky = xr.concat(data_arrays, dim=XARRAY_DIM_1_DEFAULT) sky = sky.T - sky = sky.chunk( - {XARRAY_DIM_0_DEFAULT: chunksize, XARRAY_DIM_1_DEFAULT: sky.shape[1]} - ) + chunks = { + XARRAY_DIM_0_DEFAULT: chunksize, + XARRAY_DIM_1_DEFAULT: sky.shape[1], + } + sky = sky.chunk(chunks=chunks) return SkyModel(sky, h5_file_connection=f) @staticmethod @@ -1381,13 +1383,12 @@ def get_sky_model_from_fits( 
data_array.coords[XARRAY_DIM_0_DEFAULT] = source_ids data_arrays.append(data_array) + chunks = {XARRAY_DIM_0_DEFAULT: chunksize} for freq_dataset in data_arrays: - freq_dataset.chunk({XARRAY_DIM_0_DEFAULT: chunksize}) + freq_dataset.chunk(chunks=chunks) result_dataset = ( - xr.concat(data_arrays, dim=XARRAY_DIM_0_DEFAULT) - .chunk({XARRAY_DIM_0_DEFAULT: chunksize}) - .T + xr.concat(data_arrays, dim=XARRAY_DIM_0_DEFAULT).chunk(chunks=chunks).T ) return SkyModel(result_dataset) diff --git a/karabo/sourcedetection/evaluation.py b/karabo/sourcedetection/evaluation.py index dc775908..fa9dc77c 100644 --- a/karabo/sourcedetection/evaluation.py +++ b/karabo/sourcedetection/evaluation.py @@ -331,7 +331,7 @@ def plot_confusion_matrix( conf_matrix = self.get_confusion_matrix() ax: Axes _, ax = plt.subplots() - ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) # type: ignore[attr-defined] # noqa: E501 + ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) for i in range(conf_matrix.shape[0]): for j in range(conf_matrix.shape[1]): ax.text( From 9ed76a27686e6ab8bbaa4c713e467bcd5e3528cc Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 14:42:42 +0100 Subject: [PATCH 146/207] updated mypy-complaints chunks-dict issue in sky-model :mountain_railway: --- karabo/simulation/sky_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/karabo/simulation/sky_model.py b/karabo/simulation/sky_model.py index 6dadcdfd..06e79ce2 100644 --- a/karabo/simulation/sky_model.py +++ b/karabo/simulation/sky_model.py @@ -473,7 +473,8 @@ def rechunk_array_based_on_self(self, array: xr.DataArray) -> xr.DataArray: raise KaraboSkyModelError("Rechunking of `sources` None is not allowed.") if self.sources.chunks is not None: chunk_size = max(self.sources.chunks[0][0], 1) - array = array.chunk({self._sources_dim_sources: chunk_size}) + chunks: Dict[str, Any] = {self._sources_dim_sources: chunk_size} + array = array.chunk(chunks=chunks) else: pass return array @@ -1187,7 +1188,7 @@ def get_sky_model_from_h5_to_xarray( data_arrays = [x.compute() for x in data_arrays] sky = xr.concat(data_arrays, dim=XARRAY_DIM_1_DEFAULT) sky = sky.T - chunks = { + chunks: Dict[str, Any] = { XARRAY_DIM_0_DEFAULT: chunksize, XARRAY_DIM_1_DEFAULT: sky.shape[1], } @@ -1383,7 +1384,7 @@ def get_sky_model_from_fits( data_array.coords[XARRAY_DIM_0_DEFAULT] = source_ids data_arrays.append(data_array) - chunks = {XARRAY_DIM_0_DEFAULT: chunksize} + chunks: Dict[str, Any] = {XARRAY_DIM_0_DEFAULT: chunksize} for freq_dataset in data_arrays: freq_dataset.chunk(chunks=chunks) From f6625e1e661c67e7c6420976321453e1cc58cde0 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 16:14:05 +0100 Subject: [PATCH 147/207] fixed build-nr of feedstock-deps :hamster: --- conda/meta.yaml | 24 ++++++++++++------------ environment.yaml | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 76f92857..8b5d2a2b 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -23,38 +23,38 @@ requirements: - versioneer run: - python {{ python }} - - aratmospy =1.0.0 + - aratmospy =1.0.0=*_0 - astropy - - bdsf =1.10.2 - - bluebild =0.1.0 + - bdsf =1.10.2=*_0 + - bluebild =0.1.0=*_0 - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.0 + - eidos =1.1.0=*_0 - healpy - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings - ipython - - katbeam =0.1.0 + - katbeam =0.1.0=*_0 - libcufft - matplotlib - - montagepy =6.0.0 + - montagepy =6.0.0=*_0 
- mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels - mpich # explicit mpich as dep for `mpi4py`. mpich & openmpi support linux & mac (not windows) - nbformat - nbconvert - {{ pin_compatible('numpy') }} - - oskarpy =2.8.3 + - oskarpy =2.8.3=*_0 - pandas - psutil - - rascil =1.0.0 + - rascil =1.0.0=*_0 - reproject >=0.9,<=10.0 - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.0 - - ska-sdp-datamodels =0.1.3 - - ska-sdp-func-python =0.1.4 - - tools21cm =2.0.2 + - ska-gridder-nifty-cuda =0.3.0=*_0 + - ska-sdp-datamodels =0.1.3=*_0 + - ska-sdp-func-python =0.1.4=*_0 + - tools21cm =2.0.2=*_0 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge diff --git a/environment.yaml b/environment.yaml index d4284f92..593257a4 100644 --- a/environment.yaml +++ b/environment.yaml @@ -4,38 +4,38 @@ channels: - conda-forge dependencies: - python =3.9 - - aratmospy =1.0.0 + - aratmospy =1.0.0=*_0 - astropy - - bdsf =1.10.2 - - bluebild =0.1.0 + - bdsf =1.10.2=*_0 + - bluebild =0.1.0=*_0 - cuda-cudart - dask >=2022.12.1 - dask-mpi - distributed - - eidos =1.1.0 + - eidos =1.1.0=*_0 - healpy - h5py =*=mpi_mpich* - ipython - - katbeam =0.1.0 + - katbeam =0.1.0=*_0 - libcufft - matplotlib - - montagepy =6.0.0 + - montagepy =6.0.0=*_0 - mpi4py - mpich - nbformat - nbconvert - numpy >=1.21, !=1.24.0 - - oskarpy =2.8.3 + - oskarpy =2.8.3=*_0 - pandas - psutil - - rascil =1.0.0 + - rascil =1.0.0=*_0 - reproject >=0.9,<=10.0 - requests - scipy >=1.10.1 - - ska-gridder-nifty-cuda =0.3.0 - - ska-sdp-datamodels =0.1.3 - - ska-sdp-func-python =0.1.4 - - tools21cm =2.0.2 + - ska-gridder-nifty-cuda =0.3.0=*_0 + - ska-sdp-datamodels =0.1.3=*_0 + - ska-sdp-func-python =0.1.4=*_0 + - tools21cm =2.0.2=*_0 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) \ No newline at end of file From e56cd922fdc70a23b9fd31f4001758a1c518404e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 7 Dec 2023 16:26:55 +0100 Subject: [PATCH 148/207] addresses mypy attr-defined for matplotlib BLUE :pouting_cat: --- karabo/sourcedetection/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karabo/sourcedetection/evaluation.py b/karabo/sourcedetection/evaluation.py index fa9dc77c..b35aafe6 100644 --- a/karabo/sourcedetection/evaluation.py +++ b/karabo/sourcedetection/evaluation.py @@ -331,7 +331,7 @@ def plot_confusion_matrix( conf_matrix = self.get_confusion_matrix() ax: Axes _, ax = plt.subplots() - ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) + ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3) # type: ignore[attr-defined] # noqa: E501 for i in range(conf_matrix.shape[0]): for j in range(conf_matrix.shape[1]): ax.text( From da7882557f5f84ded793e5db22d4bc528ebfdb7e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 8 Dec 2023 08:52:50 +0100 Subject: [PATCH 149/207] renamed build-user-image do build-docker-image :shell: --- .github/workflows/build-dev-image.yml | 72 ------------------- ...-user-image.yml => build-docker-image.yml} | 2 +- .github/workflows/test-user-package.yml | 2 +- README.md | 2 +- 4 files changed, 3 insertions(+), 75 deletions(-) delete mode 100644 .github/workflows/build-dev-image.yml rename 
.github/workflows/{build-user-image.yml => build-docker-image.yml} (99%) diff --git a/.github/workflows/build-dev-image.yml b/.github/workflows/build-dev-image.yml deleted file mode 100644 index 2318c52c..00000000 --- a/.github/workflows/build-dev-image.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Build Dev Image - -on: - workflow_dispatch: - inputs: - IMAGE_TAG: - required: true - type: string - # workflow_run: - # workflows: ["Test User Package"] - # types: - # - completed - -env: - REGISTRY: ghcr.io - IMAGE_NAME: karabo-dev - - -jobs: - Build_Dev_Image: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Get Previous tag - uses: actions-ecosystem/action-get-latest-tag@v1 - id: get-latest-tag - - - name: Log in to the Container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Create image tag - id: imagetag - shell: bash -l {0} - run: | - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - IMAGE_TAG=${{ inputs.IMAGE_TAG }} - else - IMAGE_TAG=${{ steps.get-latest-tag.outputs.tag }} - fi - echo "tag=$IMAGE_TAG" >> $GITHUB_OUTPUT - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }} - tags: | - type=raw, value=${{ steps.imagetag.outputs.tag }} - - - name: Build and push Docker image - uses: docker/build-push-action@v5 - with: - file: docker/dev/Dockerfile - context: . 
- push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/build-user-image.yml b/.github/workflows/build-docker-image.yml similarity index 99% rename from .github/workflows/build-user-image.yml rename to .github/workflows/build-docker-image.yml index 1074e45c..0de210f8 100644 --- a/.github/workflows/build-user-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -1,4 +1,4 @@ -name: Build User Image +name: Build Docker Image on: workflow_call: diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml index 3245b2be..760fe100 100644 --- a/.github/workflows/test-user-package.yml +++ b/.github/workflows/test-user-package.yml @@ -65,7 +65,7 @@ jobs: build-docker: needs: test-conda-build if: ${{ inputs.buildDocker }} - uses: ./.github/workflows/build-user-image.yml + uses: ./.github/workflows/build-docker-image.yml with: verstag: ${{ inputs.version }} latest: ${{ inputs.latestDocker }} diff --git a/README.md b/README.md index 3e939e30..614267e7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Test Software](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml) [![Build Docs](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docs.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docs.yml) [![Build Conda](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/conda-build.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/conda-build.yml) -[![Build Docker User Image](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-user-image.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-user-image.yml) +[![Build Docker User Image](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docker-image.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docker-image.yml) [Documentation](https://i4ds.github.io/Karabo-Pipeline/) | [Example](karabo/examples/source_detection.ipynb) | From 9059ba7f9c7c64a585dd61580f8cf53ead65e840 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 8 Dec 2023 09:08:37 +0100 Subject: [PATCH 150/207] adapted readme-badges :cake: --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 614267e7..9483e46f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ ![Alt text](doc/src/_static/logo.png?raw=true "Karabo") =========== -[![Test Software](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml) -[![Build Docs](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docs.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docs.yml) -[![Build Conda](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/conda-build.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/conda-build.yml) -[![Build Docker User Image](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docker-image.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/build-docker-image.yml) +| | | +| --- | --- | +| Testing | [![CI - Test](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml/badge.svg)](https://github.com/i4Ds/Karabo-Pipeline/actions/workflows/test.yml) 
[![codecov](https://codecov.io/gh/i4Ds/Karabo-Pipeline/graph/badge.svg?token=WU4IC2MOXV)](https://codecov.io/gh/i4Ds/Karabo-Pipeline) | +| Package | [![Conda Latest Release](https://anaconda.org/i4ds/karabo-pipeline/badges/version.svg)](https://anaconda.org/i4ds/karabo-pipeline) [![Conda Downloads](https://anaconda.org/i4ds/karabo-pipeline/badges/downloads.svg)](https://anaconda.org/i4ds/karabo-pipeline) | +| Meta | [![License - BSD 3-Clause](https://anaconda.org/i4ds/karabo-pipeline/badges/license.svg)](https://github.com/i4Ds/Karabo-Pipeline/blob/main/LICENSE) | [Documentation](https://i4ds.github.io/Karabo-Pipeline/) | [Example](karabo/examples/source_detection.ipynb) | From e13371f4bc684ed7ea4cd98bea58275629f349f0 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 12 Dec 2023 13:57:24 +0100 Subject: [PATCH 151/207] addressed pr-request to install mpich via apt :monkey: --- conda/meta.yaml | 12 +++++------- docker/user/Dockerfile | 37 +++++++++++++++---------------------- environment.yaml | 9 +++++---- 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 8b5d2a2b..86552791 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -21,7 +21,7 @@ requirements: - numpy - tomli - versioneer - run: + run: # constrain-notes see `environment.yaml` - python {{ python }} - aratmospy =1.0.0=*_0 - astropy @@ -33,14 +33,14 @@ requirements: - distributed - eidos =1.1.0=*_0 - healpy - - h5py =*=mpi_mpich* # hdf5 mpich-whl python bindings + - h5py - ipython - katbeam =0.1.0=*_0 - libcufft - matplotlib - montagepy =6.0.0=*_0 - - mpi4py # python bindings for mpi, it's implementation (mpich or openmpi) depends on other wheels - - mpich # explicit mpich as dep for `mpi4py`. mpich & openmpi support linux & mac (not windows) + - mpi4py + - mpich =4.0 - nbformat - nbconvert - {{ pin_compatible('numpy') }} @@ -57,9 +57,7 @@ requirements: - tools21cm =2.0.2=*_0 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel prio i4ds > conda-forge - # - casacore =*=mpi_openmpi* # oskarpy(oskar) -> casacore has just nompi & openmpi-wheels - + - conda-forge::fftw =*=mpi_mpich* test: diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 4d822a63..3fb1d0a1 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -47,26 +47,19 @@ RUN if [ "$BUILD" = "user" ] ; then \ WORKDIR /workspace ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] -# The following steps are needed if an mpi-hook through sarus must be allowed. -# However, because this steps takes more than 2 hours, and the github-runners run out -# of memory, these steps are outcommented at the time-beeing. +# The folowwing steps install mpich at a standard location to allow a native mpi-hook +# To make this work, conda additionally needs to link their mpi-installation in the +# virtual environment to the standard-location. Be aware that the mpi-installation +# and version is determined through `apt`. Therefore, to ensure abi-compatibility of mpi the +# version installed using `apt` and the version specified in the environment-files must match. 
-# # fetch mpich-version to have it consistent with it's installation from karabo -# ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -# # install mpich on standard location to enable mpi-hook (may take a while) -# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ -# mkdir -p /tmp/mpich-build && \ -# cd /tmp/mpich-build && \ -# wget http://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \ -# tar xvzf mpich-${MPICH_VERSION}.tar.gz && \ -# cd mpich-${MPICH_VERSION} && \ -# ./configure --enable-fast=all,O3 --prefix=/usr --with-cuda=/usr/local/cuda && \ -# make -j4 && \ -# make install && \ -# ldconfig && \ -# cp -p /tmp/mpich-build/mpich-${MPICH_VERSION}/examples/cpi /usr/bin/ && \ -# cd / && \ -# rm -rf /tmp/mpich-build -# # replace mpi with dummy-install (see issue #512) -# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ -# conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file +# fetch mpich-version to have it consistent with it's installation from karabo +ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +# install mpich at standard location +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ + MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ + apt update && \ + apt install mpich=$MPICH_VERSION_APT +# replace mpi with dummy-install (see issue #512) +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ + conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file diff --git a/environment.yaml b/environment.yaml index 593257a4..74fb55e8 100644 --- a/environment.yaml +++ b/environment.yaml @@ -2,7 +2,7 @@ channels: - i4ds - nvidia/label/cuda-11.7.0 - conda-forge -dependencies: +dependencies: # package-version & build-number of Karabo-Feedstock deps should be fixed (see PR #526) - python =3.9 - aratmospy =1.0.0=*_0 - astropy @@ -14,14 +14,14 @@ dependencies: - distributed - eidos =1.1.0=*_0 - healpy - - h5py =*=mpi_mpich* + - h5py # has mpich-wheel, but is not compatible with apt-compiled binary (see PR #526) - ipython - katbeam =0.1.0=*_0 - libcufft - matplotlib - montagepy =6.0.0=*_0 - mpi4py - - mpich + - mpich =4.0 # version needs to be compatible with the `apt` installer in Dockerfile (see PR #526) - nbformat - nbconvert - numpy >=1.21, !=1.24.0 @@ -38,4 +38,5 @@ dependencies: - tools21cm =2.0.2=*_0 - xarray >=2022.10.0 # transversal dependencies which we need to reference to get mpi-wheels - - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) \ No newline at end of file + # casacore hast just no-mpi & open-mpi, but no mpich-wheel + - conda-forge::fftw =*=mpi_mpich* # oskarpy(oskar(casacore)), tools21cm, bluebild(finufft) -> from conda-forge to ignore channel-prio & not take our legacy fftw-wheel \ No newline at end of file From 8e31f7c18f5f6f009f599d51101d3902b67b5528 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 12 Dec 2023 14:15:28 +0100 Subject: [PATCH 152/207] adapted documentation and dockerfile-steps :sake: --- doc/src/container.md | 8 ++++---- docker/user/Dockerfile | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 68b7918e..14d36579 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -35,11 +35,11 @@ We do not provide ready-made [Singularity containers](https://sylabs.io/). 
Howev singularity pull docker://ghcr.io/i4ds/karabo-pipeline:latest ``` -How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). +How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). Be aware that Singularity mounts the home-directory by default if start a container from your home-directory, which may not be desirable (e.g. `conda init` is done through .bashrc of the image). Be sure to disable this behavior by setting the `--no-home` flag when starting a container. ## Sarus Container -On CSCS, it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a sarus image in an interactive SLURM job using `srun --pty bash`. +On CSCS, it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a Sarus image in an interactive SLURM job using `srun --pty bash`. **Setup** @@ -50,7 +50,7 @@ module load daint-gpu \# or daint-mc module load sarus ``` -Then you can pull a Docker image to a sarus image as follows: +Then you can pull a Docker image to a Sarus image as follows: ```shell sarus pull ghcr.io/i4ds/karabo-pipeline:latest @@ -66,7 +66,7 @@ srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. -Currently, native-mpi-hook is NOT enabled, because the mpi-installation needs to live at a standard-location, which is not the case here since mpich lives in a conda-venv. If this is a feature you need, don't hesitate to contact us. In the container, you would have to install mpich from source (the same version which lives in the conda-venv), and replace the mpich in the conda-env with a dummy-installation. Then, you're able to use to use native MPI by hooking CSCS MPI into the container adding the `--mpi` flag as follows: +We support native-mpi hook, which allows to utilize the mpi of CSCS at optimized performance. To enable the hook, just add the `--mpi` flag of the `sarus run` command as follows: ```shell srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 3fb1d0a1..f0697e61 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -48,18 +48,14 @@ WORKDIR /workspace ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] # The folowwing steps install mpich at a standard location to allow a native mpi-hook -# To make this work, conda additionally needs to link their mpi-installation in the -# virtual environment to the standard-location. Be aware that the mpi-installation +# To make this work, conda additionally needs to link their mpi-installation in the virtual +# environment to the standard-location (see issue #512). 
Be aware that the mpi-installation # and version is determined through `apt`. Therefore, to ensure abi-compatibility of mpi the # version installed using `apt` and the version specified in the environment-files must match. # fetch mpich-version to have it consistent with it's installation from karabo -ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -# install mpich at standard location -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ +RUN MPICH_VERSION=$(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/") && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ apt update && \ - apt install mpich=$MPICH_VERSION_APT -# replace mpi with dummy-install (see issue #512) -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ + apt install mpich=$MPICH_VERSION_APT && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file From 7ae1430de2ebeeaf45dbfc0af072bd6f6e97c60e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 12 Dec 2023 14:31:50 +0100 Subject: [PATCH 153/207] bugfix added -y to apt install in Dockerfile [skip ci] :leftwards_arrow_with_hook: --- docker/user/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index f0697e61..018c6d58 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -57,5 +57,5 @@ ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] RUN MPICH_VERSION=$(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/") && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ apt update && \ - apt install mpich=$MPICH_VERSION_APT && \ + apt install -y mpich=$MPICH_VERSION_APT && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file From 6af5da5523db514a8ced173f884be23215f814d3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 12 Dec 2023 15:32:56 +0100 Subject: [PATCH 154/207] improved docs :aries: --- README.md | 6 +++--- doc/src/container.md | 20 ++++++++++---------- doc/src/development.md | 11 ++++------- doc/src/installation_user.md | 29 +++++++++++++++-------------- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 9483e46f..17d3d900 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [Example](karabo/examples/source_detection.ipynb) | [Contributors](CONTRIBUTORS.md) -Karabo is a radio astronomy software distribution for validation and benchmarking of radio telescopes and algorithms. It can be used to simulate the behaviour of the [Square Kilometer Array](https://www.skatelescope.org/the-ska-project/). Our goal is to make installation and ramp-up easier for researchers and developers. +Karabo is a radio astronomy software distribution for validation and benchmarking of radio telescopes and algorithms. It can be used to simulate the behaviour of the [Square Kilometer Array](https://www.skatelescope.org/the-ska-project/) or other supported telescopes. Our goal is to make installation and ramp-up easier for researchers and developers. Karabo includes and relies on OSKAR, RASCIL, PyBDSF, [MIGHTEE](https://arxiv.org/abs/2211.05741), [GLEAM](https://www.mwatelescope.org/science/galactic-science/gleam/), Aratmospy, Bluebild, Eidos, Dask, Tools21cm, katbeam plus configuration of 20 well-known telescopes. 
Karabo can simulate instrument behavior and atmospheric effects, run imaging algorithms, and evaluate results. @@ -21,12 +21,12 @@ You can use Karabo to build your own data processing pipelines by combinding exi Installation ------------ -The software can be installed on Linux, Windows or Windows WSL. +The software can be installed & used on Linux or Windows WSL. Please see our [documentation](https://i4ds.github.io/Karabo-Pipeline/installation_user.html) for the full installation instructions. -We also offer a [Docker](https://i4ds.github.io/Karabo-Pipeline/container.html) version. +We also offer [Docker](https://i4ds.github.io/Karabo-Pipeline/container.html) images. Contribute to Karabo --------------------- diff --git a/doc/src/container.md b/doc/src/container.md index 14d36579..d62a9bfd 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -15,10 +15,10 @@ Docker images have the advantage that the packages needed for Karabo-pipeline ar What the possibilities using Docker are is far too extensive to describe here. We refer to the official [Docker reference](https://docs.docker.com/reference/) for this. We only show here a minimal example of how Docker could be used, so you can use e.g. a [Jupyter Notebook](https://jupyter.org/) with sample code and an existing Karabo environment. ```shell -docker run -it --rm -p 8888:8888 ghcr.io/i4ds/karabo-pipeline:latest +docker run -it -v : -p 8888:8888 ghcr.io/i4ds/karabo-pipeline ``` -This starts the Docker container of the image interactively, where we forward port 8888. After that, we start the jupyter service in the container with the following command: +This starts the Docker container of the image interactively, where the port 8888 is forwarded and an editable directory is mounted. After that, you could do whatever you want. For demonstration purpose, we start the jupyter-server in the container with the following command: ```shell jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root @@ -26,18 +26,18 @@ jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root This will start a server on the same port as forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in a browser. -## Singularity Container +## Singularity Containers Singularity containers are often standard on HPC clusters, which do not require special permissions (unlike Docker). -We do not provide ready-made [Singularity containers](https://sylabs.io/). However, they can be easily created from Docker images with the following command (may take a while): +We do not provide ready-made [Singularity containers](https://sylabs.io/). However, they can be easily created from Docker images with the following command (may take a while). You may first have to load the module if it's not available `module load singularity`: ```shell -singularity pull docker://ghcr.io/i4ds/karabo-pipeline:latest +singularity pull docker://ghcr.io/i4ds/karabo-pipeline ``` -How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). Be aware that Singularity mounts the home-directory by default if start a container from your home-directory, which may not be desirable (e.g. `conda init` is done through .bashrc of the image). Be sure to disable this behavior by setting the `--no-home` flag when starting a container. 
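As a concrete illustration of the `--no-home` advice above — a minimal sketch, not part of the patch itself, assuming the default file name `karabo-pipeline_latest.sif` that `singularity pull` writes for this image and that GPU support is wanted via `--nv`:

```shell
# Sketch only: open a shell in the pulled image without mounting $HOME,
# so the image's own conda initialization is used instead of the host's ~/.bashrc.
singularity shell --no-home --nv karabo-pipeline_latest.sif
```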
+This creates a `.sif` file which acts as a singularity image and can be used to launch your application. How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). Be aware that Singularity mounts the home-directory by default if start a container from your home-directory, which may not be desirable (e.g. `conda init` is done through .bashrc of the image). Be sure to disable this behavior by setting the `--no-home` flag when starting a container. -## Sarus Container +## Sarus Containers On CSCS, it is recommended to use [Sarus containers](https://sarus.readthedocs.io/en/stable/index.html) (see CSCS [Sarus guide](https://user.cscs.ch/tools/containers/sarus/)). Sarus commands are similar to Docker or Singularity. It is recommended to create a Sarus image in an interactive SLURM job using `srun --pty bash`. @@ -53,7 +53,7 @@ module load sarus Then you can pull a Docker image to a Sarus image as follows: ```shell -sarus pull ghcr.io/i4ds/karabo-pipeline:latest +sarus pull ghcr.io/i4ds/karabo-pipeline ``` **MPI (MPICH) Support** @@ -61,7 +61,7 @@ sarus pull ghcr.io/i4ds/karabo-pipeline:latest Karabo >= `v0.22.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). ```shell -srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest +srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline ``` Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. @@ -69,5 +69,5 @@ Here, an MPI application with 16 processes is launched with your repository moun We support native-mpi hook, which allows to utilize the mpi of CSCS at optimized performance. To enable the hook, just add the `--mpi` flag of the `sarus run` command as follows: ```shell -srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline:latest +srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline ``` \ No newline at end of file diff --git a/doc/src/development.md b/doc/src/development.md index 68651d78..eeefcc87 100644 --- a/doc/src/development.md +++ b/doc/src/development.md @@ -158,20 +158,17 @@ make html We use the ` pytest` python package ([pytest docs](https://docs.pytest.org/)), with a few imports from the `unittest` package ([unittest docs](https://docs.python.org/3/library/unittest.html)). To add a new test simply go to the `karabo/test` folder. -Add tests for when you write some sort of new code that you feel like might break. - -TIP: -If you validate your code manually, consider just writing a method in a test class instead of opening a jupyter notebook and writing a new cell or a terminal window where you would execute the code you want to test. +Add tests for when you write some sort of new code that you feel like might break. Be aware that tests utilize the functionality of the testing-framework and therefore might not behave exaclty the same as you whould execute the code just as a function. 
The most important file to consider is `conftest.py`, which could impact the other tests. ## Create a Release -When everything is merged which should be merged, a new Release can be deployed on `conda-forge` as following: +When everything is merged which should be merged, a new Release can be deployed as following: - [Karabo-Pipeline | Releases](https://github.com/i4Ds/Karabo-Pipeline/releases) - Click on `Draft a new release` -- Define a Version by clicking `Choose a tag`. Currently we increment the minor version by 1. -- Set the version in pyproject.toml +- Define a Version by clicking `Choose a tag`. We follow PEP440 {major}.{minor}.{path} with a leading `v` at the beginning (see previous versions). Usually we increment the minor version by 1. - Check that the `Target` is set to `main`. - Describe the release (get inspired by the previous releases). - Click `Publish release`. - Check on [Karabo-Pipeline | Github Actions](https://github.com/i4Ds/Karabo-Pipeline/actions) that the release is succesful. - Check that the new version is on [Anaconda.org | Packages](https://anaconda.org/i4ds/karabo-pipeline) +- Check on [Karabo-Pipeline | Docker Images](https://github.com/i4ds/Karabo-Pipeline/pkgs/container/karabo-pipeline) that the released image is live. diff --git a/doc/src/installation_user.md b/doc/src/installation_user.md index 27b243ea..8c97e890 100644 --- a/doc/src/installation_user.md +++ b/doc/src/installation_user.md @@ -9,32 +9,33 @@ ## Install Karabo The following steps will install Karabo and its prerequisites (miniconda): -``` -wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.2-0-Linux-x86_64.sh -bash Miniconda3-py39_23.5.2-0-Linux-x86_64.sh -b -source ~/miniconda3/bin/activate -conda init bash -conda install -y -n base conda-libmamba-solver +**Install conda** +```shell +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh +bash Miniconda3-latest-Linux-x86_64.sh +source .bashrc +conda install -n base conda-libmamba-solver conda config --set solver libmamba -conda update -y -n base -c defaults conda -conda create -y -n karabo-env python=3.9 -conda activate karabo-env +``` + +**Install Package** +```shell +conda create -y -n karabo +conda activate karabo conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline -conda clean --all -y ``` -Karabo versions older than `v0.15.0` are deprecated and therefore installation will most likely fail. In addition, we do not support Karabo older than latest-minor version in case dependency resolving or online resources is outdated. Therefore, we strongly recommend using the latest version of Karabo. If an older version of Karabo is required, we strongly recommend using a [container](container.md), as the environment is fixed in a container. However, outdated online resources may still occur. +Karabo versions older than `v0.15.0` are deprecated and therefore installation will most likely fail. In addition, we do not support Karabo older than latest-minor version in case dependency resolving or online resources are outdated. Therefore, we strongly recommend using the latest version of Karabo. If an older version of Karabo is required, we strongly recommend using a [container](container.md), as the environment is fixed in a container. However, outdated online resources may still occur. 
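A quick way to confirm that the installation described above worked — a minimal sketch, assuming the environment name `karabo` from the commands above and that the installed package exposes `karabo.version.__version__`:

```shell
# Sketch only: activate the freshly created environment and print the installed Karabo version.
conda activate karabo
python -c "from karabo.version import __version__; print(__version__)"
```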
## Update to latest Karabo version A Karabo installation can be updated the following way: ``` conda update -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline -conda clean --all -y ``` ## Additional Notes and Troubleshooting -- If the base environment was updated, *libmamba* might fail to install. In that case, reset conda to version 22 using `conda install --rev 0 --name base` or you can try installing Karabo without *libmamba*. Using *libmamba* is not strictly required, but strongly recommended, because it should make the installation much faster and more reliable. -- You can install miniconda into a different path, use ```bash Miniconda3-py39_22.11.1-1-Linux-x86_64.sh -b -p YourDesiredPath``` instead +- Dont' install anything into the base environment except libraries which are supposed to live in there. If you accientally install packages there which are not supposed to be there, you might break some functionalities of your conda-installation. +- If you're using a system conda, it might be that you don't have access to a libmamba-solver, because the solver lives in the base environment, which belongs to root. In this case, you can ask your admin to install the solver, try an installation without the libmamba solver OR we recommend to just install conda into your home (which is the recommended solution). - If you are using WSL and running a jupyter-notebook fails, you might have to set the path to the cuda libraries as follows: ```shell From 9ea966c483f64dacc14da69971cd7f19b4f836b2 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 10:42:29 +0100 Subject: [PATCH 155/207] changed Dockerfile setup to use andromeda user instead of root :wink: --- doc/src/container.md | 2 +- docker/user/Dockerfile | 60 ++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index d62a9bfd..47218f27 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -21,7 +21,7 @@ docker run -it -v : -p 8888:8888 ghcr.io/i4ds/karabo-pip This starts the Docker container of the image interactively, where the port 8888 is forwarded and an editable directory is mounted. After that, you could do whatever you want. For demonstration purpose, we start the jupyter-server in the container with the following command: ```shell -jupyter lab --ip 0.0.0.0 --no-browser --port=8888 --allow-root +jupyter lab --ip 0.0.0.0 --no-browser --port=8888 ``` This will start a server on the same port as forwarded. Then copy the url which is given at the bottom and replace `hostname` with `localhost` and open it in a browser. 
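Since the point of this commit is to run everything as an unprivileged user, a minimal sketch of how that could be checked against a built image (the image tag `ghcr.io/i4ds/karabo-pipeline:latest` is assumed; the entrypoint already wraps commands in `conda run -n karabo`):

```shell
# Sketch only: print the user the container runs as and verify that the karabo package resolves.
docker run --rm ghcr.io/i4ds/karabo-pipeline:latest whoami
docker run --rm ghcr.io/i4ds/karabo-pipeline:latest python -c "import karabo; print('karabo import ok')"
```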
diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index 018c6d58..f7ea29e6 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -1,61 +1,59 @@ -# for copying example-notebooks & in case env is installed through `environment.yaml` -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-repo -ARG GIT_REV -RUN apt-get update && apt-get install -y git && \ - mkdir Karabo-Pipeline && \ - cd Karabo-Pipeline && \ - git init && \ - git remote add origin https://github.com/i4Ds/Karabo-Pipeline.git && \ - git fetch origin ${GIT_REV} && \ - git reset --hard ${GIT_REV} - -# main build FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 # build: user|test, KARABO_VERSION: version to install from anaconda.org in case build=user: `{major}.{minor}.{patch}` (no leading 'v') -ARG BUILD=user KARABO_VERSION="" +ARG GIT_REV="main" BUILD="user" KARABO_VERSION="" RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ - PATH="/opt/conda/bin:${PATH}" \ - CONDA_PREFIX="/opt/conda" \ + PATH="/home/andromeda/miniconda3/bin:${PATH}" \ IS_DOCKER_CONTAINER="true" +RUN useradd -ms /bin/bash andromeda +USER andromeda +WORKDIR /home/andromeda RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} && \ - conda init + /bin/bash ~/miniconda.sh -b && \ + conda init bash && \ + rm ~/miniconda.sh SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ conda create -y -n karabo # change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] -COPY --from=karabo-repo Karabo-Pipeline/ repo/ -RUN if [ "$BUILD" = "user" ] ; then \ +RUN mkdir Karabo-Pipeline && \ + cd Karabo-Pipeline && \ + git init && \ + git remote add origin https://github.com/i4Ds/Karabo-Pipeline.git && \ + git fetch origin ${GIT_REV} && \ + git reset --hard ${GIT_REV} && \ + if [ "$BUILD" = "user" ] ; then \ conda install -y -c i4ds -c conda-forge -c "nvidia/label/cuda-11.7.1" karabo-pipeline="$KARABO_VERSION"; \ elif [ "$BUILD" = "test" ] ; then \ - cd "repo"; \ conda env update -f="environment.yaml"; \ pip install --no-deps "."; \ - cd ".." ; \ else \ exit 1; \ fi && \ echo "conda activate karabo" >> ~/.bashrc && \ - mkdir /workspace && \ - cp -r "repo/karabo/examples" "/workspace/examples/" && \ - rm -rf "repo/" && \ + mkdir ~/karabo && \ + cp -r "karabo/examples" ~/karabo/examples/ && \ + cd ".." && \ + rm -rf "Karabo-Pipeline/" && \ pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo -WORKDIR /workspace -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] -# The folowwing steps install mpich at a standard location to allow a native mpi-hook +# The following steps installs mpich at a standard location to allow a native mpi-hook # To make this work, conda additionally needs to link their mpi-installation in the virtual # environment to the standard-location (see issue #512). Be aware that the mpi-installation # and version is determined through `apt`. Therefore, to ensure abi-compatibility of mpi the # version installed using `apt` and the version specified in the environment-files must match. 
# fetch mpich-version to have it consistent with it's installation from karabo -RUN MPICH_VERSION=$(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/") && \ +ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +USER root +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ - apt update && \ - apt install -y mpich=$MPICH_VERSION_APT && \ - conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" \ No newline at end of file + apt-get update && \ + apt-get install -y mpich=$MPICH_VERSION_APT +USER andromeda +RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ + conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From dce1db7c51f11e3acf4954dfbcadf1477d5f3bfc Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 10:53:28 +0100 Subject: [PATCH 156/207] minor update in dockerfile :on: --- docker/user/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index f7ea29e6..ca71d406 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -15,7 +15,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Li SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ - conda create -y -n karabo + conda create -y -n karabo && \ + conda config --set auto_activate_base false # change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] RUN mkdir Karabo-Pipeline && \ From 376581dd8c1aecb4e32c6b7aa1c745ef8e5f7e2d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 10:56:19 +0100 Subject: [PATCH 157/207] changed run-stages of dockerfile :stew: --- docker/user/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index ca71d406..e5102370 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -16,6 +16,7 @@ SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ conda create -y -n karabo && \ + echo "conda activate karabo" >> ~/.bashrc && \ conda config --set auto_activate_base false # change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] @@ -33,7 +34,6 @@ RUN mkdir Karabo-Pipeline && \ else \ exit 1; \ fi && \ - echo "conda activate karabo" >> ~/.bashrc && \ mkdir ~/karabo && \ cp -r "karabo/examples" ~/karabo/examples/ && \ cd ".." 
&& \ From 98e67dc0bbac6b850e9e15446620ff6e573d96f6 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 14:46:13 +0100 Subject: [PATCH 158/207] removed user-changeing because of singularity uid issues :gun: --- docker/user/Dockerfile | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/docker/user/Dockerfile b/docker/user/Dockerfile index e5102370..3d917e27 100644 --- a/docker/user/Dockerfile +++ b/docker/user/Dockerfile @@ -3,21 +3,17 @@ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 ARG GIT_REV="main" BUILD="user" KARABO_VERSION="" RUN apt-get update && apt-get install -y git gcc gfortran libarchive13 wget curl nano ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ - PATH="/home/andromeda/miniconda3/bin:${PATH}" \ + PATH="/opt/conda/bin:${PATH}" \ IS_DOCKER_CONTAINER="true" -RUN useradd -ms /bin/bash andromeda -USER andromeda -WORKDIR /home/andromeda RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b && \ - conda init bash && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + conda init --system --all && \ rm ~/miniconda.sh SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ conda create -y -n karabo && \ - echo "conda activate karabo" >> ~/.bashrc && \ - conda config --set auto_activate_base false + echo "conda activate karabo" >> ~/.bashrc # change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] RUN mkdir Karabo-Pipeline && \ @@ -49,12 +45,13 @@ RUN mkdir Karabo-Pipeline && \ # fetch mpich-version to have it consistent with it's installation from karabo ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -USER root RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ apt-get update && \ apt-get install -y mpich=$MPICH_VERSION_APT -USER andromeda RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" + +# Additional setup +WORKDIR /workspace ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From def50fdf272fc45d60d6a9360a3caddc03e1e40c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 14:58:58 +0100 Subject: [PATCH 159/207] moved Dockerfile to root & removed docker-dir :bus: --- .github/workflows/build-docker-image.yml | 2 +- docker/user/Dockerfile => Dockerfile | 0 docker/dev/Dockerfile | 41 ------------------------ 3 files changed, 1 insertion(+), 42 deletions(-) rename docker/user/Dockerfile => Dockerfile (100%) delete mode 100644 docker/dev/Dockerfile diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index 0de210f8..e019b2d0 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -104,7 +104,7 @@ jobs: --build-arg GIT_REV=${{ env.gitrev }} \ --build-arg BUILD=${{ env.build }} \ --build-arg KARABO_VERSION=${{ env.version }} \ - -f docker/user/Dockerfile \ + -f Dockerfile \ -t ${{ env.IMG_ADDR }}:${{ env.version }} \ . 
if [[ ${{ env.latest }} == 'true' ]]; then diff --git a/docker/user/Dockerfile b/Dockerfile similarity index 100% rename from docker/user/Dockerfile rename to Dockerfile diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile deleted file mode 100644 index ec89e060..00000000 --- a/docker/dev/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# This Dockerfile is designed for CI/CD purpose (not mounting your repo and work with the container) -# If you want to mount your repo, you more or less just need the first stage, then install your own -# deps from the mounted repo, and don't kill the container once you've installed your environment. - -# first stage is to have a more or less consistent base-image -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 as karabo-base -RUN apt-get update && apt-get install -y git libarchive13 wget curl nano -ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ - PATH="/opt/conda/bin:${PATH}" \ - CONDA_PREFIX="/opt/conda" \ - IS_DOCKER_CONTAINER="true" -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p ${CONDA_PREFIX} -RUN conda init -SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] -RUN conda install -n base conda-libmamba-solver && \ - conda config --set solver libmamba && \ - conda create -y -n karabo && \ - echo "conda activate karabo" >> ~/.bashrc -# create venv to not f*** up base-env in which libmamba solver lives -SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] -WORKDIR /workspace - -# second stage is to build an image which is changing very often (e.g. env installation for CI-jobs) -# please ALWAYS pass the git-commit-rev (NOT the branch) as build-arg to ensure that not a cached layer is used. -# because dev-image is used for ci-purpose and it's mpi-implementation and version is not known prior, -# we don't compile the mpi from source here because it takes just too long. This makes an mpi-hook not possible. 
-FROM karabo-base -# redefine envs because they're just scoped per build-stage -ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ - PATH="/opt/conda/bin:${PATH}" \ - CONDA_PREFIX="/opt/conda" \ - IS_DOCKER_CONTAINER="true" -ARG GIT_REV -# note that installation like this has several assumptions about the used files like: -# conda-channel definition & not naming env in `environment.yaml`, dev-optional dep in pyproject.toml -RUN git clone --branch ${GIT_REV} --depth=1 https://github.com/i4Ds/Karabo-Pipeline.git && \ - cd Karabo-Pipeline && \ - conda env update -f=environment.yaml && \ - pip install -e ".[dev]" -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From cd6532a5f18ecb82da3a8bfec60724383c1d1295 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 13 Dec 2023 17:27:32 +0100 Subject: [PATCH 160/207] added bash-env to env-vars for singularity noninteractive-shell [skip ci] :clock830: --- Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3d917e27..89c4c9a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:/usr/local/cuda/lib64" \ IS_DOCKER_CONTAINER="true" RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p /opt/conda && \ - conda init --system --all && \ + conda init && \ rm ~/miniconda.sh SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ @@ -52,6 +52,11 @@ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" +# add bashrc to non-root directory & set bash-env accordingly (important for singularity containers) +RUN mkdir opt/etc && \ + cp ~/.bashrc /opt/etc/bashrc +ENV BASH_ENV=/opt/etc/bashrc + # Additional setup WORKDIR /workspace ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From ec9e443e8ca4fa6e8700b68c4d13c7bd17fff65d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 14 Dec 2023 11:22:23 +0100 Subject: [PATCH 161/207] bugfix: correctly activate venv in docker & singularity container for interactive and non-interactive shells :yellow_heart: --- Dockerfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 89c4c9a0..c6e50d89 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,10 +52,13 @@ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" -# add bashrc to non-root directory & set bash-env accordingly (important for singularity containers) +# add conda-init script to non-root directory & set bash-env accordingly for interactive and +# non-interactive shells for docker & singularity RUN mkdir opt/etc && \ - cp ~/.bashrc /opt/etc/bashrc -ENV BASH_ENV=/opt/etc/bashrc + echo "conda activate karabo" >> ~/.bashrc && \ + cat ~/.bashrc | sed -n '/conda initialize/,/conda activate/p' > /opt/etc/conda_init_script && \ + echo "source /opt/etc/conda_init_script" >> /etc/profile +ENV BASH_ENV=/opt/etc/conda_init_script # Additional setup WORKDIR /workspace From 8c4be18df0aa6f127953c368ee87c6e12f16da03 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 14 Dec 2023 12:59:27 +0100 Subject: [PATCH 162/207] fixed conda 
activate functionality for docker & singularity interactive and non-interactive shells :syringe: --- Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index c6e50d89..b6178b49 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,13 +52,14 @@ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" -# add conda-init script to non-root directory & set bash-env accordingly for interactive and -# non-interactive shells for docker & singularity +# set bash-env accordingly for interactive and non-interactive shells for docker & singularity RUN mkdir opt/etc && \ echo "conda activate karabo" >> ~/.bashrc && \ - cat ~/.bashrc | sed -n '/conda initialize/,/conda activate/p' > /opt/etc/conda_init_script && \ - echo "source /opt/etc/conda_init_script" >> /etc/profile + cat ~/.bashrc | sed -n '/conda initialize/,/conda activate/p' > /opt/etc/conda_init_script ENV BASH_ENV=/opt/etc/conda_init_script +RUN echo "source $BASH_ENV" >> /etc/bash.bashrc && \ + echo "source $BASH_ENV" >> /etc/skel/.bashrc && \ + echo "source $BASH_ENV" >> /etc/profile # Additional setup WORKDIR /workspace From 1a73a4a5dbcfb01a82c696f8562261668889f0f8 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 14 Dec 2023 14:03:43 +0100 Subject: [PATCH 163/207] removed unnecessary sourcing in dockerfile [skip ci] :symbols: --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b6178b49..f7709f0d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -58,7 +58,6 @@ RUN mkdir opt/etc && \ cat ~/.bashrc | sed -n '/conda initialize/,/conda activate/p' > /opt/etc/conda_init_script ENV BASH_ENV=/opt/etc/conda_init_script RUN echo "source $BASH_ENV" >> /etc/bash.bashrc && \ - echo "source $BASH_ENV" >> /etc/skel/.bashrc && \ echo "source $BASH_ENV" >> /etc/profile # Additional setup From ff833e972ce03325edb704a530180d3e4d195ea7 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 14 Dec 2023 15:23:50 +0100 Subject: [PATCH 164/207] added ldconfig after mpich-installation in dockerfile :bust_in_silhouette: --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f7709f0d..359573b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,8 @@ ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." 
$2 "-*"}') && \ apt-get update && \ - apt-get install -y mpich=$MPICH_VERSION_APT + apt-get install -y mpich=$MPICH_VERSION_APT && \ + ldconfig RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" From dbef0912c95a3a9f81f9ea1b934ef3096fce2b5d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 14 Dec 2023 16:08:48 +0100 Subject: [PATCH 165/207] minor changes in dockerfile [skip ci] :fish: --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 359573b6..1e6f5d57 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,8 +12,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.5.0-3-Li SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] RUN conda install -n base conda-libmamba-solver && \ conda config --set solver libmamba && \ - conda create -y -n karabo && \ - echo "conda activate karabo" >> ~/.bashrc + conda create -y -n karabo # change venv because libmamba solver lives in base and any serious environment update could f*** up the linked deps like `libarchive.so` SHELL ["conda", "run", "-n", "karabo", "/bin/bash", "-c"] RUN mkdir Karabo-Pipeline && \ @@ -48,8 +47,7 @@ ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+ RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ apt-get update && \ - apt-get install -y mpich=$MPICH_VERSION_APT && \ - ldconfig + apt-get install -y mpich=$MPICH_VERSION_APT RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" From cffda261d98d302eb623555c3a30309e46ec046f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 21 Dec 2023 10:19:12 +0100 Subject: [PATCH 166/207] outcommented mpi-hook from dockerfile because it's still error-prone :loudspeaker: --- Dockerfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1e6f5d57..3a27dc13 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,13 +43,13 @@ RUN mkdir Karabo-Pipeline && \ # version installed using `apt` and the version specified in the environment-files must match. # fetch mpich-version to have it consistent with it's installation from karabo -ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ - MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." $2 "-*"}') && \ - apt-get update && \ - apt-get install -y mpich=$MPICH_VERSION_APT -RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ - conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" +# ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' +# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ +# MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." 
$2 "-*"}') && \ +# apt-get update && \ +# apt-get install -y mpich=$MPICH_VERSION_APT +# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ +# conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" # set bash-env accordingly for interactive and non-interactive shells for docker & singularity RUN mkdir opt/etc && \ From 7cdd667168d63ba5ca9910b80cd540a6df2ff8ca Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 21 Dec 2023 12:43:32 +0100 Subject: [PATCH 167/207] adapted container-doc :leftwards_arrow_with_hook: --- doc/src/container.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 47218f27..a30bff42 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -28,6 +28,8 @@ This will start a server on the same port as forwarded. Then copy the url which ## Singularity Containers +**Note:** Currently, building a Singularity container from our docker-registry and run karabo within it doesn't work properly. This is work in progress. Therfore, the following doc regarding Singularity are not relevant. + Singularity containers are often standard on HPC clusters, which do not require special permissions (unlike Docker). We do not provide ready-made [Singularity containers](https://sylabs.io/). However, they can be easily created from Docker images with the following command (may take a while). You may first have to load the module if it's not available `module load singularity`: @@ -66,8 +68,4 @@ srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. -We support native-mpi hook, which allows to utilize the mpi of CSCS at optimized performance. To enable the hook, just add the `--mpi` flag of the `sarus run` command as follows: - -```shell -srun -N16 -n16 -C gpu sarus run --mpi --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline -``` \ No newline at end of file +Sarus containers allow native mpi-hook to utilize the mpi of CSCS at optimized performance. However, this feature currently is not available from the karabo image. This is work in progress. 
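A minimal sketch of how the pulled image could be smoke-tested from Python inside such a container (assuming only that the `karabo` package is importable in the image's active environment):

```python
# Sketch of a smoke test: confirm the Karabo environment inside the container.
import os

import karabo

print("karabo package loaded from:", os.path.dirname(karabo.__file__))
print("working directory:", os.getcwd())  # /workspace is the image default
```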
\ No newline at end of file From e9718c6fee17e25186500e93853a1c166ecf7057 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 9 Jan 2024 14:54:28 +0100 Subject: [PATCH 168/207] bugfix: in test docker-image to enable --only-mpi custom flag for pytest :notebook: --- .github/workflows/build-docker-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index e019b2d0..72f0674f 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -112,9 +112,9 @@ jobs: fi - name: Test image - run: | # karabo-sitepackage-location used instead of --pyargs because --only-mpi is a custom-flag of karabo which lives in the site-packages + run: | # karabo-sitepackage-location used for mpirun instead of --pyargs because --only-mpi is a custom-flag of karabo which lives in the site-packages docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi; pytest $SITE_PKGS/karabo/test' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; pytest --pyargs karabo.test; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); cd $SITE_PKGS/karabo; mpirun -n 2 pytest --only-mpi' - name: Docker push shell: bash -l {0} run: | From 7abc796edf8f13bc6fdec8a9dee7017b81df412b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 9 Jan 2024 14:55:37 +0100 Subject: [PATCH 169/207] added karabo shared lib to ldconfig cache to enable native cscs-mpi-hook :clock630: --- Dockerfile | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3a27dc13..beee8d30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,21 +36,6 @@ RUN mkdir Karabo-Pipeline && \ pip install jupyterlab ipykernel pytest && \ python -m ipykernel install --user --name=karabo -# The following steps installs mpich at a standard location to allow a native mpi-hook -# To make this work, conda additionally needs to link their mpi-installation in the virtual -# environment to the standard-location (see issue #512). Be aware that the mpi-installation -# and version is determined through `apt`. Therefore, to ensure abi-compatibility of mpi the -# version installed using `apt` and the version specified in the environment-files must match. - -# fetch mpich-version to have it consistent with it's installation from karabo -# ARG MPICH_EVAL='echo $(conda list mpich -c | sed "s/.*mpich-\([0-9]\+\(\.[0-9]\+\)\+\)-.*/\1/")' -# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ -# MPICH_VERSION_APT=$(echo "$MPICH_VERSION" | awk -F. '{print $1 "." 
$2 "-*"}') && \ -# apt-get update && \ -# apt-get install -y mpich=$MPICH_VERSION_APT -# RUN MPICH_VERSION=$(eval $MPICH_EVAL) && \ -# conda install --force-reinstall -c conda-forge -y "mpich=${MPICH_VERSION}=external_*" - # set bash-env accordingly for interactive and non-interactive shells for docker & singularity RUN mkdir opt/etc && \ echo "conda activate karabo" >> ~/.bashrc && \ @@ -59,6 +44,10 @@ ENV BASH_ENV=/opt/etc/conda_init_script RUN echo "source $BASH_ENV" >> /etc/bash.bashrc && \ echo "source $BASH_ENV" >> /etc/profile +# link packaged mpich-version with ldconfig to enable mpi-hook (it also links everything else, but shouldn't be an issue) +RUN echo "$CONDA_PREFIX"/lib > /etc/ld.so.conf.d/conda.conf && \ + ldconfig + # Additional setup WORKDIR /workspace ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "karabo"] \ No newline at end of file From da0bc8b940f2bec3f91e894cf18e742f78c372dd Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 9 Jan 2024 17:30:18 +0100 Subject: [PATCH 170/207] replaced weird file-handler root-dir setup with /tmp with honor of TMP, TMPDIR & TEMP :globe_with_meridians: --- karabo/util/file_handler.py | 76 +++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 4f1c43d5..93e6f108 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -11,31 +11,75 @@ from karabo.util.plotting_util import Font -def _get_default_root_dir() -> str: - karabo_folder = "karabo_folder" - scratch = os.environ.get("SCRATCH") - if scratch is not None: - root_parent = scratch - else: - root_parent = os.getcwd() - root_dir = os.path.join(root_parent, karabo_folder) - return os.path.abspath(root_dir) +def _get_tmp_dir() -> str: + """Gets the according tmpdir. + + Honors TMPDIR, TEMP and TMP environment variable(s). + The only thing not allowed is a collision between the mentioned env-vars. + + Returns: + path of tmpdir + """ + tmpdir = f"{os.path.sep}tmp" + env_check: Optional[str] = None # variable to check previous environment variables + environment_varname = "" + if (TMPDIR := os.environ.get("TMPDIR")) is not None: + tmpdir = os.path.abspath(TMPDIR) + env_check = TMPDIR + environment_varname = "TMPDIR" + if (TEMP := os.environ.get("TEMP")) is not None: + if env_check is not None: + if TEMP != env_check: + raise RuntimeError( + f"Environment variables collision: TEMP={TEMP} != " + + f"{environment_varname}={env_check}" + ) + else: + tmpdir = os.path.abspath(TEMP) + env_check = TEMP + environment_varname = "TEMP" + if (TMP := os.environ.get("TMP")) is not None: + if env_check is not None: + if TMP != env_check: + raise RuntimeError( + f"Environment variables collision: TMP={TMP} != " + + f"{environment_varname}={env_check}" + ) + else: + tmpdir = os.path.abspath(TMP) + env_check = TEMP + environment_varname = "TEMP" + return tmpdir + + +def _get_cache_dir() -> str: + """Gets a default cache-dir. + + Returns: + path of cache-dir + """ + tmpdir = _get_tmp_dir() + return os.path.join(tmpdir, "karabo-cache") class FileHandler: """Utility file-handler for unspecified directories. - Provides directory-management functionality in case no dir-path was specified. - `FileHandler.root` is a static root-directory where each subdir is located. + Provides chache-management functionality. + `FileHandler.root` is a static root-directory where each cache-dir is located. 
+ In case you want to extract something specific from the cache, the path is usually + printed blue & bold in stdout. + Set `FileHandler.root` to change the directory where files and dirs will be saved. - Subdirs are usually {prefix}_{fh_dir_identifier}_{uuid4[:8]} in case `prefix` - is defined, otherwise just {fh_dir_identifier}_{uuid4[:8]}. + Otherwise, we provide $TMP, $TMPDIR & $TEMP with a following /karabo-cache as root. + Subdirs are usually {prefix}_{fh_dir_identifier}_{uuid4} in case `prefix` + is defined, otherwise just {fh_dir_identifier}_{uuid4}. This class provides an additional security layer for the removal of subdirs - in case a root is specified where other files and directories live. - FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using with. + in case a root is specified where other files and directories live. + FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. """ - root: str = _get_default_root_dir() + root: str = _get_cache_dir() fh_dir_identifier = "fhdir" # additional security to protect against dir-removal def __init__( From 02a42817350839fe312410c1b28e498607787d88 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 11 Jan 2024 11:30:50 +0100 Subject: [PATCH 171/207] changed tmp-dir-name setup to avoid collisions :ox: --- karabo/util/file_handler.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 93e6f108..c33fc6f3 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -2,7 +2,9 @@ import glob import os +import random import shutil +import string import uuid from types import TracebackType from typing import Optional, Union @@ -55,11 +57,21 @@ def _get_tmp_dir() -> str: def _get_cache_dir() -> str: """Gets a default cache-dir. + dir-name: karabo-($USER-)<10-rnd-asci-letters-and-digits> + Returns: path of cache-dir """ tmpdir = _get_tmp_dir() - return os.path.join(tmpdir, "karabo-cache") + delimiter = "-" + prefix = "karabo" + user = os.environ.get("USER") + if user is not None: + prefix = delimiter.join((prefix, user)) + suffix = "".join(random.choices(string.ascii_letters + string.digits, k=10)) + cache_dir_name = delimiter.join((prefix, suffix)) + cache_dir = os.path.join(tmpdir, cache_dir_name) + return cache_dir class FileHandler: From 01b86af1186dfaead80107a5af8c27e7576d6c96 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 11 Jan 2024 11:46:52 +0100 Subject: [PATCH 172/207] loosened mpich-version constraints because we no longer rely on apt to install mpich :white_medium_square: --- conda/meta.yaml | 2 +- doc/src/container.md | 4 +++- environment.yaml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 86552791..11e84e9c 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -40,7 +40,7 @@ requirements: - matplotlib - montagepy =6.0.0=*_0 - mpi4py - - mpich =4.0 + - mpich - nbformat - nbconvert - {{ pin_compatible('numpy') }} diff --git a/doc/src/container.md b/doc/src/container.md index a30bff42..9f31c672 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -60,6 +60,8 @@ sarus pull ghcr.io/i4ds/karabo-pipeline **MPI (MPICH) Support** +This is something which is still in progress... Thus the support is not given atm. + Karabo >= `v0.22.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). 
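A minimal sketch of what such an MPICH-based run looks like from the user side is a small `mpi4py` script started with the `srun`/`sarus run` command shown below (the file name `mpi_check.py` is only an example; `mpi4py` is assumed to be available, as it is listed in `environment.yaml`):

```python
# mpi_check.py -- sketch of an MPI sanity check.
# Every launched process (SLURM task) reports its rank, the total number of
# ranks and the node it runs on.
from mpi4py import MPI

comm = MPI.COMM_WORLD       # communicator spanning all launched processes
rank = comm.Get_rank()      # id of this process: 0 .. size-1
size = comm.Get_size()      # total number of MPI processes
print(f"rank {rank}/{size} on {MPI.Get_processor_name()}")
```

If every task prints a distinct rank with the expected total, the MPI processes inside the container are wired up correctly.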
```shell @@ -68,4 +70,4 @@ srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. -Sarus containers allow native mpi-hook to utilize the mpi of CSCS at optimized performance. However, this feature currently is not available from the karabo image. This is work in progress. \ No newline at end of file +Sarus containers allow native mpi-hook to utilize the mpi of CSCS at optimized performance. This can be done by simply adding the `--mpi` flag to the sarus run command. Probably, there will be some warning about the minor version of some libmpi-files. However, according to [sarus abi-compatibility](https://sarus.readthedocs.io/en/stable/user/abi_compatibility.html) this shouldn't be an issue. \ No newline at end of file diff --git a/environment.yaml b/environment.yaml index 74fb55e8..738bd1c8 100644 --- a/environment.yaml +++ b/environment.yaml @@ -21,7 +21,7 @@ dependencies: # package-version & build-number of Karabo-Feedstock deps should - matplotlib - montagepy =6.0.0=*_0 - mpi4py - - mpich =4.0 # version needs to be compatible with the `apt` installer in Dockerfile (see PR #526) + - mpich - nbformat - nbconvert - numpy >=1.21, !=1.24.0 From 06739b7f0f16b02f9a610f83f2adb2b241d284d3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 11 Jan 2024 16:25:26 +0100 Subject: [PATCH 173/207] updated mpi-doc :rage: --- doc/src/container.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index 9f31c672..a8e6faf1 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -60,14 +60,12 @@ sarus pull ghcr.io/i4ds/karabo-pipeline **MPI (MPICH) Support** -This is something which is still in progress... Thus the support is not given atm. - -Karabo >= `v0.22.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). +Karabo >= `v0.22.0` supports [MPICH](https://www.mpich.org/)-based MPI processes that enable multi-node workflows on CSCS (or any other system which supports MPICH MPI). Note, on CSCS, mpi-runs are launched through SLURM (not through mpirun or mpiexec) by setting the `-n` (total-mpi-tasks) and `-N` (mpi-tasks-per-node) options when launching a job. So you have to set them according to your task. ```shell -srun -N16 -n16 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline +srun -N2 -n2 -C gpu sarus run --mount=type=bind,source=,destination=/workspace ghcr.io/i4ds/karabo-pipeline ``` -Here, an MPI application with 16 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. +Here, an MPI application with 2 processes is launched with your repository mounted in the container (/workspace is the default working-directory). Make sure that you know how many processes are reasonable to run because it can rapidly sum up to a large number of nodehours. Sarus containers allow native mpi-hook to utilize the mpi of CSCS at optimized performance. This can be done by simply adding the `--mpi` flag to the sarus run command. 
Probably, there will be some warning about the minor version of some libmpi-files. However, according to [sarus abi-compatibility](https://sarus.readthedocs.io/en/stable/user/abi_compatibility.html) this shouldn't be an issue. \ No newline at end of file From edc4d9e60715081ebb9d99eeb658407ee84dd2f8 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 11 Jan 2024 16:57:26 +0100 Subject: [PATCH 174/207] minor bugfix for mpi-tests in ci :hurtrealbad: --- .github/workflows/build-docker-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index 72f0674f..a7cc27c7 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -114,7 +114,7 @@ jobs: - name: Test image run: | # karabo-sitepackage-location used for mpirun instead of --pyargs because --only-mpi is a custom-flag of karabo which lives in the site-packages docker run --rm ${{ env.IMG_ADDR }}:${{ env.version }} bash -c \ - 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; pytest --pyargs karabo.test; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); cd $SITE_PKGS/karabo; mpirun -n 2 pytest --only-mpi' + 'export IS_GITHUB_RUNNER=true RUN_GPU_TESTS=false RUN_NOTEBOOK_TESTS=false; pytest --pyargs karabo.test; SITE_PKGS=$(pip show karabo-pipeline | grep Location | sed "s/.*\(\/opt\/conda.*\).*/\1/"); mpirun -n 2 pytest --only-mpi $SITE_PKGS/karabo/test' - name: Docker push shell: bash -l {0} run: | From c9dc5209a1028ec555d8f0af5ccf98827bc71361 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 12 Jan 2024 09:49:39 +0100 Subject: [PATCH 175/207] added scratch as a possible tmpdir in FileHandler :alarm_clock: --- karabo/util/file_handler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index c33fc6f3..3ab454ef 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -16,13 +16,21 @@ def _get_tmp_dir() -> str: """Gets the according tmpdir. + Defined env-var-dir > scratch-dir > tmp-dir + Honors TMPDIR, TEMP and TMP environment variable(s). The only thing not allowed is a collision between the mentioned env-vars. Returns: path of tmpdir """ + # first guess is just /tmp (low prio) tmpdir = f"{os.path.sep}tmp" + # second guess is if scratch is available (mid prio) + scratch = os.environ.get("SCRATCH") + if scratch is not None and os.path.exists(scratch): + tmpdir = scratch + # third guess is to honor the env-variables mentioned (high prio) env_check: Optional[str] = None # variable to check previous environment variables environment_varname = "" if (TMPDIR := os.environ.get("TMPDIR")) is not None: From 425b185e261e7c39443645c0790b9e5e0043ee56 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Fri, 12 Jan 2024 10:24:43 +0100 Subject: [PATCH 176/207] bugfix get-tmp-dir :dragon_face: --- karabo/util/file_handler.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 3ab454ef..230296d2 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -18,7 +18,7 @@ def _get_tmp_dir() -> str: Defined env-var-dir > scratch-dir > tmp-dir - Honors TMPDIR, TEMP and TMP environment variable(s). + Honors TMPDIR and TMP environment variable(s). The only thing not allowed is a collision between the mentioned env-vars. 
Returns: @@ -27,8 +27,7 @@ def _get_tmp_dir() -> str: # first guess is just /tmp (low prio) tmpdir = f"{os.path.sep}tmp" # second guess is if scratch is available (mid prio) - scratch = os.environ.get("SCRATCH") - if scratch is not None and os.path.exists(scratch): + if (scratch := os.environ.get("SCRATCH")) is not None and os.path.exists(scratch): tmpdir = scratch # third guess is to honor the env-variables mentioned (high prio) env_check: Optional[str] = None # variable to check previous environment variables @@ -37,17 +36,6 @@ def _get_tmp_dir() -> str: tmpdir = os.path.abspath(TMPDIR) env_check = TMPDIR environment_varname = "TMPDIR" - if (TEMP := os.environ.get("TEMP")) is not None: - if env_check is not None: - if TEMP != env_check: - raise RuntimeError( - f"Environment variables collision: TEMP={TEMP} != " - + f"{environment_varname}={env_check}" - ) - else: - tmpdir = os.path.abspath(TEMP) - env_check = TEMP - environment_varname = "TEMP" if (TMP := os.environ.get("TMP")) is not None: if env_check is not None: if TMP != env_check: @@ -57,8 +45,8 @@ def _get_tmp_dir() -> str: ) else: tmpdir = os.path.abspath(TMP) - env_check = TEMP - environment_varname = "TEMP" + env_check = TMP + environment_varname = "TMP" return tmpdir @@ -100,7 +88,7 @@ class FileHandler: """ root: str = _get_cache_dir() - fh_dir_identifier = "fhdir" # additional security to protect against dir-removal + fh_dir_identifier = "fhdir" # additional protection against dir-removal def __init__( self, From 5d8644ea81ab1888d827f9973b75799e775069f4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 08:34:27 +0100 Subject: [PATCH 177/207] added cachetools as dep :innocent: --- conda/meta.yaml | 1 + environment.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/meta.yaml b/conda/meta.yaml index 11e84e9c..9f00a353 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -27,6 +27,7 @@ requirements: - astropy - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 + - cachetools - cuda-cudart - dask >=2022.12.1 - dask-mpi diff --git a/environment.yaml b/environment.yaml index 738bd1c8..5b4a826b 100644 --- a/environment.yaml +++ b/environment.yaml @@ -8,6 +8,7 @@ dependencies: # package-version & build-number of Karabo-Feedstock deps should - astropy - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 + - cachetools - cuda-cudart - dask >=2022.12.1 - dask-mpi From efaaf859f5f70ae3b72a5a5e81d8d37919556b6e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 11:55:29 +0100 Subject: [PATCH 178/207] redesigned FileHandler for short- and long-term-memory caching :rage: --- conda/meta.yaml | 1 - environment.yaml | 1 - karabo/util/file_handler.py | 266 ++++++++++++++++++++---------------- 3 files changed, 150 insertions(+), 118 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9f00a353..11e84e9c 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -27,7 +27,6 @@ requirements: - astropy - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 - - cachetools - cuda-cudart - dask >=2022.12.1 - dask-mpi diff --git a/environment.yaml b/environment.yaml index 5b4a826b..738bd1c8 100644 --- a/environment.yaml +++ b/environment.yaml @@ -8,7 +8,6 @@ dependencies: # package-version & build-number of Karabo-Feedstock deps should - astropy - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 - - cachetools - cuda-cudart - dask >=2022.12.1 - dask-mpi diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 230296d2..ac87fd99 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -5,13 +5,17 @@ 
import random import shutil import string -import uuid +from copy import copy from types import TracebackType -from typing import Optional, Union +from typing import Literal, Optional, Union, overload + +from typing_extensions import assert_never from karabo.util._types import DirPathType, FilePathType from karabo.util.plotting_util import Font +_LongShortTermType = Literal["long", "short"] + def _get_tmp_dir() -> str: """Gets the according tmpdir. @@ -50,24 +54,36 @@ def _get_tmp_dir() -> str: return tmpdir -def _get_cache_dir() -> str: - """Gets a default cache-dir. +def _get_rnd_str(k: int, seed: str | int | float | bytes | None = None) -> str: + random.seed(seed) + return "".join(random.choices(string.ascii_letters + string.digits, k=k)) + + +def _get_cache_dir(term: _LongShortTermType) -> str: + """Creates cache-dir-name. - dir-name: karabo-($USER-)<10-rnd-asci-letters-and-digits> + dir-name: karabo--($USER-)<10-rnd-asci-letters-and-digits> Returns: - path of cache-dir + cache-dir-name """ - tmpdir = _get_tmp_dir() delimiter = "-" prefix = "karabo" + if term == "long": + prefix = delimiter.join((prefix, "LTM")) + elif term == "short": + prefix = delimiter.join((prefix, "STM")) + else: + assert_never(term) user = os.environ.get("USER") if user is not None: prefix = delimiter.join((prefix, user)) - suffix = "".join(random.choices(string.ascii_letters + string.digits, k=10)) + seed = user + term + else: + seed = "42" + term + suffix = _get_rnd_str(k=10, seed=seed) cache_dir_name = delimiter.join((prefix, suffix)) - cache_dir = os.path.join(tmpdir, cache_dir_name) - return cache_dir + return cache_dir_name class FileHandler: @@ -79,146 +95,164 @@ class FileHandler: printed blue & bold in stdout. Set `FileHandler.root` to change the directory where files and dirs will be saved. - Otherwise, we provide $TMP, $TMPDIR & $TEMP with a following /karabo-cache as root. - Subdirs are usually {prefix}_{fh_dir_identifier}_{uuid4} in case `prefix` - is defined, otherwise just {fh_dir_identifier}_{uuid4}. - This class provides an additional security layer for the removal of subdirs - in case a root is specified where other files and directories live. + The dir-structure is as follows where "tmp" is `FileHandler.root`: + + tmp + ├── karabo-LTM--<10 rnd chars+digits> + │  ├── some-dir + │  └── some-file + └── karabo-STM--<10 rnd chars+digits> + ├── some-dir + └── some-file + + LTM stand for long-term-memory (self.ltm) and STM for short-term-memory (self.stm). + The data-products usually get into in the STM directory. + FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. """ - root: str = _get_cache_dir() - fh_dir_identifier = "fhdir" # additional protection against dir-removal + root: str = _get_tmp_dir() def __init__( self, - prefix: Optional[str] = None, - verbose: bool = True, ) -> None: - """Creates `FileHandler` instance with the according sub-directory. 
+ """Creates `FileHandler` instance.""" + self._ltm_dir_name = _get_cache_dir(term="long") + self._stm_dir_name = _get_cache_dir(term="short") + # tmps is an instance bound dirs and/or files registry for STM + self.tmps: list[str] = list() + + @property + def ltm(self) -> str: + ltm_path = os.path.join(FileHandler.root, self._ltm_dir_name) + os.makedirs(ltm_path, exist_ok=True) + return ltm_path + + @property + def stm(self) -> str: + stm_path = os.path.join(FileHandler.root, self._stm_dir_name) + os.makedirs(stm_path, exist_ok=True) + return stm_path + + def _get_term_dir(self, term: _LongShortTermType) -> str: + if term == "short": + dir_ = self.stm + elif term == "long": + dir_ = self.ltm + else: + assert_never(term) + return dir_ - Args: - prefix: Prefix for easier identification of sub-directory. - verbose: Subdir creation and removal verbose? - """ - self.verbose = verbose - subdir_name = str(uuid.uuid4()) - if ( - FileHandler.fh_dir_identifier is not None - and len(FileHandler.fh_dir_identifier) > 0 - ): - subdir_name = f"{FileHandler.fh_dir_identifier}_{subdir_name}" - if prefix is not None and len(prefix) > 0: - subdir_name = f"{prefix}_{subdir_name}" - self.subdir = os.path.join(FileHandler.root, subdir_name) - if self.verbose: - print( - f"Creating {Font.BLUE}{Font.BOLD}{self.subdir}{Font.END} " - "directory for data object storage." - ) - os.makedirs(self.subdir, exist_ok=False) - - def clean_up(self) -> None: - """Removes instance-bound `self.subdir`.""" - if os.path.exists(self.subdir): - if self.verbose: - print(f"Removing {self.subdir}") - shutil.rmtree(self.subdir) - if len(os.listdir(FileHandler.root)) == 0: - shutil.rmtree(FileHandler.root) + @overload + def get_tmp_dir( + self, + prefix: str | None = None, + term: Literal["short"] = "short", + purpose: str | None = None, + ) -> str: + ... + + @overload + def get_tmp_dir( + self, + prefix: str, + term: Literal["long"], + purpose: str | None = None, + ) -> str: + ... - @staticmethod - def remove_empty_dirs(consider_fh_dir_identifier: bool = True) -> None: - """Removes emtpy directories in `FileHandler.root`. + def get_tmp_dir( + self, + prefix: str | None = None, + term: _LongShortTermType = "short", + purpose: str | None = None, + ) -> str: + """Gets a tmp-dir path. - Just manual use recommended since it doesn't consider directories which - are currently in use and therefore it could interrupt running code. + This is the to-go function to get a tmp-dir in the according directory. Args: - consider_fh_dir_identifier: Consider `fh_dir_identifier` for dir matching? - """ - paths = glob.glob(os.path.join(FileHandler.root, "*"), recursive=False) - for path in paths: - if os.path.isdir(path) and len(os.listdir(path=path)) == 0: - if consider_fh_dir_identifier: - if FileHandler.fh_dir_identifier in os.path.split(path)[-1]: - shutil.rmtree(path=path) - else: - shutil.rmtree(path=path) - - @staticmethod - def clean_up_fh_root(force: bool = False, verbose: bool = True) -> None: - """Removes the from `FileHandler` created directories. + prefix: Dir-name prefix for STM (optional) and dir-name for LTM (required). + term: "short" for STM or "long" for LTM. + purpose: Creates a verbose print-msg with it's purpose if set. - Args: - force: Remove `FileHandler.root` entirely regardless of content? - verbose: Verbose removal? 
+ Returns: + tmp-dir path """ - if os.path.exists(FileHandler.root): - if force: # force remove fh-root - if verbose: - print(f"Force remove {FileHandler.root}") - shutil.rmtree(FileHandler.root) - elif ( # check if fh-dir-identifier is properly set for safe removal - FileHandler.fh_dir_identifier is None - or len(FileHandler.fh_dir_identifier) < 1 - ): - print( - "`clean_up_fh_root` can't remove anything because " - f"{FileHandler.fh_dir_identifier=}. Set `fh_dir_identifier` " - f"correctly or use `force` to remove {FileHandler.root} regardless." + dir_path = self._get_term_dir(term=term) + if term == "short": + dir_name = _get_rnd_str(k=10, seed=None) + if prefix is not None: + dir_name = "".join((prefix, dir_name)) + dir_path = os.path.join(dir_path, dir_name) + os.makedirs(dir_path, exist_ok=False) + self.tmps.append(dir_path) + elif term == "long": + if prefix is None: + raise RuntimeError( + "For long-term-memory, `prefix` must be set to have unique dirs." ) - else: - if verbose: - print( - f"Remove {FileHandler.root} in case all subdirs match " - f"{FileHandler.fh_dir_identifier=}" - ) - paths = glob.glob(os.path.join(FileHandler.root, "*")) - for path in paths: - if ( - os.path.isdir(path) - and FileHandler.fh_dir_identifier in os.path.split(path)[-1] - ): # safe removal of subdir because it has the fh-dir-identifier - shutil.rmtree(path=path) - if len(os.listdir(FileHandler.root)) > 0: - if verbose: - print( - f"`clean_up_fh_root` is not able safely remove " - f"{FileHandler.root} because there are directories which " - f"don't match {FileHandler.fh_dir_identifier=} or files." - ) - else: # remove fh-root if dir is empty - shutil.rmtree(FileHandler.root) + dir_name = prefix + dir_path = os.path.join(dir_path, dir_name) + os.makedirs(dir_path, exist_ok=True) + else: + assert_never(term) + if purpose: + if len(purpose) > 0: + purpose = f" for {purpose}" + print(f"Creating {Font.BLUE}{Font.BOLD}{dir_path}{Font.END}{purpose}") + return dir_path + + def clean_instance(self) -> None: + """Cleans instance-bound tmp-dirs of `self.tmps` from disk.""" + tmps = copy(self.tmps) + for tmp in tmps: + if os.path.exists(tmp): + shutil.rmtree(tmp) + self.tmps.remove(tmp) + + def clean( + self, + term: _LongShortTermType = "short", + ) -> None: + """Removes the entire directory specified by `term`.""" + dir_ = self._get_term_dir(term=term) + if os.path.exists(dir_): + shutil.rmtree(dir_) + + @staticmethod + def remove_empty_dirs(term: _LongShortTermType = "short") -> None: + """Removes emtpy directories in the chosen cache-dir.""" + dir_ = _get_cache_dir(term=term) + paths = glob.glob(os.path.join(dir_, "*"), recursive=False) + for path in paths: + if os.path.isdir(path) and len(os.listdir(path=path)) == 0: + shutil.rmtree(path=path) @staticmethod def get_file_handler( obj: object, - prefix: Optional[str] = None, - verbose: bool = True, ) -> FileHandler: - """Utility function to always get unique `FileHandler` bound to `obj`. + """Utility function to always get & set unique `FileHandler` bound to `obj`. - `FileHandler` args have just an effect while the first instance is created. + Assumes that `FileHandler` is unique in each `obj`. Args: obj: Any object which should have an unique `FileHandler` assigned. - prefix: See `FileHandler.__init__` - verbose: See `FileHandler.__init__` Returns: - The `FileHandler` bound to `obj`. + `FileHandler` bound to `obj`. 
""" for attr_name in obj.__dict__: attr = getattr(obj, attr_name) if isinstance(attr, FileHandler): return attr - fh = FileHandler(prefix=prefix, verbose=verbose) + fh = FileHandler() setattr(obj, "file_handler", fh) return fh def __enter__(self) -> str: - return self.subdir + return self.get_tmp_dir(prefix=None, term="short") def __exit__( self, @@ -226,7 +260,7 @@ def __exit__( exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: - self.clean_up() + self.clean_instance() def check_ending(path: Union[str, FilePathType, DirPathType], ending: str) -> None: From 02993ffb643bb5d0b96ab593f70880743c36dea3 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 15:07:34 +0100 Subject: [PATCH 179/207] improved file-handler by getting unique tmp-dir per unique object :collision: --- karabo/util/file_handler.py | 89 +++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index ac87fd99..0b9f212f 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -99,11 +99,11 @@ class FileHandler: tmp ├── karabo-LTM--<10 rnd chars+digits> - │  ├── some-dir - │  └── some-file + │  ├── a-dir + │  └── another-dir └── karabo-STM--<10 rnd chars+digits> - ├── some-dir - └── some-file + ├── a-dir + └── another-dir LTM stand for long-term-memory (self.ltm) and STM for short-term-memory (self.stm). The data-products usually get into in the STM directory. @@ -146,9 +146,10 @@ def _get_term_dir(self, term: _LongShortTermType) -> str: @overload def get_tmp_dir( self, - prefix: str | None = None, + prefix: Union[str, None] = None, term: Literal["short"] = "short", - purpose: str | None = None, + purpose: Union[str, None] = None, + unique: object = None, ) -> str: ... @@ -157,15 +158,17 @@ def get_tmp_dir( self, prefix: str, term: Literal["long"], - purpose: str | None = None, + purpose: Union[str, None] = None, + unique: object = None, ) -> str: ... def get_tmp_dir( self, - prefix: str | None = None, + prefix: Union[str, None] = None, term: _LongShortTermType = "short", - purpose: str | None = None, + purpose: Union[str, None] = None, + unique: object = None, ) -> str: """Gets a tmp-dir path. @@ -175,10 +178,31 @@ def get_tmp_dir( prefix: Dir-name prefix for STM (optional) and dir-name for LTM (required). term: "short" for STM or "long" for LTM. purpose: Creates a verbose print-msg with it's purpose if set. + unique: If an object which has attributes is provided, then you get + the same tmp-dir for the unique instance. Returns: tmp-dir path """ + set_unique = False + obj_tmp_dir_name = "_karabo_tmp_dir" + if unique is not None: + if term != "short": + raise RuntimeError( + "`unique` not None is just supported for short-term tmp-dirs." + ) + try: + unique.__dict__ # just to test try-except + if hasattr(unique, obj_tmp_dir_name): + return getattr(unique, obj_tmp_dir_name) + else: + set_unique = True + except AttributeError: + raise AttributeError( + "`unique` must be an object with attributes, " + + f"but is of type {type(unique)} instead." 
+ ) + dir_path = self._get_term_dir(term=term) if term == "short": dir_name = _get_rnd_str(k=10, seed=None) @@ -197,6 +221,8 @@ def get_tmp_dir( os.makedirs(dir_path, exist_ok=True) else: assert_never(term) + if set_unique: + setattr(unique, obj_tmp_dir_name, dir_path) if purpose: if len(purpose) > 0: purpose = f" for {purpose}" @@ -215,42 +241,30 @@ def clean( self, term: _LongShortTermType = "short", ) -> None: - """Removes the entire directory specified by `term`.""" + """Removes the entire directory specified by `term`. + + Be careful with cleaning, to not mess up dirs of other processes. + + Args: + term: "long" or "short" term memory + """ dir_ = self._get_term_dir(term=term) if os.path.exists(dir_): shutil.rmtree(dir_) @staticmethod def remove_empty_dirs(term: _LongShortTermType = "short") -> None: - """Removes emtpy directories in the chosen cache-dir.""" + """Removes emtpy directories in the chosen cache-dir. + + Args: + term: "long" or "short" term memory + """ dir_ = _get_cache_dir(term=term) paths = glob.glob(os.path.join(dir_, "*"), recursive=False) for path in paths: if os.path.isdir(path) and len(os.listdir(path=path)) == 0: shutil.rmtree(path=path) - @staticmethod - def get_file_handler( - obj: object, - ) -> FileHandler: - """Utility function to always get & set unique `FileHandler` bound to `obj`. - - Assumes that `FileHandler` is unique in each `obj`. - - Args: - obj: Any object which should have an unique `FileHandler` assigned. - - Returns: - `FileHandler` bound to `obj`. - """ - for attr_name in obj.__dict__: - attr = getattr(obj, attr_name) - if isinstance(attr, FileHandler): - return attr - fh = FileHandler() - setattr(obj, "file_handler", fh) - return fh - def __enter__(self) -> str: return self.get_tmp_dir(prefix=None, term="short") @@ -264,6 +278,15 @@ def __exit__( def check_ending(path: Union[str, FilePathType, DirPathType], ending: str) -> None: + """Utility function to check if the ending of `path` is `ending`. + + Args: + path: Path to check. + ending: Ending match. + + Raises: + ValueError: When the ending of `path` doesn't match `ending`. 
+ """ path_ = str(path) if not path_.endswith(ending): fname = path_.split(os.path.sep)[-1] From b1c228eb8425e7f42eb08dff8ca9230a8088417d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 15:46:17 +0100 Subject: [PATCH 180/207] refactored image and imager to new filehandler-setup :pensive: --- karabo/examples/source_detection.ipynb | 25 +--- karabo/imaging/image.py | 14 +- karabo/imaging/imager.py | 130 ++++++++++++------ .../time_karabo_parallelization_by_channel.py | 6 +- 4 files changed, 97 insertions(+), 78 deletions(-) diff --git a/karabo/examples/source_detection.ipynb b/karabo/examples/source_detection.ipynb index a1f6ca3f..c6726c37 100644 --- a/karabo/examples/source_detection.ipynb +++ b/karabo/examples/source_detection.ipynb @@ -1086,32 +1086,11 @@ "# Plot error ra and dec\n", "sde_dirty.plot_flux_ratio_to_ra_dec()" ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "3618b64c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Remove /home/kenfus/Karabo-Pipeline/karabo/examples/karabo_folder in case all subdirs match FileHandler.fh_dir_identifier='fhdir'\n" - ] - } - ], - "source": [ - "from karabo.util.file_handler import FileHandler\n", - "\n", - "# Clean up directories which you didn't explicitly define, if you like\n", - "FileHandler.clean_up_fh_root()" - ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.16 ('karabo_dev_install')", + "display_name": "Python 3.9.18 ('karabo_dev_env')", "language": "python", "name": "python3" }, @@ -1129,7 +1108,7 @@ }, "vscode": { "interpreter": { - "hash": "a0707ef8e99ec41b82a4764f949180fcff23fc58a100beb7b507eb1cd0a0e228" + "hash": "ccbb141bba2c4c019c63d533d94ea415ff9cd07ec39847273b695c7aaf48b1d0" } } }, diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py index 9b0162d1..5f283e3c 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -71,9 +71,6 @@ def __init__( header: Optional[fits.header.Header] = None, **kwargs: Any, ) -> None: - self._fh_prefix = "image" - self._fh_verbose = False - if path is not None and (data is None and header is None): self.path = path self.data, self.header = fits.getdata( @@ -86,13 +83,12 @@ def __init__( self.data = data self.header = header - # Generate a random path for the data - fh = FileHandler.get_file_handler( - obj=self, - prefix=self._fh_prefix, - verbose=self._fh_verbose, + tmp_dir = FileHandler().get_tmp_dir( + prefix="Image-", + purpose="restored fits-path", ) - restored_fits_path = os.path.join(fh.subdir, "image.fits") + + restored_fits_path = os.path.join(tmp_dir, "image.fits") # Write the FITS file self.write_to_file(restored_fits_path) diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py index 1cdab078..3a3ff544 100644 --- a/karabo/imaging/imager.py +++ b/karabo/imaging/imager.py @@ -29,7 +29,7 @@ from karabo.simulation.visibility import Visibility from karabo.util._types import FilePathType from karabo.util.dask import DaskHandler -from karabo.util.file_handler import FileHandler +from karabo.util.file_handler import FileHandler, check_ending ImageContextType = Literal["awprojection", "2d", "ng", "wg"] CleanAlgorithmType = Literal["hogbom", "msclean", "mmclean"] @@ -207,25 +207,36 @@ def __init__( self.imaging_uvmax = imaging_uvmax self.imaging_uvmin = imaging_uvmin - self._fh_prefix = "imager" - self._fh_verbose = True - - def get_dirty_image(self, fits_path: Optional[FilePathType] = None) -> Image: + def get_dirty_image( + self, + fits_path: 
Optional[FilePathType] = None, + exist_ok: bool = False, + ) -> Image: """Get Dirty Image of visibilities passed to the Imager. Args: - fits_path: Path to where the .fits file will get exported. + fits_path: Path to where the .fits file will get saved. + exist_ok: When True and an existing .fits file is found, then + the Image is created from the .fits file and not from the .ms file. Returns: Dirty Image """ if fits_path is None: - fh = FileHandler.get_file_handler( - obj=self, - prefix=self._fh_prefix, - verbose=self._fh_verbose, + tmp_dir = FileHandler().get_tmp_dir( + prefix="dirty-", + purpose="saving dirty.fits", + unique=self, ) - fits_path = os.path.join(fh.subdir, "dirty.fits") + fits_path = os.path.join(tmp_dir, "dirty.fits") + else: + check_ending(path=fits_path, ending=".fits") + + if os.path.exists(fits_path): + if not exist_ok: + raise FileExistsError(f"{fits_path} already exists.") + else: + return Image(path=fits_path) block_visibilities = create_visibility_from_ms( str(self.visibility.ms_file_path) @@ -241,7 +252,7 @@ def get_dirty_image(self, fits_path: Optional[FilePathType] = None) -> Image: override_cellsize=self.override_cellsize, ) dirty, _ = invert_visibility(visibility, model, context="2d") - dirty.image_acc.export_to_fits(fits_file=f"{fits_path}") + dirty.image_acc.export_to_fits(fits_file=fits_path) image = Image(path=fits_path) return image @@ -275,6 +286,7 @@ def imaging_rascil( clean_restore_overlap: int = 32, clean_restore_taper: CleanTaperType = "tukey", clean_restored_output: CleanRestoredOutputType = "list", + exist_ok: bool = False, ) -> Tuple[Image, Image, Image]: """Starts imaging process using RASCIL using CLEAN. @@ -314,10 +326,59 @@ def imaging_rascil( restore step (none, linear or tukey). clean_restored_output: Type of restored image output: taylor, list, or integrated. + exist_ok: Whether it's ok if the output-images already exist. Returns: deconvolved, restored, residual """ + tmp_dir = "" # workaround for Unbound complaints + if ( + deconvolved_fits_path is None + or restored_fits_path is None + or residual_fits_path is None + ): + tmp_dir = FileHandler().get_tmp_dir( + prefix="imaging-", + purpose="disk-cache for non-specified .fits files.", + unique=self, + ) + + if deconvolved_fits_path is None: + deconvolved_fits_path = os.path.join(tmp_dir, "deconvolved.fits") + if restored_fits_path is None: + restored_fits_path = os.path.join(tmp_dir, "restored.fits") + if residual_fits_path is None: + residual_fits_path = os.path.join(tmp_dir, "residual.fits") + + # handle if already existing images could be used before computing anything + if ( + os.path.exists(deconvolved_fits_path) + or os.path.exists(restored_fits_path) + or os.path.exists(residual_fits_path) + ): + if ( + os.path.exists(deconvolved_fits_path) + and os.path.exists(restored_fits_path) + and os.path.exists(residual_fits_path) + ): + if exist_ok: + deconvolved_image = Image(path=deconvolved_fits_path) + restored_image = Image(path=restored_fits_path) + residual_image = Image(path=residual_fits_path) + + return deconvolved_image, restored_image, residual_image + else: + raise RuntimeError( + f"{deconvolved_fits_path=}, {restored_fits_path=}, " + + f"{residual_fits_path=}\n exist, but this {exist_ok=}" + ) + else: + raise RuntimeError( + f"{deconvolved_fits_path=}, {restored_fits_path=}, " + + f"{residual_fits_path=}\nThey exist partially, but this " + + "use-case is not supported." 
+ ) + if client and not use_dask: raise EnvironmentError("Client passed but use_dask is False") if use_dask: @@ -390,50 +451,35 @@ def imaging_rascil( imaging_uvmin=self.imaging_uvmin, ) + # perform checks before doing anything to capture errors early + if deconvolved_fits_path is not None: + check_ending(path=deconvolved_fits_path, ending=".fits") + if restored_fits_path is not None: + check_ending(path=restored_fits_path, ending=".fits") + if residual_fits_path is not None: + check_ending(path=residual_fits_path, ending=".fits") + result = rsexecute.compute(result, sync=True) residual, restored, skymodel = result - if deconvolved_fits_path is None: - fh = FileHandler.get_file_handler( - obj=self, - prefix=self._fh_prefix, - verbose=self._fh_verbose, - ) - deconvolved_fits_path = os.path.join(fh.subdir, "deconvolved.fits") - deconvolved = [sm.image for sm in skymodel] deconvolved_image_rascil = image_gather_channels(deconvolved) - deconvolved_image_rascil.image_acc.export_to_fits( - fits_file=str(deconvolved_fits_path) - ) - deconvolved_image = Image(path=deconvolved_fits_path) - - if restored_fits_path is None: - fh = FileHandler.get_file_handler( - obj=self, - prefix=self._fh_prefix, - verbose=self._fh_verbose, - ) - restored_fits_path = os.path.join(fh.subdir, "restored.fits") if isinstance(restored, list): restored = image_gather_channels(restored) - restored.image_acc.export_to_fits(fits_file=str(restored_fits_path)) - restored_image = Image(path=restored_fits_path) - - if residual_fits_path is None: - fh = FileHandler.get_file_handler( - obj=self, - prefix=self._fh_prefix, - verbose=self._fh_verbose, - ) - residual_fits_path = os.path.join(fh.subdir, "residual.fits") residual = remove_sumwt(residual) if isinstance(residual, list): residual = image_gather_channels(residual) + + deconvolved_image_rascil.image_acc.export_to_fits( + fits_file=str(deconvolved_fits_path) + ) + restored.image_acc.export_to_fits(fits_file=str(restored_fits_path)) residual.image_acc.export_to_fits(fits_file=str(residual_fits_path)) + deconvolved_image = Image(path=deconvolved_fits_path) + restored_image = Image(path=restored_fits_path) residual_image = Image(path=residual_fits_path) return deconvolved_image, restored_image, residual_image diff --git a/karabo/performance_test/time_karabo_parallelization_by_channel.py b/karabo/performance_test/time_karabo_parallelization_by_channel.py index 0cb8afb7..11d380f4 100644 --- a/karabo/performance_test/time_karabo_parallelization_by_channel.py +++ b/karabo/performance_test/time_karabo_parallelization_by_channel.py @@ -8,8 +8,7 @@ from karabo.simulation.sky_model import SkyModel from karabo.simulation.telescope import Telescope from karabo.util.dask import DaskHandler - -# from karabo.util.file_handler import FileHandler +from karabo.util.file_handler import FileHandler def main(n_channels: int, memory_limit: Optional[int] = None) -> None: @@ -80,8 +79,7 @@ def main(n_channels: int, memory_limit: Optional[int] = None) -> None: ) file.flush() - # Clean up - # FileHandler.clean_up_fh_root() + FileHandler().clean() if __name__ == "__main__": From 666eb16ab878689e53c115d4187dd76aa9d6038f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 16:47:22 +0100 Subject: [PATCH 181/207] adapted interferometer.py and telescope.py to new FileHandler setup :arrow_right_hook: --- karabo/simulation/interferometer.py | 40 ++++++++++++++++------------- karabo/simulation/telescope.py | 18 ++++++------- karabo/util/file_handler.py | 18 +++++++++++++ 3 files changed, 49 
insertions(+), 27 deletions(-) diff --git a/karabo/simulation/interferometer.py b/karabo/simulation/interferometer.py index 06e09905..d9599cd7 100644 --- a/karabo/simulation/interferometer.py +++ b/karabo/simulation/interferometer.py @@ -287,18 +287,16 @@ def __init__( self.ionosphere_screen_pixel_size_m = ionosphere_screen_pixel_size_m self.ionosphere_isoplanatic_screen = ionosphere_isoplanatic_screen - # FileHandler args if needed - self._fh_prefix = "interferometer_sim" - self._fh_verbose = True - @property def ms_file_path(self) -> str: ms_file_path = self._ms_file_path if ms_file_path is None: - fh = FileHandler.get_file_handler( - self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="interferometer-", + purpose="interferometer disk-cache.", + unique=self, ) - ms_file_path = os.path.join(fh.subdir, "measurements.MS") + ms_file_path = os.path.join(tmp_dir, "measurements.MS") self._ms_file_path = ms_file_path return ms_file_path @@ -310,10 +308,12 @@ def ms_file_path(self, value: str) -> None: def vis_path(self) -> str: vis_path = self._vis_path if vis_path is None: - fh = FileHandler.get_file_handler( - self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="interferometer-", + purpose="interferometer disk-cache.", + unique=self, ) - vis_path = os.path.join(fh.subdir, "visibility.vis") + vis_path = os.path.join(tmp_dir, "visibility.vis") self._vis_path = vis_path return vis_path @@ -407,12 +407,14 @@ def __run_simulation_parallized_observation( # Scatter sky array_sky = self.client.scatter(array_sky) - fh = FileHandler.get_file_handler( - self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="simulation-praallezed-observation-", + purpose="simulation praallezed observation", + unique=self, ) - ms_dir = os.path.join(fh.subdir, "measurements") + ms_dir = os.path.join(tmp_dir, "measurements") os.makedirs(ms_dir, exist_ok=True) - vis_dir = os.path.join(fh.subdir, "visibilities") + vis_dir = os.path.join(tmp_dir, "visibilities") os.makedirs(vis_dir, exist_ok=True) for observation_params in observations: start_freq = observation_params["observation"]["start_frequency_hz"] @@ -547,12 +549,14 @@ def __run_simulation_long( "`telescope.path` must be set but is None." ) - fh = FileHandler.get_file_handler( - self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="simulation-long-", + purpose="simulation long", + unique=self, ) - ms_dir = os.path.join(fh.subdir, "measurements") + ms_dir = os.path.join(tmp_dir, "measurements") os.makedirs(ms_dir, exist_ok=True) - vis_dir = os.path.join(fh.subdir, "visibilities") + vis_dir = os.path.join(tmp_dir, "visibilities") os.makedirs(vis_dir, exist_ok=True) # Loop over days diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index b4a96391..86596b56 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -114,10 +114,6 @@ class Telescope(KaraboResource): WGS84 latitude at the center of the telescope. centre_altitude : float Altitude (in meters) at the center of the telescope. - temp_dir : None - Temporary directory. - path : None - Hotfix for issue #59. """ def __init__( @@ -134,7 +130,6 @@ def __init__( altitude : float, optional Altitude (in meters) at the center of the telescope, default is 0. 
""" - self._fh = FileHandler(prefix="telescope", verbose=False) self.path: Optional[DirPathType] = None self.centre_longitude = longitude self.centre_latitude = latitude @@ -379,11 +374,16 @@ def get_OSKAR_telescope(self) -> OskarTelescope: Retrieve the OSKAR Telescope object from the karabo.Telescope object. :return: OSKAR Telescope object """ - - self.write_to_file(self._fh.subdir) + tmp_dir = FileHandler().get_tmp_dir( + prefix="oskar-telescope-", + purpose="saving files to disk for oskar-telescope.", + unique=self, + ) + if FileHandler.is_dir_empty(dirname=tmp_dir): + self.write_to_file(tmp_dir) tel = OskarTelescope() - tel.load(self._fh.subdir) - self.path = self._fh.subdir + tel.load(tmp_dir) + self.path = tmp_dir return tel def write_to_file(self, dir: DirPathType) -> None: diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 0b9f212f..3d56a5d0 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -252,6 +252,24 @@ def clean( if os.path.exists(dir_): shutil.rmtree(dir_) + @staticmethod + def is_dir_empty(dirname: DirPathType) -> bool: + """Checks if `dirname` is empty assuming `dirname` exists. + + Args: + dirname: Directory to check. + + Raises: + NotADirectoryError: If `dirname` is not an existing directory. + + Returns: + True if dir is empty, else False + """ + if not os.path.isdir(dirname): + raise NotADirectoryError(f"{dirname} is not an existing directory.") + is_empty = len(os.listdir(path=dirname)) == 0 + return is_empty + @staticmethod def remove_empty_dirs(term: _LongShortTermType = "short") -> None: """Removes emtpy directories in the chosen cache-dir. From 6b4ba876970a61acb60342bac25ad862f33679d1 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 17 Jan 2024 17:34:53 +0100 Subject: [PATCH 182/207] adapted visibility and sourcedetection to new FileHandler setup :raised_hands: --- karabo/simulation/visibility.py | 33 ++++++++++++++++++-------------- karabo/sourcedetection/result.py | 20 +++++++++++-------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/karabo/simulation/visibility.py b/karabo/simulation/visibility.py index 8f88f6f5..4a6114ad 100644 --- a/karabo/simulation/visibility.py +++ b/karabo/simulation/visibility.py @@ -37,19 +37,19 @@ def __init__( ------- None """ - self._fh_prefix = "visibility" - self._fh_verbose = False - if vis_path is None: - fh = FileHandler.get_file_handler( - obj=self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = "" + if vis_path is None or ms_file_path is None: + tmp_dir = FileHandler().get_tmp_dir( + prefix="visibility-", + purpose="visibility disk-cache.", + unique=self, ) - vis_path = os.path.join(fh.subdir, "visibility.vis") + + if vis_path is None: + vis_path = os.path.join(tmp_dir, "visibility.vis") self.vis_path = vis_path if ms_file_path is None: - fh = FileHandler.get_file_handler( - obj=self, prefix=self._fh_prefix, verbose=self._fh_verbose - ) - ms_file_path = os.path.join(fh.subdir, "measurements.MS") + ms_file_path = os.path.join(tmp_dir, "measurements.MS") self.ms_file_path = ms_file_path def write_to_file(self, path: FilePathType) -> None: @@ -182,8 +182,10 @@ def combine_vis( ) -> Optional[DirPathType]: print(f"Combining {len(visiblity_files)} visibilities...") if combined_ms_filepath is None: - fh = FileHandler(prefix="combine_vis", verbose=True) - combined_ms_filepath = os.path.join(fh.subdir, "combined.MS") + tmp_dir = FileHandler().get_tmp_dir( + prefix="combine-vis-", purpose="combine-vis disk-cache." 
+ ) + combined_ms_filepath = os.path.join(tmp_dir, "combined.MS") # Initialize lists to store data out_vis, uui, vvi, wwi, time_start, time_inc, time_ave = ([] for _ in range(7)) @@ -290,8 +292,11 @@ def combine_vis_sky_chunks( ) -> Optional[DirPathType]: print(f"Combining {len(visibility_files)} visibilities...") if combined_ms_filepath is None: - fh = FileHandler(prefix="combine_vis_sky_chunks", verbose=True) - combined_ms_filepath = os.path.join(fh.subdir, "combined.MS") + tmp_dir = FileHandler().get_tmp_dir( + prefix="combine-vis-sky-chunks-", + purpose="combine-vis-sky-chunks disk-cache.", + ) + combined_ms_filepath = os.path.join(tmp_dir, "combined.MS") # Initialize lists to store data out_vis, uui, vvi, wwi, time_start, time_inc, time_ave = ([] for _ in range(7)) diff --git a/karabo/sourcedetection/result.py b/karabo/sourcedetection/result.py index 12a535aa..5f636d91 100644 --- a/karabo/sourcedetection/result.py +++ b/karabo/sourcedetection/result.py @@ -297,12 +297,12 @@ def __init__( functions on PyBDSF results :param bdsf_detection: PyBDSF result image """ - self._fh_prefix = "pybdsf_sdr" - self._fh_verbose = True - fh = FileHandler.get_file_handler( - obj=self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="pybdsf-sdr-", + purpose="pybdsf source-detection-result disk-cache.", + unique=self, ) - sources_file = os.path.join(fh.subdir, "sources.csv") + sources_file = os.path.join(tmp_dir, "sources.csv") bdsf_detection.write_catalog( outfile=sources_file, catalog_type="gaul", format="csv", clobber=True ) @@ -346,10 +346,14 @@ def __transform_bdsf_to_reduced_result_array( return sources def __get_result_image(self, image_type: str, **kwargs: Any) -> Image: - fh = FileHandler.get_file_handler( - obj=self, prefix=self._fh_prefix, verbose=self._fh_verbose + tmp_dir = FileHandler().get_tmp_dir( + prefix="pybdsf-sdr-", + purpose="pybdsf source-detection-result disk-cache.", + unique=self, ) - outfile = os.path.join(fh.subdir, "result.fits") + outfile = os.path.join(tmp_dir, f"{image_type}-result.fits") + if os.path.exists(outfile): # allow overwriting for new results + os.remove(path=outfile) self.bdsf_result.export_image( outfile=outfile, img_format="fits", From 4f0707348963189485387fd3e20fb5ce868a2fbb Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 22 Jan 2024 10:13:24 +0100 Subject: [PATCH 183/207] enhanced FileHandler get-tmp-dir with subdir & mkdir option :oncoming_automobile: --- karabo/util/file_handler.py | 72 ++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 3d56a5d0..d5fb3bf0 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -94,16 +94,27 @@ class FileHandler: In case you want to extract something specific from the cache, the path is usually printed blue & bold in stdout. + The root STM and LTM should be unique per user (seeded rnd chars+digits), thus just + having two disk-cache directories per user. + Set `FileHandler.root` to change the directory where files and dirs will be saved. 
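As an illustrative sketch of the cache layout described here (the root path and the prefixes are made up; the resulting directory tree is shown below):

```python
from karabo.util.file_handler import FileHandler

FileHandler.root = "/scratch/karabo-cache"  # hypothetical mounted scratch directory

stm_dir = FileHandler().get_tmp_dir(
    prefix="my-task-",  # illustrative prefix
    purpose="short-lived disk-cache",
)  # lands under .../karabo-STM-.../my-task-<10 rnd chars+digits>

ltm_dir = FileHandler().get_tmp_dir(
    term="long",
    prefix="my-objects-",  # LTM dirs need a prefix to stay unique
)  # lands under .../karabo-LTM-.../my-objects-...
```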
The dir-structure is as follows where "tmp" is `FileHandler.root`: tmp ├── karabo-LTM--<10 rnd chars+digits> - │  ├── a-dir - │  └── another-dir + │  ├── <10 rnd chars+digits> + | | ├── + | | └── + │  └── <10 rnd chars+digits> + | ├── + | └── └── karabo-STM--<10 rnd chars+digits> - ├── a-dir - └── another-dir + ├── <10 rnd chars+digits> + | ├── + | └── + └── <10 rnd chars+digits> + ├── + └── LTM stand for long-term-memory (self.ltm) and STM for short-term-memory (self.stm). The data-products usually get into in the STM directory. @@ -150,6 +161,8 @@ def get_tmp_dir( term: Literal["short"] = "short", purpose: Union[str, None] = None, unique: object = None, + subdir: Union[DirPathType, None] = None, + mkdir: bool = True, ) -> str: ... @@ -160,6 +173,8 @@ def get_tmp_dir( term: Literal["long"], purpose: Union[str, None] = None, unique: object = None, + subdir: Union[DirPathType, None] = None, + mkdir: bool = True, ) -> str: ... @@ -169,6 +184,8 @@ def get_tmp_dir( term: _LongShortTermType = "short", purpose: Union[str, None] = None, unique: object = None, + subdir: Union[DirPathType, None] = None, + mkdir: bool = True, ) -> str: """Gets a tmp-dir path. @@ -180,53 +197,66 @@ def get_tmp_dir( purpose: Creates a verbose print-msg with it's purpose if set. unique: If an object which has attributes is provided, then you get the same tmp-dir for the unique instance. + subdir: If set, it directly creates & returns /subdir + mkdir: Make-dir directly? Returns: tmp-dir path """ - set_unique = False - obj_tmp_dir_name = "_karabo_tmp_dir" + obj_tmp_dir_short_name = "_karabo_tmp_dir_short" + tmp_dir: Union[str, None] = None # without subdir if unique is not None: if term != "short": raise RuntimeError( "`unique` not None is just supported for short-term tmp-dirs." ) try: - unique.__dict__ # just to test try-except - if hasattr(unique, obj_tmp_dir_name): - return getattr(unique, obj_tmp_dir_name) - else: - set_unique = True + unique.__dict__ # just to test try-except AttributeError + if hasattr(unique, obj_tmp_dir_short_name): + tmp_dir = getattr(unique, obj_tmp_dir_short_name) except AttributeError: raise AttributeError( "`unique` must be an object with attributes, " + f"but is of type {type(unique)} instead." ) - dir_path = self._get_term_dir(term=term) - if term == "short": + if tmp_dir is not None: + dir_path = tmp_dir + if subdir is not None: + dir_path = os.path.join(dir_path, subdir) + exist_ok = True + elif term == "short": + dir_path = self._get_term_dir(term=term) dir_name = _get_rnd_str(k=10, seed=None) if prefix is not None: dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) - os.makedirs(dir_path, exist_ok=False) + setattr(unique, obj_tmp_dir_short_name, dir_path) self.tmps.append(dir_path) + if subdir is not None: + dir_path = os.path.join(dir_path, subdir) + exist_ok = False elif term == "long": + dir_path = self._get_term_dir(term=term) if prefix is None: raise RuntimeError( "For long-term-memory, `prefix` must be set to have unique dirs." 
) - dir_name = prefix + dir_name = _get_rnd_str(k=10, seed=prefix) + dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) - os.makedirs(dir_path, exist_ok=True) + if subdir is not None: + dir_path = os.path.join(dir_path, subdir) + exist_ok = True else: assert_never(term) - if set_unique: - setattr(unique, obj_tmp_dir_name, dir_path) - if purpose: - if len(purpose) > 0: + if not exist_ok and os.path.exists(dir_path): + raise FileExistsError(f"{dir_path} already exists") + if mkdir: + os.makedirs(dir_path, exist_ok=exist_ok) + if purpose and len(purpose) > 0: purpose = f" for {purpose}" - print(f"Creating {Font.BLUE}{Font.BOLD}{dir_path}{Font.END}{purpose}") + print(f"Creating {Font.BLUE}{Font.BOLD}{dir_path}{Font.END}{purpose}") return dir_path def clean_instance(self) -> None: From 83357d327c0df0d84df6b92fa10d75b9416bfa28 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 22 Jan 2024 14:04:25 +0100 Subject: [PATCH 184/207] adapted Karabo to new FileHandler setup :snake: --- karabo/imaging/image.py | 1 + karabo/imaging/imager.py | 94 +++++++++---------------- karabo/simulation/interferometer.py | 14 ++-- karabo/simulation/telescope.py | 14 +++- karabo/simulation/visibility.py | 17 +++-- karabo/sourcedetection/result.py | 4 +- karabo/test/conftest.py | 6 +- karabo/test/test_filehandler.py | 102 +++++++++++++--------------- karabo/util/file_handler.py | 15 +++- 9 files changed, 121 insertions(+), 146 deletions(-) diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py index 5f283e3c..add751c6 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -86,6 +86,7 @@ def __init__( tmp_dir = FileHandler().get_tmp_dir( prefix="Image-", purpose="restored fits-path", + unique=self, ) restored_fits_path = os.path.join(tmp_dir, "image.fits") diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py index 3a3ff544..491d454b 100644 --- a/karabo/imaging/imager.py +++ b/karabo/imaging/imager.py @@ -23,7 +23,6 @@ from ska_sdp_func_python.visibility import convert_visibility_to_stokesI from karabo.data.external_data import MGCLSContainerDownloadObject -from karabo.error import KaraboError from karabo.imaging.image import Image from karabo.simulation.sky_model import SkyModel from karabo.simulation.visibility import Visibility @@ -210,34 +209,27 @@ def __init__( def get_dirty_image( self, fits_path: Optional[FilePathType] = None, - exist_ok: bool = False, ) -> Image: """Get Dirty Image of visibilities passed to the Imager. + Note: If `fits_path` is provided and already exists, then this function will + overwrite `fits_path`. + Args: fits_path: Path to where the .fits file will get saved. - exist_ok: When True and an existing .fits file is found, then - the Image is created from the .fits file and not from the .ms file. 
Returns: Dirty Image """ if fits_path is None: tmp_dir = FileHandler().get_tmp_dir( - prefix="dirty-", - purpose="saving dirty.fits", - unique=self, + prefix="Imager-Dirty-", + purpose="disk-cache for dirty.fits", ) fits_path = os.path.join(tmp_dir, "dirty.fits") else: check_ending(path=fits_path, ending=".fits") - if os.path.exists(fits_path): - if not exist_ok: - raise FileExistsError(f"{fits_path} already exists.") - else: - return Image(path=fits_path) - block_visibilities = create_visibility_from_ms( str(self.visibility.ms_file_path) ) @@ -252,6 +244,8 @@ def get_dirty_image( override_cellsize=self.override_cellsize, ) dirty, _ = invert_visibility(visibility, model, context="2d") + if os.path.exists(fits_path): + os.remove(fits_path) dirty.image_acc.export_to_fits(fits_file=fits_path) image = Image(path=fits_path) @@ -286,10 +280,13 @@ def imaging_rascil( clean_restore_overlap: int = 32, clean_restore_taper: CleanTaperType = "tukey", clean_restored_output: CleanRestoredOutputType = "list", - exist_ok: bool = False, ) -> Tuple[Image, Image, Image]: """Starts imaging process using RASCIL using CLEAN. + Note: For `deconvolved_fits_path`, `restored_fits_path` & `residual_fits_path`, + if one or more of them are provided and already exist on the disk, then + they will get overwritten if the imaging succeeds. + Clean args see https://developer.skao.int/_/downloads/rascil/en/latest/pdf/ Args: @@ -326,58 +323,31 @@ def imaging_rascil( restore step (none, linear or tukey). clean_restored_output: Type of restored image output: taylor, list, or integrated. - exist_ok: Whether it's ok if the output-images already exist. Returns: deconvolved, restored, residual """ - tmp_dir = "" # workaround for Unbound complaints + if deconvolved_fits_path is not None: + check_ending(path=deconvolved_fits_path, ending=".fits") + if restored_fits_path is not None: + check_ending(path=deconvolved_fits_path, ending=".fits") + if residual_fits_path is not None: + check_ending(path=residual_fits_path, ending=".fits") if ( deconvolved_fits_path is None or restored_fits_path is None or residual_fits_path is None ): tmp_dir = FileHandler().get_tmp_dir( - prefix="imaging-", + prefix="Imaging-Rascil-", purpose="disk-cache for non-specified .fits files.", - unique=self, ) - - if deconvolved_fits_path is None: - deconvolved_fits_path = os.path.join(tmp_dir, "deconvolved.fits") - if restored_fits_path is None: - restored_fits_path = os.path.join(tmp_dir, "restored.fits") - if residual_fits_path is None: - residual_fits_path = os.path.join(tmp_dir, "residual.fits") - - # handle if already existing images could be used before computing anything - if ( - os.path.exists(deconvolved_fits_path) - or os.path.exists(restored_fits_path) - or os.path.exists(residual_fits_path) - ): - if ( - os.path.exists(deconvolved_fits_path) - and os.path.exists(restored_fits_path) - and os.path.exists(residual_fits_path) - ): - if exist_ok: - deconvolved_image = Image(path=deconvolved_fits_path) - restored_image = Image(path=restored_fits_path) - residual_image = Image(path=residual_fits_path) - - return deconvolved_image, restored_image, residual_image - else: - raise RuntimeError( - f"{deconvolved_fits_path=}, {restored_fits_path=}, " - + f"{residual_fits_path=}\n exist, but this {exist_ok=}" - ) - else: - raise RuntimeError( - f"{deconvolved_fits_path=}, {restored_fits_path=}, " - + f"{residual_fits_path=}\nThey exist partially, but this " - + "use-case is not supported." 
- ) + if deconvolved_fits_path is None: + deconvolved_fits_path = os.path.join(tmp_dir, "deconvolved.fits") + if restored_fits_path is None: + restored_fits_path = os.path.join(tmp_dir, "restored.fits") + if residual_fits_path is None: + residual_fits_path = os.path.join(tmp_dir, "residual.fits") if client and not use_dask: raise EnvironmentError("Client passed but use_dask is False") @@ -391,7 +361,7 @@ def imaging_rascil( rsexecute.set_client(use_dask=use_dask, client=client, use_dlg=False) if self.ingest_vis_nchan is None: - raise KaraboError("`ingest_vis_nchan` is None but must be of type 'int'.") + raise ValueError("`ingest_vis_nchan` is None but must be of type 'int'.") blockviss = create_visibility_from_ms_rsexecute( msname=str(self.visibility.ms_file_path), @@ -451,14 +421,6 @@ def imaging_rascil( imaging_uvmin=self.imaging_uvmin, ) - # perform checks before doing anything to capture errors early - if deconvolved_fits_path is not None: - check_ending(path=deconvolved_fits_path, ending=".fits") - if restored_fits_path is not None: - check_ending(path=restored_fits_path, ending=".fits") - if residual_fits_path is not None: - check_ending(path=residual_fits_path, ending=".fits") - result = rsexecute.compute(result, sync=True) residual, restored, skymodel = result @@ -473,10 +435,16 @@ def imaging_rascil( if isinstance(residual, list): residual = image_gather_channels(residual) + if os.path.exists(deconvolved_fits_path): + os.remove(deconvolved_fits_path) deconvolved_image_rascil.image_acc.export_to_fits( fits_file=str(deconvolved_fits_path) ) + if os.path.exists(restored_fits_path): + os.remove(restored_fits_path) restored.image_acc.export_to_fits(fits_file=str(restored_fits_path)) + if os.path.exists(residual_fits_path): + os.remove(residual_fits_path) residual.image_acc.export_to_fits(fits_file=str(residual_fits_path)) deconvolved_image = Image(path=deconvolved_fits_path) restored_image = Image(path=restored_fits_path) diff --git a/karabo/simulation/interferometer.py b/karabo/simulation/interferometer.py index d9599cd7..5d627c35 100644 --- a/karabo/simulation/interferometer.py +++ b/karabo/simulation/interferometer.py @@ -409,13 +409,12 @@ def __run_simulation_parallized_observation( array_sky = self.client.scatter(array_sky) tmp_dir = FileHandler().get_tmp_dir( prefix="simulation-praallezed-observation-", - purpose="simulation praallezed observation", - unique=self, + purpose="disk-cache simulation-praallezed-observation", ) ms_dir = os.path.join(tmp_dir, "measurements") - os.makedirs(ms_dir, exist_ok=True) + os.makedirs(ms_dir, exist_ok=False) vis_dir = os.path.join(tmp_dir, "visibilities") - os.makedirs(vis_dir, exist_ok=True) + os.makedirs(vis_dir, exist_ok=False) for observation_params in observations: start_freq = observation_params["observation"]["start_frequency_hz"] ms_file_path = os.path.join(ms_dir, f"start_freq_{start_freq}.MS") @@ -551,13 +550,12 @@ def __run_simulation_long( tmp_dir = FileHandler().get_tmp_dir( prefix="simulation-long-", - purpose="simulation long", - unique=self, + purpose="disk-cache simulation-long", ) ms_dir = os.path.join(tmp_dir, "measurements") - os.makedirs(ms_dir, exist_ok=True) + os.makedirs(ms_dir, exist_ok=False) vis_dir = os.path.join(tmp_dir, "visibilities") - os.makedirs(vis_dir, exist_ok=True) + os.makedirs(vis_dir, exist_ok=False) # Loop over days for i, current_date in enumerate( diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 86596b56..45c48996 100644 --- a/karabo/simulation/telescope.py +++ 
b/karabo/simulation/telescope.py @@ -372,15 +372,23 @@ def plot_telescope_OSKAR(self, file: Optional[str] = None) -> None: def get_OSKAR_telescope(self) -> OskarTelescope: """ Retrieve the OSKAR Telescope object from the karabo.Telescope object. + + Note: Once this function is called, it returns the same `OskarTelescope` + for each function call bound to this object-instance. Thus, changing + Telescope-parameters on this instance after calling this function + won't have an effect on the returned `OskarTelescope` anymore. + :return: OSKAR Telescope object """ tmp_dir = FileHandler().get_tmp_dir( prefix="oskar-telescope-", - purpose="saving files to disk for oskar-telescope.", + purpose="oskar-telescope disk-cache", unique=self, + subdir="oskar-telescope", # in case other files should get cached by self ) - if FileHandler.is_dir_empty(dirname=tmp_dir): - self.write_to_file(tmp_dir) + if not FileHandler.is_dir_empty(dirname=tmp_dir): + FileHandler.empty_dir(dir_path=tmp_dir) + self.write_to_file(tmp_dir) tel = OskarTelescope() tel.load(tmp_dir) self.path = tmp_dir diff --git a/karabo/simulation/visibility.py b/karabo/simulation/visibility.py index 4a6114ad..39885d78 100644 --- a/karabo/simulation/visibility.py +++ b/karabo/simulation/visibility.py @@ -37,19 +37,17 @@ def __init__( ------- None """ - tmp_dir = "" if vis_path is None or ms_file_path is None: tmp_dir = FileHandler().get_tmp_dir( prefix="visibility-", - purpose="visibility disk-cache.", + purpose="visibility disk-cache", unique=self, ) - - if vis_path is None: - vis_path = os.path.join(tmp_dir, "visibility.vis") + if vis_path is None: + vis_path = os.path.join(tmp_dir, "visibility.vis") + if ms_file_path is None: + ms_file_path = os.path.join(tmp_dir, "measurements.MS") self.vis_path = vis_path - if ms_file_path is None: - ms_file_path = os.path.join(tmp_dir, "measurements.MS") self.ms_file_path = ms_file_path def write_to_file(self, path: FilePathType) -> None: @@ -183,7 +181,8 @@ def combine_vis( print(f"Combining {len(visiblity_files)} visibilities...") if combined_ms_filepath is None: tmp_dir = FileHandler().get_tmp_dir( - prefix="combine-vis-", purpose="combine-vis disk-cache." 
+ prefix="combine-vis-", + purpose="combine-vis disk-cache.", ) combined_ms_filepath = os.path.join(tmp_dir, "combined.MS") @@ -219,7 +218,7 @@ def combine_vis( ) # Write combined visibility data - print("### Writing combined visibilities in ", combined_ms_filepath) + print(f"### Writing combined visibilities in {combined_ms_filepath} ...") num_files = len(visiblity_files) if group_by == "day": diff --git a/karabo/sourcedetection/result.py b/karabo/sourcedetection/result.py index 5f636d91..874d3798 100644 --- a/karabo/sourcedetection/result.py +++ b/karabo/sourcedetection/result.py @@ -299,7 +299,7 @@ def __init__( """ tmp_dir = FileHandler().get_tmp_dir( prefix="pybdsf-sdr-", - purpose="pybdsf source-detection-result disk-cache.", + purpose="pybdsf source-detection-result disk-cache", unique=self, ) sources_file = os.path.join(tmp_dir, "sources.csv") @@ -348,7 +348,7 @@ def __transform_bdsf_to_reduced_result_array( def __get_result_image(self, image_type: str, **kwargs: Any) -> Image: tmp_dir = FileHandler().get_tmp_dir( prefix="pybdsf-sdr-", - purpose="pybdsf source-detection-result disk-cache.", + purpose="pybdsf source-detection-result disk-cache", unique=self, ) outfile = os.path.join(tmp_dir, f"{image_type}-result.fits") diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index 3d172dc1..609508fd 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -16,7 +16,6 @@ IS_GITHUB_RUNNER = os.environ.get("IS_GITHUB_RUNNER", "false").lower() == "true" RUN_GPU_TESTS = os.environ.get("RUN_GPU_TESTS", "false").lower() == "true" -file_handler_test_dir = os.path.join(os.path.dirname(__file__), "karabo_test") def pytest_addoption(parser: Parser) -> None: @@ -122,16 +121,15 @@ def tobject() -> TFiles: @pytest.fixture(scope="function", autouse=True) def clean_disk() -> Generator[None, None, None]: - """Automatically clears FileHandler.root after each test. + """Automatically clears FileHandler's short-term-memory after each test. Needed in some cases where the underlying functions do use FileHanlder which could lead to IOError because of disk-space limitations. 
""" # Setup: fill with logic - FileHandler.root = file_handler_test_dir yield # testing happens here # Teardown: fill with logic - FileHandler.clean_up_fh_root(force=True, verbose=False) + FileHandler().clean() plt.close("all") diff --git a/karabo/test/test_filehandler.py b/karabo/test/test_filehandler.py index a670bc8a..2564743d 100644 --- a/karabo/test/test_filehandler.py +++ b/karabo/test/test_filehandler.py @@ -2,79 +2,69 @@ import os import tempfile -from karabo.test.conftest import file_handler_test_dir from karabo.util.file_handler import FileHandler -def test_file_handler_global(): +def test_file_handler(): """Test global FileHanlder functionality.""" with tempfile.TemporaryDirectory() as tmpdir: - # test root - assert FileHandler.root == file_handler_test_dir FileHandler.root = tmpdir - - # add 2 dirs created through FileHanlder with and without random content - fh1 = FileHandler(prefix="my_domain", verbose=True) - with open(os.path.join(fh1.subdir, "my_json.json"), "w") as outfile1: + assert FileHandler.is_dir_empty(dirname=tmpdir) + assert len(os.listdir(tmpdir)) == 0 + tmpdir_fh1 = FileHandler().get_tmp_dir( + prefix="dummy-", + purpose="test-file-handler-global disk-cache", + ) + assert len(os.listdir(tmpdir)) == 1 + assert not FileHandler.is_dir_empty(dirname=tmpdir) + assert len(os.listdir(FileHandler.stm)) == 1 + json_path = os.path.join(tmpdir_fh1, "my_json.json") + with open(json_path, "w") as outfile1: json.dump({"A": "B"}, outfile1) - _ = FileHandler(prefix="my_other_domain", verbose=False) + assert os.path.exists(json_path) + fh_instance = FileHandler() + _ = fh_instance.get_tmp_dir( + prefix="dummy-", # same name as fh1 is intentional + subdir="dummy-dir", + ) + assert len(os.listdir(FileHandler.stm)) == 2 + _ = fh_instance.get_tmp_dir( + mkdir=False, + ) + assert len(os.listdir(FileHandler.stm)) == 2 + _ = FileHandler().get_tmp_dir( + term="long", + subdir="dummy-dir", + ) assert len(os.listdir(tmpdir)) == 2 + assert len(os.listdir(FileHandler.ltm)) == 1 + assert len(os.listdir(FileHandler.stm)) == 2 - # create 3 additional random other dirs and files on `tmpdir` level - # and fill one dir with random content - os.mkdir(path=os.path.join(tmpdir, "my_dir1")) - with open(os.path.join(tmpdir, "my_dir1", "my_json.json"), "w") as outfile2: - json.dump({"A": "B"}, outfile2) - os.mkdir(path=os.path.join(tmpdir, "my_dir2")) - with open(os.path.join(tmpdir, "my_root_json.json"), "w") as outfile3: - json.dump({"A": "B"}, outfile3) - assert len(os.listdir(tmpdir)) == 5 + fh_instance.clean_instance() + assert len(os.listdir(FileHandler.stm)) == 1 - # test removal of dirs not created from FileHandler - FileHandler.clean_up_fh_root(force=False, verbose=True) - assert len(os.listdir(tmpdir)) == 3 - # test removal of emtpy (remaining) dirs - FileHandler.remove_empty_dirs(consider_fh_dir_identifier=False) - assert len(os.listdir(tmpdir)) == 2 # 1 file & 1 non-empty dir - # test removal of FileHandler root - FileHandler.clean_up_fh_root(force=True, verbose=False) - assert not os.path.exists(tmpdir) + empty_path = FileHandler.get_tmp_dir() + _ = FileHandler.get_tmp_dir() + assert len(os.listdir(FileHandler.stm)) == 3 + FileHandler.empty_dir(dir_path=empty_path) + assert len(os.listdir(FileHandler.stm)) == 2 - -def test_file_handler_instances(): - """Test instance bound dir creation and removal.""" - with tempfile.TemporaryDirectory() as tmpdir: - FileHandler.root = tmpdir - fh1 = FileHandler(prefix="my_domain", verbose=True) - assert len(os.listdir(tmpdir)) == 1 - fh2 = FileHandler() - 
assert len(os.listdir(tmpdir)) == 2 - fh1.clean_up() - assert len(os.listdir(tmpdir)) == 1 - fh2.clean_up() - assert not os.path.exists(tmpdir) - with FileHandler() as fhdir: - assert os.path.exists(fhdir) - assert len(os.listdir(tmpdir)) == 1 - assert not os.path.exists(tmpdir) + FileHandler.clean() + assert len(os.listdir(FileHandler.stm)) == 0 -def test_get_file_handler(): +def test_object_bound_file_handler(): """Test obj unique FileHandler creation.""" - prefix = "my_domain" class MyClass: ... with tempfile.TemporaryDirectory() as tmpdir: FileHandler.root = tmpdir - my_obj1 = MyClass() - my_obj2 = MyClass() - _ = FileHandler.get_file_handler(obj=my_obj1, prefix=prefix, verbose=True) - assert len(os.listdir(tmpdir)) == 1 - _ = FileHandler.get_file_handler(obj=my_obj1, prefix=prefix, verbose=False) - len(os.listdir(tmpdir)) - assert len(os.listdir(tmpdir)) == 1 - _ = FileHandler.get_file_handler(obj=my_obj2, prefix=prefix, verbose=False) - len(os.listdir(tmpdir)) - assert len(os.listdir(tmpdir)) == 2 + my_obj = MyClass() + assert len(os.listdir(FileHandler.stm)) == 0 + tmpdir_fh1 = FileHandler().get_tmp_dir(unique=my_obj) + assert len(os.listdir(FileHandler.stm)) == 1 + tmpdir_fh2 = FileHandler().get_tmp_dir(unique=my_obj) + assert len(os.listdir(FileHandler.stm)) == 1 + assert tmpdir_fh1 == tmpdir_fh2 diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index d5fb3bf0..f17d73b8 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -252,7 +252,7 @@ def get_tmp_dir( assert_never(term) if not exist_ok and os.path.exists(dir_path): raise FileExistsError(f"{dir_path} already exists") - if mkdir: + if mkdir and not os.path.exists(dir_path): os.makedirs(dir_path, exist_ok=exist_ok) if purpose and len(purpose) > 0: purpose = f" for {purpose}" @@ -313,6 +313,19 @@ def remove_empty_dirs(term: _LongShortTermType = "short") -> None: if os.path.isdir(path) and len(os.listdir(path=path)) == 0: shutil.rmtree(path=path) + @staticmethod + def empty_dir(dir_path: DirPathType) -> None: + """Deletes all contents of `dir_path`, but not the directory itself. + + This function assumes that all filed and directories are owned by + the function-user. + + Args: + dir_path: Directory to empty. + """ + shutil.rmtree(dir_path) + os.makedirs(dir_path, exist_ok=False) + def __enter__(self) -> str: return self.get_tmp_dir(prefix=None, term="short") From b7d88e281840d1100672a3370ebb96637420b550 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 22 Jan 2024 14:37:04 +0100 Subject: [PATCH 185/207] bugfix accessing .ltm and .stm through FileHandler :tongue: --- karabo/util/file_handler.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index f17d73b8..6daf5bc6 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -116,40 +116,28 @@ class FileHandler: ├── └── - LTM stand for long-term-memory (self.ltm) and STM for short-term-memory (self.stm). - The data-products usually get into in the STM directory. + LTM stand for long-term-memory (FileHandler.ltm) and STM for short-term-memory + (FileHandler.stm). The data-products usually get into in the STM directory. FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. 
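A minimal sketch of the two usage patterns mentioned above; the throw-away class `MyJob` and the scratch file name are purely illustrative:

```python
import os

from karabo.util.file_handler import FileHandler


class MyJob:
    ...


# `with`-usage: behaves like `tempfile.TemporaryDirectory`, the STM dir is
# created on enter and cleaned up again when the block exits
with FileHandler() as tmp_dir:
    scratch_file = os.path.join(tmp_dir, "scratch.npy")

# object-bound usage: repeated calls with the same `unique` object
# hand back the same STM directory
job = MyJob()
dir_a = FileHandler().get_tmp_dir(unique=job)
dir_b = FileHandler().get_tmp_dir(unique=job)
assert dir_a == dir_b
```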
""" root: str = _get_tmp_dir() + ltm = os.path.join(root, _get_cache_dir(term="long")) + stm = os.path.join(root, _get_cache_dir(term="short")) def __init__( self, ) -> None: """Creates `FileHandler` instance.""" - self._ltm_dir_name = _get_cache_dir(term="long") - self._stm_dir_name = _get_cache_dir(term="short") # tmps is an instance bound dirs and/or files registry for STM self.tmps: list[str] = list() - @property - def ltm(self) -> str: - ltm_path = os.path.join(FileHandler.root, self._ltm_dir_name) - os.makedirs(ltm_path, exist_ok=True) - return ltm_path - - @property - def stm(self) -> str: - stm_path = os.path.join(FileHandler.root, self._stm_dir_name) - os.makedirs(stm_path, exist_ok=True) - return stm_path - def _get_term_dir(self, term: _LongShortTermType) -> str: if term == "short": - dir_ = self.stm + dir_ = FileHandler.stm elif term == "long": - dir_ = self.ltm + dir_ = FileHandler.ltm else: assert_never(term) return dir_ @@ -231,7 +219,8 @@ def get_tmp_dir( if prefix is not None: dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) - setattr(unique, obj_tmp_dir_short_name, dir_path) + if unique is not None: + setattr(unique, obj_tmp_dir_short_name, dir_path) self.tmps.append(dir_path) if subdir is not None: dir_path = os.path.join(dir_path, subdir) From e106a4aeb1f82c14da43390d8bdfd486b6be081b Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 22 Jan 2024 15:17:50 +0100 Subject: [PATCH 186/207] bugfixes FileHandler & according tests :memo: --- .../time_karabo_parallelization_by_channel.py | 2 +- karabo/test/conftest.py | 2 +- karabo/test/test_filehandler.py | 47 +++++++++++++------ karabo/util/file_handler.py | 30 +++++++----- 4 files changed, 53 insertions(+), 28 deletions(-) diff --git a/karabo/performance_test/time_karabo_parallelization_by_channel.py b/karabo/performance_test/time_karabo_parallelization_by_channel.py index 11d380f4..53005941 100644 --- a/karabo/performance_test/time_karabo_parallelization_by_channel.py +++ b/karabo/performance_test/time_karabo_parallelization_by_channel.py @@ -79,7 +79,7 @@ def main(n_channels: int, memory_limit: Optional[int] = None) -> None: ) file.flush() - FileHandler().clean() + FileHandler.clean() if __name__ == "__main__": diff --git a/karabo/test/conftest.py b/karabo/test/conftest.py index 609508fd..acbc9b51 100644 --- a/karabo/test/conftest.py +++ b/karabo/test/conftest.py @@ -129,7 +129,7 @@ def clean_disk() -> Generator[None, None, None]: # Setup: fill with logic yield # testing happens here # Teardown: fill with logic - FileHandler().clean() + FileHandler.clean() plt.close("all") diff --git a/karabo/test/test_filehandler.py b/karabo/test/test_filehandler.py index 2564743d..5ee14d16 100644 --- a/karabo/test/test_filehandler.py +++ b/karabo/test/test_filehandler.py @@ -2,6 +2,8 @@ import os import tempfile +import pytest + from karabo.util.file_handler import FileHandler @@ -17,40 +19,55 @@ def test_file_handler(): ) assert len(os.listdir(tmpdir)) == 1 assert not FileHandler.is_dir_empty(dirname=tmpdir) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 json_path = os.path.join(tmpdir_fh1, "my_json.json") with open(json_path, "w") as outfile1: json.dump({"A": "B"}, outfile1) assert os.path.exists(json_path) fh_instance = FileHandler() _ = fh_instance.get_tmp_dir( - prefix="dummy-", # same name as fh1 is intentional + prefix="dummy-", subdir="dummy-dir", ) - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.stm())) == 2 
_ = fh_instance.get_tmp_dir( mkdir=False, ) - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.stm())) == 2 + with pytest.raises(RuntimeError): + _ = FileHandler().get_tmp_dir( + term="long", + subdir="dummy-dir", + ) _ = FileHandler().get_tmp_dir( term="long", + prefix="dummy-ltm-name", subdir="dummy-dir", ) assert len(os.listdir(tmpdir)) == 2 - assert len(os.listdir(FileHandler.ltm)) == 1 - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.ltm())) == 1 + assert len(os.listdir(FileHandler.stm())) == 2 fh_instance.clean_instance() - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 - empty_path = FileHandler.get_tmp_dir() - _ = FileHandler.get_tmp_dir() - assert len(os.listdir(FileHandler.stm)) == 3 + _ = FileHandler().get_tmp_dir() + fh_empty = FileHandler() + empty_path = fh_empty.get_tmp_dir() + assert len(os.listdir(FileHandler.stm())) == 3 + assert len(os.listdir(empty_path)) == 0 + json_empty_path = os.path.join(empty_path, "my_json.json") + with open(json_empty_path, "w") as outfile2: + json.dump({"A": "B"}, outfile2) + assert len(os.listdir(empty_path)) == 1 FileHandler.empty_dir(dir_path=empty_path) - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(empty_path)) == 0 + assert len(os.listdir(FileHandler.stm())) == 3 + fh_empty.clean_instance() + assert len(os.listdir(FileHandler.stm())) == 2 FileHandler.clean() - assert len(os.listdir(FileHandler.stm)) == 0 + assert not os.path.exists(FileHandler.stm()) def test_object_bound_file_handler(): @@ -62,9 +79,9 @@ class MyClass: with tempfile.TemporaryDirectory() as tmpdir: FileHandler.root = tmpdir my_obj = MyClass() - assert len(os.listdir(FileHandler.stm)) == 0 + assert not os.path.exists(FileHandler.stm()) tmpdir_fh1 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 tmpdir_fh2 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 assert tmpdir_fh1 == tmpdir_fh2 diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 6daf5bc6..770f0fbc 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -116,15 +116,22 @@ class FileHandler: ├── └── - LTM stand for long-term-memory (FileHandler.ltm) and STM for short-term-memory - (FileHandler.stm). The data-products usually get into in the STM directory. + LTM stand for long-term-memory (FileHandler.ltm()) and STM for short-term-memory + (FileHandler.stm()). The data-products usually get into in the STM directory. FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. 
""" root: str = _get_tmp_dir() - ltm = os.path.join(root, _get_cache_dir(term="long")) - stm = os.path.join(root, _get_cache_dir(term="short")) + + @staticmethod + def ltm() -> str: + """Gives LTM (long-term-memory) path.""" + return os.path.join(FileHandler.root, _get_cache_dir(term="long")) + + def stm() -> str: + """Gives the STM (short-term-memory) path.""" + return os.path.join(FileHandler.root, _get_cache_dir(term="short")) def __init__( self, @@ -133,11 +140,12 @@ def __init__( # tmps is an instance bound dirs and/or files registry for STM self.tmps: list[str] = list() - def _get_term_dir(self, term: _LongShortTermType) -> str: + @staticmethod + def _get_term_dir(term: _LongShortTermType) -> str: if term == "short": - dir_ = FileHandler.stm + dir_ = FileHandler.stm() elif term == "long": - dir_ = FileHandler.ltm + dir_ = FileHandler.ltm() else: assert_never(term) return dir_ @@ -214,7 +222,7 @@ def get_tmp_dir( dir_path = os.path.join(dir_path, subdir) exist_ok = True elif term == "short": - dir_path = self._get_term_dir(term=term) + dir_path = FileHandler._get_term_dir(term=term) dir_name = _get_rnd_str(k=10, seed=None) if prefix is not None: dir_name = "".join((prefix, dir_name)) @@ -226,7 +234,7 @@ def get_tmp_dir( dir_path = os.path.join(dir_path, subdir) exist_ok = False elif term == "long": - dir_path = self._get_term_dir(term=term) + dir_path = FileHandler._get_term_dir(term=term) if prefix is None: raise RuntimeError( "For long-term-memory, `prefix` must be set to have unique dirs." @@ -256,8 +264,8 @@ def clean_instance(self) -> None: shutil.rmtree(tmp) self.tmps.remove(tmp) + @staticmethod def clean( - self, term: _LongShortTermType = "short", ) -> None: """Removes the entire directory specified by `term`. @@ -267,7 +275,7 @@ def clean( Args: term: "long" or "short" term memory """ - dir_ = self._get_term_dir(term=term) + dir_ = FileHandler._get_term_dir(term=term) if os.path.exists(dir_): shutil.rmtree(dir_) From c59fbe3509ecf1ea407a7bab720da881b380d7b4 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 22 Jan 2024 17:10:56 +0100 Subject: [PATCH 187/207] changed Downloadobject-storage cache from site-packages to tmp :foggy: --- karabo/data/external_data.py | 65 ++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/karabo/data/external_data.py b/karabo/data/external_data.py index 4679285a..6b62b3f3 100644 --- a/karabo/data/external_data.py +++ b/karabo/data/external_data.py @@ -1,32 +1,11 @@ import os import re -import site from typing import List import requests from karabo.util._types import FilePathType - - -class KaraboCache: - """Organizes the caching of Karabo. - - Set `KaraboCache.base_path` manually for custom cache directory. - """ - - base_path: str = site.getsitepackages()[0] - use_scratch_folder_if_exist: bool = True - - if "SCRATCH" in os.environ and use_scratch_folder_if_exist: - base_path = os.environ["SCRATCH"] - - @staticmethod - def get_cache_directory(mkdir: bool = False) -> str: - cache_path = os.path.join(KaraboCache.base_path, "karabo_cache") - if mkdir and not os.path.exists(cache_path): - os.mkdir(cache_path) - return cache_path - +from karabo.util.file_handler import FileHandler cscs_base_url = "https://object.cscs.ch/v1/AUTH_1e1ed97536cf4e8f9e214c7ca2700d62" cscs_karabo_public_base_url = f"{cscs_base_url}/karabo_public" @@ -34,6 +13,22 @@ def get_cache_directory(mkdir: bool = False) -> str: class DownloadObject: + """Download handler for remote files & dirs. 
+ + Important: There is a remote file-naming-convention, to be able to provide + updates of cached dirs/files & to simplify maintainability. + The convention for each object is _v, where the version + should be an integer, starting from 1. should be the same as . + The additional is to have a single directory for each object, so that + additional file/dir versions don't disturb the overall remote structure. + + The version of a downloaded object is determined by the current version of Karabo, + meaning that they're hard-coded. Because Karabo relies partially on remote-objects, + we don't guarantee their availability for deprecated Karabo versions. + """ + + split = "/" + def __init__( self, remote_base_url: str, @@ -59,15 +54,27 @@ def download(url: str, local_file_path: FilePathType) -> int: return response.status_code def get_object(self, remote_file_path: str, verbose: bool = True) -> str: - local_cache_dir = KaraboCache.get_cache_directory(mkdir=True) + if verbose: + purpose = "download-objects caching" + else: + purpose = None + local_cache_dir = FileHandler().get_tmp_dir( + prefix="objects-download", + term="long", + purpose=purpose, + ) local_file_path = os.path.join( local_cache_dir, - os.path.join(*remote_file_path.split("/")), # convert to local filesys sep + os.path.join( + *remote_file_path.split(DownloadObject.split) + ), # convert to local filesys.sep ) if not os.path.exists(local_file_path): - remote_url = f"{self.remote_base_url}/{remote_file_path}" + remote_url = ( + f"{self.remote_base_url}{DownloadObject.split}{remote_file_path}" + ) if verbose: - print(f"Download {remote_file_path} to {local_file_path} for caching.") + print(f"Download {remote_file_path} to {local_file_path} ...") _ = DownloadObject.download(url=remote_url, local_file_path=local_file_path) return local_file_path @@ -103,7 +110,9 @@ def get(self, verbose: bool = True) -> str: ) def is_available(self) -> bool: - remote_url = f"{self.remote_base_url}/{self.remote_file_path}" + remote_url = ( + f"{self.remote_base_url}{DownloadObject.split}{self.remote_file_path}" + ) return DownloadObject.is_url_available(url=remote_url) @@ -226,5 +235,5 @@ def __init__( ) -> None: super().__init__( remote_url=cscs_karabo_public_base_url, - regexr_pattern=f"MGCLS/{regexr_pattern}", + regexr_pattern=f"MGCLS{DownloadObject.split}{regexr_pattern}", ) From b4fd8cd758eab140533fcd92afb794d36f9b1891 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 23 Jan 2024 08:20:39 +0100 Subject: [PATCH 188/207] removed weird KaraboResource pseudo-interface from repo :hearts: --- karabo/data/external_data.py | 6 +++++- karabo/imaging/image.py | 3 +-- karabo/karabo_resource.py | 19 +------------------ karabo/simulation/telescope.py | 3 +-- karabo/simulation/visibility.py | 3 +-- karabo/sourcedetection/result.py | 3 +-- 6 files changed, 10 insertions(+), 27 deletions(-) diff --git a/karabo/data/external_data.py b/karabo/data/external_data.py index 6b62b3f3..f7904e57 100644 --- a/karabo/data/external_data.py +++ b/karabo/data/external_data.py @@ -1,5 +1,6 @@ import os import re +import shutil from typing import List import requests @@ -49,7 +50,10 @@ def download(url: str, local_file_path: FilePathType) -> int: file.write(chunk) except BaseException: # cleanup if interrupted if os.path.exists(local_file_path): - os.remove(local_file_path) + if os.path.isdir(local_file_path): + shutil.rmtree(local_file_path) + else: + os.remove(local_file_path) raise return response.status_code diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py 
index add751c6..0a128005 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -28,7 +28,6 @@ from reproject.mosaicking import find_optimal_celestial_wcs, reproject_and_coadd from scipy.interpolate import RegularGridInterpolator -from karabo.karabo_resource import KaraboResource from karabo.util._types import FilePathType from karabo.util.file_handler import FileHandler, check_ending from karabo.util.plotting_util import get_slices @@ -40,7 +39,7 @@ matplotlib.use(previous_backend) -class Image(KaraboResource): +class Image: @overload def __init__( self, diff --git a/karabo/karabo_resource.py b/karabo/karabo_resource.py index 258248eb..1d44a9c0 100644 --- a/karabo/karabo_resource.py +++ b/karabo/karabo_resource.py @@ -3,27 +3,10 @@ import os import sys from types import TracebackType -from typing import Any, Literal, Optional, TextIO +from typing import Literal, Optional, TextIO import numpy as np - -class KaraboResource: - def write_to_file(self, path: str) -> None: - """ - Save the specified resource to disk (in format specified by resource itself) - """ - raise NotImplementedError() - - @staticmethod - def read_from_file(path: str) -> Any: - """ - Read the specified resource from disk into Karabo. - (format specified by resource itself) - """ - raise NotImplementedError() - - ErrKind = Literal["ignore", "warn", "raise", "call", "print", "log"] diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 45c48996..ef7690f7 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -19,7 +19,6 @@ import karabo.error from karabo.error import KaraboError -from karabo.karabo_resource import KaraboResource from karabo.simulation.coordinate_helper import east_north_to_long_lat from karabo.simulation.east_north_coordinate import EastNorthCoordinate from karabo.simulation.station import Station @@ -96,7 +95,7 @@ } -class Telescope(KaraboResource): +class Telescope: """Telescope WGS84 longitude and latitude and altitude in metres centre of the telescope.png diff --git a/karabo/simulation/visibility.py b/karabo/simulation/visibility.py index 39885d78..eb2462a3 100644 --- a/karabo/simulation/visibility.py +++ b/karabo/simulation/visibility.py @@ -9,12 +9,11 @@ import oskar from numpy.typing import NDArray -from karabo.karabo_resource import KaraboResource from karabo.util._types import DirPathType, FilePathType from karabo.util.file_handler import FileHandler -class Visibility(KaraboResource): +class Visibility: def __init__( self, vis_path: Optional[FilePathType] = None, diff --git a/karabo/sourcedetection/result.py b/karabo/sourcedetection/result.py index 874d3798..743a97f3 100644 --- a/karabo/sourcedetection/result.py +++ b/karabo/sourcedetection/result.py @@ -16,7 +16,6 @@ from karabo.imaging.image import Image, ImageMosaicker from karabo.imaging.imager import Imager -from karabo.karabo_resource import KaraboResource from karabo.util.dask import DaskHandler from karabo.util.data_util import read_CSV_to_ndarray from karabo.util.file_handler import FileHandler @@ -72,7 +71,7 @@ def get_source_image(self) -> Optional[Image]: ... 
-class SourceDetectionResult(ISourceDetectionResult, KaraboResource): +class SourceDetectionResult(ISourceDetectionResult): def __init__( self, detected_sources: NDArray[np.float_], From 0ab6c721073552b4a190910e01e6a3f398f3bc1a Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 23 Jan 2024 11:05:02 +0100 Subject: [PATCH 189/207] removed FileHandler get-tmp-dir subdir option because seems unnecessary :fuelpump: --- karabo/data/external_data.py | 3 ++- karabo/simulation/telescope.py | 8 +++++--- karabo/test/test_filehandler.py | 3 --- karabo/util/file_handler.py | 35 ++++++++++++++++++++++----------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/karabo/data/external_data.py b/karabo/data/external_data.py index f7904e57..12f22113 100644 --- a/karabo/data/external_data.py +++ b/karabo/data/external_data.py @@ -42,7 +42,8 @@ def download(url: str, local_file_path: FilePathType) -> int: response = requests.get(url, stream=True) response.raise_for_status() - os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + download_dir = os.path.dirname(local_file_path) + os.makedirs(download_dir, exist_ok=True) with open(local_file_path, "wb") as file: for chunk in response.iter_content( chunk_size=8192 diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index ef7690f7..0e95d11c 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -380,11 +380,13 @@ def get_OSKAR_telescope(self) -> OskarTelescope: :return: OSKAR Telescope object """ tmp_dir = FileHandler().get_tmp_dir( - prefix="oskar-telescope-", - purpose="oskar-telescope disk-cache", + prefix="telescope-", + purpose="telescope disk-cache", unique=self, - subdir="oskar-telescope", # in case other files should get cached by self + mkdir=False, ) + tmp_dir = os.path.join(tmp_dir, "oskar-telescope") + os.makedirs(tmp_dir, exist_ok=True) if not FileHandler.is_dir_empty(dirname=tmp_dir): FileHandler.empty_dir(dir_path=tmp_dir) self.write_to_file(tmp_dir) diff --git a/karabo/test/test_filehandler.py b/karabo/test/test_filehandler.py index 5ee14d16..d74a1c77 100644 --- a/karabo/test/test_filehandler.py +++ b/karabo/test/test_filehandler.py @@ -27,7 +27,6 @@ def test_file_handler(): fh_instance = FileHandler() _ = fh_instance.get_tmp_dir( prefix="dummy-", - subdir="dummy-dir", ) assert len(os.listdir(FileHandler.stm())) == 2 _ = fh_instance.get_tmp_dir( @@ -37,12 +36,10 @@ def test_file_handler(): with pytest.raises(RuntimeError): _ = FileHandler().get_tmp_dir( term="long", - subdir="dummy-dir", ) _ = FileHandler().get_tmp_dir( term="long", prefix="dummy-ltm-name", - subdir="dummy-dir", ) assert len(os.listdir(tmpdir)) == 2 assert len(os.listdir(FileHandler.ltm())) == 1 diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 770f0fbc..c9dc1250 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -25,6 +25,14 @@ def _get_tmp_dir() -> str: Honors TMPDIR and TMP environment variable(s). The only thing not allowed is a collision between the mentioned env-vars. + In a container-setup, this dir is preferably a mounted dir. For long-term-memory + so that each object doesn't have to be downloaded for each run. For + short-term-memory so that the created artifacts are locatable on the launch system. + + Singularity & Sarus container usually use a mounted /tmp. However, this is not the + default case for Docker containers. This may be a reason to put the download-objects + into /tmp of the Docker-image. 
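A hedged sketch of how this is typically driven from the environment; the path is made up, and it assumes the variable is set before Karabo is imported, since `FileHandler.root` is resolved at import time:

```python
import os

# e.g. a directory that is mounted into the container
os.environ["TMPDIR"] = "/scratch/karabo"

from karabo.util.file_handler import FileHandler  # noqa: E402

print(FileHandler.root)  # should now live below the mounted scratch directory
```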
+ Returns: path of tmpdir """ @@ -54,7 +62,18 @@ def _get_tmp_dir() -> str: return tmpdir -def _get_rnd_str(k: int, seed: str | int | float | bytes | None = None) -> str: +def _get_rnd_str(k: int, seed: Optional[Union[str, int, float, bytes]] = None) -> str: + """Creates a random ascii+digits string with length=`k`. + + Most tmp-file tools are using a sting-length of 10. + + Args: + k: Length of random string. + seed: Seed. + + Returns: + Random generated string. + """ random.seed(seed) return "".join(random.choices(string.ascii_letters + string.digits, k=k)) @@ -64,6 +83,8 @@ def _get_cache_dir(term: _LongShortTermType) -> str: dir-name: karabo--($USER-)<10-rnd-asci-letters-and-digits> + The random-part of the cache-dir is seeded for relocation purpose. + Returns: cache-dir-name """ @@ -157,7 +178,6 @@ def get_tmp_dir( term: Literal["short"] = "short", purpose: Union[str, None] = None, unique: object = None, - subdir: Union[DirPathType, None] = None, mkdir: bool = True, ) -> str: ... @@ -169,7 +189,6 @@ def get_tmp_dir( term: Literal["long"], purpose: Union[str, None] = None, unique: object = None, - subdir: Union[DirPathType, None] = None, mkdir: bool = True, ) -> str: ... @@ -180,7 +199,6 @@ def get_tmp_dir( term: _LongShortTermType = "short", purpose: Union[str, None] = None, unique: object = None, - subdir: Union[DirPathType, None] = None, mkdir: bool = True, ) -> str: """Gets a tmp-dir path. @@ -193,14 +211,13 @@ def get_tmp_dir( purpose: Creates a verbose print-msg with it's purpose if set. unique: If an object which has attributes is provided, then you get the same tmp-dir for the unique instance. - subdir: If set, it directly creates & returns /subdir mkdir: Make-dir directly? Returns: tmp-dir path """ obj_tmp_dir_short_name = "_karabo_tmp_dir_short" - tmp_dir: Union[str, None] = None # without subdir + tmp_dir: Union[str, None] = None if unique is not None: if term != "short": raise RuntimeError( @@ -218,8 +235,6 @@ def get_tmp_dir( if tmp_dir is not None: dir_path = tmp_dir - if subdir is not None: - dir_path = os.path.join(dir_path, subdir) exist_ok = True elif term == "short": dir_path = FileHandler._get_term_dir(term=term) @@ -230,8 +245,6 @@ def get_tmp_dir( if unique is not None: setattr(unique, obj_tmp_dir_short_name, dir_path) self.tmps.append(dir_path) - if subdir is not None: - dir_path = os.path.join(dir_path, subdir) exist_ok = False elif term == "long": dir_path = FileHandler._get_term_dir(term=term) @@ -242,8 +255,6 @@ def get_tmp_dir( dir_name = _get_rnd_str(k=10, seed=prefix) dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) - if subdir is not None: - dir_path = os.path.join(dir_path, subdir) exist_ok = True else: assert_never(term) From d8993447ede6d549d118d2ad9a360b9bbdd1d43e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 23 Jan 2024 11:16:22 +0100 Subject: [PATCH 190/207] implemented seed-option to FileHandler.get-tmp-dir :oden: --- karabo/util/file_handler.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index c9dc1250..27b98702 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -15,6 +15,7 @@ from karabo.util.plotting_util import Font _LongShortTermType = Literal["long", "short"] +_SeedType = Optional[Union[str, int, float, bytes]] def _get_tmp_dir() -> str: @@ -62,7 +63,7 @@ def _get_tmp_dir() -> str: return tmpdir -def _get_rnd_str(k: int, seed: Optional[Union[str, int, float, bytes]] = None) 
-> str: +def _get_rnd_str(k: int, seed: _SeedType = None) -> str: """Creates a random ascii+digits string with length=`k`. Most tmp-file tools are using a sting-length of 10. @@ -179,6 +180,7 @@ def get_tmp_dir( purpose: Union[str, None] = None, unique: object = None, mkdir: bool = True, + seed: _SeedType = None, ) -> str: ... @@ -190,6 +192,7 @@ def get_tmp_dir( purpose: Union[str, None] = None, unique: object = None, mkdir: bool = True, + seed: _SeedType = None, ) -> str: ... @@ -200,6 +203,7 @@ def get_tmp_dir( purpose: Union[str, None] = None, unique: object = None, mkdir: bool = True, + seed: _SeedType = None, ) -> str: """Gets a tmp-dir path. @@ -212,6 +216,11 @@ def get_tmp_dir( unique: If an object which has attributes is provided, then you get the same tmp-dir for the unique instance. mkdir: Make-dir directly? + seed: Seed rnd chars+digits of a STM sub-dir for relocation + purpose of different processes? Shouldn't be used for LTM sub-dirs, + unless you know what you're doing. LTM sub-dirs are already seeded with + `prefix`. However, if they are seeded for some reason, the seed is then + something like `prefix` + `seed`, which leads to different LTM sub-dirs. Returns: tmp-dir path @@ -238,7 +247,7 @@ def get_tmp_dir( exist_ok = True elif term == "short": dir_path = FileHandler._get_term_dir(term=term) - dir_name = _get_rnd_str(k=10, seed=None) + dir_name = _get_rnd_str(k=10, seed=seed) if prefix is not None: dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) @@ -252,7 +261,9 @@ def get_tmp_dir( raise RuntimeError( "For long-term-memory, `prefix` must be set to have unique dirs." ) - dir_name = _get_rnd_str(k=10, seed=prefix) + if seed is not None: + seed = prefix + str(seed) + dir_name = _get_rnd_str(k=10, seed=seed) dir_name = "".join((prefix, dir_name)) dir_path = os.path.join(dir_path, dir_name) exist_ok = True From ebf3c8527f513cfef9bbd2f6812046202ffcd757 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 23 Jan 2024 13:09:09 +0100 Subject: [PATCH 191/207] changed ltm & stm of FileHandler from static functions to lazy class-attributes :page_facing_up: --- karabo/test/test_filehandler.py | 26 +++++++++++++------------- karabo/util/file_handler.py | 26 ++++++++++++++++---------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/karabo/test/test_filehandler.py b/karabo/test/test_filehandler.py index d74a1c77..3e6a7e6d 100644 --- a/karabo/test/test_filehandler.py +++ b/karabo/test/test_filehandler.py @@ -19,7 +19,7 @@ def test_file_handler(): ) assert len(os.listdir(tmpdir)) == 1 assert not FileHandler.is_dir_empty(dirname=tmpdir) - assert len(os.listdir(FileHandler.stm())) == 1 + assert len(os.listdir(FileHandler.stm)) == 1 json_path = os.path.join(tmpdir_fh1, "my_json.json") with open(json_path, "w") as outfile1: json.dump({"A": "B"}, outfile1) @@ -28,11 +28,11 @@ def test_file_handler(): _ = fh_instance.get_tmp_dir( prefix="dummy-", ) - assert len(os.listdir(FileHandler.stm())) == 2 + assert len(os.listdir(FileHandler.stm)) == 2 _ = fh_instance.get_tmp_dir( mkdir=False, ) - assert len(os.listdir(FileHandler.stm())) == 2 + assert len(os.listdir(FileHandler.stm)) == 2 with pytest.raises(RuntimeError): _ = FileHandler().get_tmp_dir( term="long", @@ -42,16 +42,16 @@ def test_file_handler(): prefix="dummy-ltm-name", ) assert len(os.listdir(tmpdir)) == 2 - assert len(os.listdir(FileHandler.ltm())) == 1 - assert len(os.listdir(FileHandler.stm())) == 2 + assert len(os.listdir(FileHandler.ltm)) == 1 + assert 
len(os.listdir(FileHandler.stm)) == 2 fh_instance.clean_instance() - assert len(os.listdir(FileHandler.stm())) == 1 + assert len(os.listdir(FileHandler.stm)) == 1 _ = FileHandler().get_tmp_dir() fh_empty = FileHandler() empty_path = fh_empty.get_tmp_dir() - assert len(os.listdir(FileHandler.stm())) == 3 + assert len(os.listdir(FileHandler.stm)) == 3 assert len(os.listdir(empty_path)) == 0 json_empty_path = os.path.join(empty_path, "my_json.json") with open(json_empty_path, "w") as outfile2: @@ -59,12 +59,12 @@ def test_file_handler(): assert len(os.listdir(empty_path)) == 1 FileHandler.empty_dir(dir_path=empty_path) assert len(os.listdir(empty_path)) == 0 - assert len(os.listdir(FileHandler.stm())) == 3 + assert len(os.listdir(FileHandler.stm)) == 3 fh_empty.clean_instance() - assert len(os.listdir(FileHandler.stm())) == 2 + assert len(os.listdir(FileHandler.stm)) == 2 FileHandler.clean() - assert not os.path.exists(FileHandler.stm()) + assert not os.path.exists(FileHandler.stm) def test_object_bound_file_handler(): @@ -76,9 +76,9 @@ class MyClass: with tempfile.TemporaryDirectory() as tmpdir: FileHandler.root = tmpdir my_obj = MyClass() - assert not os.path.exists(FileHandler.stm()) + assert not os.path.exists(FileHandler.stm) tmpdir_fh1 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm())) == 1 + assert len(os.listdir(FileHandler.stm)) == 1 tmpdir_fh2 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm())) == 1 + assert len(os.listdir(FileHandler.stm)) == 1 assert tmpdir_fh1 == tmpdir_fh2 diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 27b98702..3d6e70a8 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -138,22 +138,25 @@ class FileHandler: ├── └── - LTM stand for long-term-memory (FileHandler.ltm()) and STM for short-term-memory - (FileHandler.stm()). The data-products usually get into in the STM directory. + LTM stand for long-term-memory (FileHandler.ltm) and STM for short-term-memory + (FileHandler.stm). The data-products usually get into in the STM directory. FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. 
""" root: str = _get_tmp_dir() - @staticmethod - def ltm() -> str: + @classmethod + @property + def ltm(cls) -> str: """Gives LTM (long-term-memory) path.""" - return os.path.join(FileHandler.root, _get_cache_dir(term="long")) + return os.path.join(cls.root, _get_cache_dir(term="long")) - def stm() -> str: + @classmethod + @property + def stm(cls) -> str: """Gives the STM (short-term-memory) path.""" - return os.path.join(FileHandler.root, _get_cache_dir(term="short")) + return os.path.join(cls.root, _get_cache_dir(term="short")) def __init__( self, @@ -165,9 +168,9 @@ def __init__( @staticmethod def _get_term_dir(term: _LongShortTermType) -> str: if term == "short": - dir_ = FileHandler.stm() + dir_ = FileHandler.stm elif term == "long": - dir_ = FileHandler.ltm() + dir_ = FileHandler.ltm else: assert_never(term) return dir_ @@ -254,7 +257,10 @@ def get_tmp_dir( if unique is not None: setattr(unique, obj_tmp_dir_short_name, dir_path) self.tmps.append(dir_path) - exist_ok = False + if seed is None: + exist_ok = False + else: + exist_ok = True elif term == "long": dir_path = FileHandler._get_term_dir(term=term) if prefix is None: From 5bffefa39ad6807a69a6756019d70303e14f19fe Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Tue, 23 Jan 2024 15:52:34 +0100 Subject: [PATCH 192/207] intermediate commit separating Dask & Slurm concerns in dask.py :clock5: --- karabo/__init__.py | 4 +- karabo/util/dask.py | 768 +++++++++++++++++++----------------- karabo/util/file_handler.py | 4 +- 3 files changed, 411 insertions(+), 365 deletions(-) diff --git a/karabo/__init__.py b/karabo/__init__.py index b4bc8844..cdfb9666 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -29,9 +29,9 @@ # Setup dask for slurm if "SLURM_JOB_ID" in os.environ: # ugly workaraound to not import stuff not available at build-time, but on import. 
- from karabo.util.dask import prepare_slurm_nodes_for_dask + from karabo.util.dask import DaskSlurmHandler - prepare_slurm_nodes_for_dask() + DaskSlurmHandler.prepare_slurm_nodes_for_dask() # set rascil data directory environment variable # see https://ska-telescope.gitlab.io/external/rascil/RASCIL_install.html diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 09ca500d..17d48336 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -4,10 +4,12 @@ import atexit import json import os +import shutil import sys import time from collections.abc import Iterable from typing import Any, Callable, List, Optional, Tuple, Union +from warnings import warn import psutil from dask import compute, delayed # type: ignore[attr-defined] @@ -15,22 +17,11 @@ from dask_mpi import initialize from mpi4py import MPI -from karabo.error import KaraboDaskError from karabo.util._types import IntFloat from karabo.util.data_util import extract_chars_from_string, extract_digit_from_string +from karabo.util.file_handler import FileHandler from karabo.warning import KaraboWarning -DASK_INFO_FOLDER = ".karabo_dask" -DASK_INFO_FILE = "dask_info.json" -DASK_RUN_STATUS = "dask_run_status.txt" - -## -if "SLURM_JOB_ID" in os.environ: - DASK_INFO_FOLDER = os.path.join(DASK_INFO_FOLDER, str(os.environ["SLURM_JOB_ID"])) -os.makedirs(DASK_INFO_FOLDER, exist_ok=True) -DASK_INFO_ADDRESS = os.path.join(DASK_INFO_FOLDER, DASK_INFO_FILE) -DASK_RUN_STATUS = os.path.join(DASK_INFO_FOLDER, DASK_RUN_STATUS) - class DaskHandler: """ @@ -97,398 +88,453 @@ class DaskHandler: _nodes_prepared: bool = False _setup_called: bool = False - @staticmethod - def setup() -> None: - _ = DaskHandler.get_dask_client() - DaskHandler._setup_called = True - - @staticmethod - def get_dask_client() -> Client: - if MPI.COMM_WORLD.Get_size() > 1: - n_threads_per_worker = DaskHandler.n_threads_per_worker + @classmethod + def setup(cls) -> None: + _ = cls.get_dask_client() + cls._setup_called = True + + @classmethod + def get_dask_client(cls) -> Client: + if cls.dask_client is not None: + return cls.dask_client + if MPI.COMM_WORLD.Get_size() > 1: # TODO: testing of whole if-block + n_threads_per_worker = cls.n_threads_per_worker if n_threads_per_worker is None: initialize(comm=MPI.COMM_WORLD) else: initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) - DaskHandler.dask_client = Client(processes=DaskHandler.use_proccesses) - elif DaskHandler.dask_client is None: - if ( - not DaskHandler._setup_called - and is_on_slurm_cluster() - and is_first_node() - ): - print( - KaraboWarning( - "DaskHandler.setup() has to be called at the beginning " - "of the script. This could lead to unexpected behaviour " - "on a SLURM cluster if not (see documentation)." 
- ) - ) - if is_on_slurm_cluster() and get_number_of_nodes() > 1: - DaskHandler.dask_client = setup_dask_for_slurm( - DaskHandler.n_workers_scheduler_node, DaskHandler.memory_limit - ) - else: - DaskHandler.dask_client = get_local_dask_client( - DaskHandler.memory_limit - ) - # Write the dashboard link to a file - with open("karabo-dask-dashboard.txt", "w") as f: - f.write(DaskHandler.dask_client.dashboard_link) + cls.dask_client = Client(processes=cls.use_proccesses) # TODO: testing + if MPI.COMM_WORLD.rank == 0: + print(f"Dashboard link: {cls.dask_client.dashboard_link}", flush=True) + atexit.register(cls.dask_cleanup, cls.dask_client) + else: + cls.dask_client = cls.get_local_dask_client(cls.memory_limit) # Register cleanup function - print(f"Dashboard link: {DaskHandler.dask_client.dashboard_link}") - atexit.register(dask_cleanup, DaskHandler.dask_client) - return DaskHandler.dask_client + print(f"Dashboard link: {cls.dask_client.dashboard_link}", flush=True) + atexit.register(cls.dask_cleanup, cls.dask_client) + return cls.dask_client - @staticmethod - def should_dask_be_used(override: Optional[bool] = None) -> bool: + @classmethod + def should_dask_be_used(cls, override: Optional[bool] = None) -> bool: if override is not None: return override - elif DaskHandler.use_dask is not None: - return DaskHandler.use_dask - elif DaskHandler.dask_client is not None: - return True - elif is_on_slurm_cluster() and get_number_of_nodes() > 1: + elif cls.use_dask is not None: + return cls.use_dask + elif cls.dask_client is not None: return True else: return False + @classmethod + def calc_num_of_workers( + cls, + memory_limit: Optional[IntFloat], + ) -> int: + """Estimates the number of workers considering settings and availability. + + Args: + memory_limit: Memory constraint. + + Returns: + Etimated number of workers. + """ + if memory_limit is None: + return 1 + # Calculate number of workers + ram = psutil.virtual_memory().available / 1e9 # GB + n_workers = int(ram / memory_limit) + if ram < memory_limit: + warn( + KaraboWarning( + f"Only {ram} GB of RAM available. Requested at least " + + f"{memory_limit} GB. Setting number of " + + "workers to 1." + ) + ) + n_workers = 1 + + if n_workers > (cpu_count := psutil.cpu_count()): + warn( + KaraboWarning( + f"Only {cpu_count} CPUs available. Requested " + + f"{n_workers} workers per node. Setting number of " + + f"workers to {cpu_count}." + ) + ) + n_workers = cpu_count -def parallelize_with_dask( - iterate_function: Callable[..., Any], - iterable: Iterable[Any], - *args: Any, - **kwargs: Any, -) -> Union[Any, Tuple[Any, ...], List[Any]]: - """ - Run a function over an iterable in parallel using Dask, and gather the results. - - Parameters - ---------- - iterate_function : callable - The function to be applied to each element of the iterable. The function should - take the current element of the iterable as its first argument, followed by any - positional arguments, and then any keyword arguments. - - iterable : iterable - The iterable over which the function will be applied. Each element of this - iterable will be passed to the `iterate_function`. - - *args : tuple - Positional arguments that will be passed to the `iterate_function` after the - current element of the iterable. - - **kwargs : dict - Keyword arguments that will be passed to the `iterate_function`. - - Returns - ------- - tuple - A tuple containing the results of the `iterate_function` for each element in the - iterable. The results are gathered using Dask's compute function. 
- - Notes - ----- - - If 'verbose' is present in **kwargs and is set to True, additional progress - messages will be printed. - - This function utilizes the distributed scheduler of Dask. - """ - if not DaskHandler._setup_called: - DaskHandler.setup() - - delayed_results = [] - - for element in iterable: - if "verbose" in kwargs and kwargs["verbose"]: - print(f"Processing element {element}...\nExtracting data...") - - delayed_result = delayed(iterate_function)(element, *args, **kwargs) - delayed_results.append(delayed_result) - - return compute(*delayed_results, scheduler="distributed") - - -def dask_cleanup(client: Client) -> None: - # Renove run status file - if os.path.exists(DASK_RUN_STATUS): - os.remove(DASK_RUN_STATUS) - - # Wait for nannys to shut down - time.sleep(10) - - # Remove the scheduler file if somehow it was not removed - if os.path.exists(DASK_INFO_ADDRESS): - os.remove(DASK_INFO_ADDRESS) - - # Remove the dashboard file if somehow it was not removed - if os.path.exists("karabo-dask-dashboard.txt"): - os.remove("karabo-dask-dashboard.txt") - - if client is not None: - client.shutdown() - client.close() - - -def prepare_slurm_nodes_for_dask() -> None: - # Detect if we are on a slurm cluster - if not is_on_slurm_cluster() or get_number_of_nodes() <= 1: - DaskHandler.use_dask = False - return - elif ( - is_first_node() - and DaskHandler.dask_client is None - and not DaskHandler._nodes_prepared - ): - DaskHandler._nodes_prepared = True - slurm_job_nodelist = check_env_var( - var="SLURM_JOB_NODELIST", fun=prepare_slurm_nodes_for_dask - ) - print( - f""" - Preparing SLURM nodes for dask... - First Node, containing the scheduler, is: {get_node_name()} - With the help of dask, the following nodes will be used: - {slurm_job_nodelist} - """ - ) - - elif not is_first_node() and not DaskHandler._nodes_prepared: - # TODO: Here setup_nannies_workers_for_slurm() could be called - # but there is no if name == main guard in this file. - pass - - -def calculate_number_of_workers_per_node( - memory_limit: Optional[IntFloat], -) -> int: - if memory_limit is None: - return 1 - # Calculate number of workers per node - ram = psutil.virtual_memory().available / 1e9 # GB - n_workers_per_node = int(ram / (memory_limit)) - if ram < memory_limit: - KaraboWarning( - f"Only {ram} GB of RAM available. Requested at least " - f"{memory_limit} GB. Setting number of " - f"workers per node to 1." - ) - n_workers_per_node = 1 + return n_workers - if n_workers_per_node > psutil.cpu_count(): - KaraboWarning( - f"Only {psutil.cpu_count()} CPUs available. Requested " - f"{n_workers_per_node} workers per node. Setting number of " - f"workers per node to {psutil.cpu_count()}." 
- ) - n_workers_per_node = psutil.cpu_count() - - return n_workers_per_node - - -def get_local_dask_client( - memory_limit: Optional[IntFloat], -) -> Client: - # Calculate number of workers per node - n_workers = calculate_number_of_workers_per_node(memory_limit) - client = Client( - n_workers=n_workers, - threads_per_worker=DaskHandler.n_threads_per_worker, - processes=DaskHandler.use_proccesses, - ) - return client - - -def setup_nannies_workers_for_slurm() -> None: - # Wait until dask info file is created - while not os.path.exists(DASK_INFO_ADDRESS): - time.sleep(1) - - # Load dask info file - with open(DASK_INFO_ADDRESS, "r") as f: - dask_info = json.load(f) - - # Calculate memory usage of each worker - if DaskHandler.memory_limit is None: - memory_limit = f"{psutil.virtual_memory().available / 1e9}GB" - else: - memory_limit = f"{DaskHandler.memory_limit}GB" - - async def start_worker(scheduler_address: str) -> Worker: - worker = await Worker( - scheduler_address, - nthreads=DaskHandler.n_threads_per_worker, - memory_limit=memory_limit, - ) - await worker.finished() - return worker # type: ignore - - async def start_nanny(scheduler_address: str) -> Nanny: - nanny = await Nanny( - scheduler_address, - nthreads=DaskHandler.n_threads_per_worker, - memory_limit=memory_limit, + @classmethod + def get_local_dask_client( + cls, + memory_limit: Optional[IntFloat], + ) -> Client: + # Calculate number of workers per node + n_workers = cls.calc_num_of_workers(memory_limit) + client = Client( + n_workers=n_workers, + threads_per_worker=cls.n_threads_per_worker, + processes=cls.use_proccesses, ) - await nanny.finished() - return nanny # type: ignore - - scheduler_address = str(dask_info["scheduler_address"]) - n_workers = int(str(dask_info["n_workers_per_node"])) - - # Start workers or nannies - workers_or_nannies: List[Union[Worker, Nanny]] = [] - for _ in range(n_workers): - if DaskHandler.use_workers_or_nannies == "workers": - worker = asyncio.run(start_worker(scheduler_address)) - workers_or_nannies.append(worker) - else: - nanny = asyncio.run(start_nanny(scheduler_address)) - workers_or_nannies.append(nanny) - - # Keep the process alive - while os.path.exists(DASK_RUN_STATUS): - time.sleep(1) + return client + + @classmethod + def parallelize_with_dask( + cls, + iterate_function: Callable[..., Any], + iterable: Iterable[Any], + *args: Any, + **kwargs: Any, + ) -> Union[Any, Tuple[Any, ...], List[Any]]: + """ + Run a function over an iterable in parallel using Dask, and gather the results. + + Parameters + ---------- + iterate_function : callable + The function to be applied to each element of the iterable. The function + should take the current element of the iterable as its first argument, + followed by any positional arguments, and then any keyword arguments. + + iterable : iterable + The iterable over which the function will be applied. Each element of this + iterable will be passed to the `iterate_function`. + + *args : tuple + Positional arguments that will be passed to the `iterate_function` after the + current element of the iterable. + + **kwargs : dict + Keyword arguments that will be passed to the `iterate_function`. + + Returns + ------- + tuple + A tuple containing the results of the `iterate_function` for each element in + the iterable. The results are gathered using Dask's compute function. + + Notes + ----- + - If 'verbose' is present in **kwargs and is set to True, additional progress + messages will be printed. + - This function utilizes the distributed scheduler of Dask. 
+ """ + if not DaskHandler._setup_called: + DaskHandler.setup() + + delayed_results = [] + + for element in iterable: + if "verbose" in kwargs and kwargs["verbose"]: + print(f"Processing element {element}...\nExtracting data...") + + delayed_result = delayed(iterate_function)(element, *args, **kwargs) + delayed_results.append(delayed_result) + + return compute(*delayed_results, scheduler="distributed") + + @classmethod + def dask_cleanup(cls, client: Client) -> None: + if client is not None: + client.shutdown() + client.close() + + +class DaskSlurmHandler(DaskHandler): + """Dask & Slurm related functionality resides here.""" + + @classmethod + @property + def dask_info_address(cls) -> str: + """dask_info.json path""" + _, info_address, _ = cls._get_dask_paths_for_slurm() + return info_address + + @classmethod + @property + def dask_run_status(cls) -> str: + """dask_run_status.txt path""" + _, _, run_status = cls._get_dask_paths_for_slurm() + return run_status + + @classmethod + def get_dask_client(cls) -> Client: + if cls.dask_client is not None: + return cls.dask_client + if not cls._setup_called and cls.is_first_node(): + cls_name = cls.__name__ + warn( + KaraboWarning( + f"{cls_name}.setup() has to be called at the beginning " + + "of the script. This could lead to unexpected behaviour " + + "on a SLURM cluster if not (see documentation)." + ) + ) + if cls.get_number_of_nodes() > 1: + cls.dask_client = DaskSlurmHandler.setup_dask_for_slurm( + cls.n_workers_scheduler_node, + cls.memory_limit, + ) - # Shutdown process - for worker_or_nanny in workers_or_nannies: - result = asyncio.run(worker_or_nanny.close()) - if result == "OK": - pass - else: + @classmethod + def prepare_slurm_nodes_for_dask(cls) -> None: + # Detect if we are on a slurm cluster + if not cls.is_on_slurm_cluster() or cls.get_number_of_nodes() <= 1: + cls.use_dask = False + return + elif ( + cls.is_first_node() and cls.dask_client is None and not cls._nodes_prepared + ): + cls._nodes_prepared = True + slurm_job_nodelist = cls.get_job_nodelist() + slurm_node_name = cls.get_node_name() print( f""" - There was an issue closing the worker or nanny at - {worker_or_nanny.address} + Preparing SLURM nodes for dask... + First Node, containing the scheduler, is: {slurm_node_name} + With the help of dask, the following nodes will be used: + {slurm_job_nodelist} """ ) - # Stop the script successfully - sys.exit(0) - - -def setup_dask_for_slurm( - n_workers_scheduler_node: int, memory_limit: Optional[IntFloat] -) -> Client: - if is_first_node(): - # Create file to show that the run is still ongoing - with open(DASK_RUN_STATUS, "w") as f: - f.write("ongoing") - - # Create client and scheduler - cluster = LocalCluster( - ip=get_node_name(), - n_workers=n_workers_scheduler_node, - threads_per_worker=DaskHandler.n_threads_per_worker, - ) - dask_client = Client(cluster, proccesses=DaskHandler.use_proccesses) - - # Calculate number of workers per node - n_workers_per_node = calculate_number_of_workers_per_node(memory_limit) - - # Create dictionary with the information - dask_info = { - "scheduler_address": cluster.scheduler_address, - "n_workers_per_node": n_workers_per_node, - } - - # Write scheduler file - with open(DASK_INFO_ADDRESS, "w") as f: - json.dump(dask_info, f) + elif not cls.is_first_node() and not cls._nodes_prepared: + # TODO: Here setup_nannies_workers_for_slurm() could be called + # but there is no if name == main guard in this file. 
+ pass - # Wait until all workers are connected - n_workers_requested = ( - get_number_of_nodes() - 1 - ) * n_workers_per_node + n_workers_scheduler_node + @classmethod + def setup_nannies_workers_for_slurm(cls) -> None: + # Wait until dask info file is created + _, dask_info_address, dask_run_status = cls._get_dask_paths_for_slurm() + while not os.path.exists(dask_info_address): + time.sleep(1) - dask_client.wait_for_workers( - n_workers=n_workers_requested, timeout=DaskHandler.TIMEOUT - ) + # Load dask info file + with open(dask_info_address, "r") as f: + dask_info = json.load(f) - print(f"All {len(dask_client.scheduler_info()['workers'])} workers connected!") - return dask_client - - else: - setup_nannies_workers_for_slurm() - return None # type: ignore - - -def extract_node_ids_from_node_list() -> List[int]: - slurm_job_nodelist = check_env_var( - var="SLURM_JOB_NODELIST", fun=extract_node_ids_from_node_list - ) - if get_number_of_nodes() == 1: - # Node name will be something like "psanagpu115" - return [extract_digit_from_string(slurm_job_nodelist)] - node_list = slurm_job_nodelist.split("[")[1].split("]")[0] - id_ranges = node_list.split(",") - node_ids = [] - for id_range in id_ranges: - if "-" in id_range: - min_id, max_id = id_range.split("-") - node_ids += [i for i in range(int(min_id), int(max_id) + 1)] + # Calculate memory usage of each worker + if cls.memory_limit is None: + memory_limit = f"{psutil.virtual_memory().available / 1e9}GB" else: - node_ids.append(int(id_range)) + memory_limit = f"{cls.memory_limit}GB" - return node_ids + async def start_worker(scheduler_address: str) -> Worker: + worker = await Worker( + scheduler_address, + nthreads=cls.n_threads_per_worker, + memory_limit=memory_limit, + ) + await worker.finished() + return worker # type: ignore + + async def start_nanny(scheduler_address: str) -> Nanny: + nanny = await Nanny( + scheduler_address, + nthreads=cls.n_threads_per_worker, + memory_limit=memory_limit, + ) + await nanny.finished() + return nanny # type: ignore + + scheduler_address = str(dask_info["scheduler_address"]) + n_workers = int(str(dask_info["n_workers_per_node"])) + + # Start workers or nannies + workers_or_nannies: List[Union[Worker, Nanny]] = [] + for _ in range(n_workers): + if cls.use_workers_or_nannies == "workers": + worker = asyncio.run(start_worker(scheduler_address)) + workers_or_nannies.append(worker) + else: + nanny = asyncio.run(start_nanny(scheduler_address)) + workers_or_nannies.append(nanny) + + # Keep the process alive + while os.path.exists(dask_run_status): + time.sleep(1) + + # Shutdown process + for worker_or_nanny in workers_or_nannies: + result = asyncio.run(worker_or_nanny.close()) + if result == "OK": + pass + else: + print( + "There was an issue closing the worker or nanny at " + + f"{worker_or_nanny.address}" + ) + # Stop the script successfully + sys.exit(0) + + @classmethod + def setup_dask_for_slurm( + cls, + n_workers_scheduler_node: int, + memory_limit: Optional[IntFloat], + ) -> Client: + if cls.is_first_node(): + _, dask_info_address, dask_run_status = cls._get_dask_paths_for_slurm() + # Create file to show that the run is still ongoing + with open(dask_run_status, "w") as f: + f.write("ongoing") + + # Create client and scheduler + cluster = LocalCluster( + ip=cls.get_node_name(), + n_workers=n_workers_scheduler_node, + threads_per_worker=cls.n_threads_per_worker, + ) + dask_client = Client(cluster, proccesses=cls.use_proccesses) -def get_min_max_of_node_id() -> Tuple[int, int]: - """ - Returns the min max 
from SLURM_JOB_NODELIST. - Works if it's run only on two nodes (separated with a comma) - of if it runs on more than two nodes (separated with a dash). - """ - node_list = extract_node_ids_from_node_list() - return min(node_list), max(node_list) + # Calculate number of workers per node + n_workers_per_node = cls.calc_num_of_workers(memory_limit) + # Create dictionary with the information + dask_info = { + "scheduler_address": cluster.scheduler_address, + "n_workers_per_node": n_workers_per_node, + } -def get_lowest_node_id() -> int: - return get_min_max_of_node_id()[0] + # Write scheduler file + with open(dask_info_address, "w") as f: + json.dump(dask_info, f) + # Wait until all workers are connected + n_workers_requested = ( + cls.get_number_of_nodes() - 1 + ) * n_workers_per_node + n_workers_scheduler_node -def get_base_string_node_list() -> str: - slurm_job_nodelist = check_env_var( - var="SLURM_JOB_NODELIST", fun=get_base_string_node_list - ) - if get_number_of_nodes() == 1: - return extract_chars_from_string(slurm_job_nodelist) - else: - return slurm_job_nodelist.split("[")[0] + dask_client.wait_for_workers( + n_workers=n_workers_requested, timeout=cls.TIMEOUT + ) + print( + f"All {len(dask_client.scheduler_info()['workers'])} workers connected!" + ) + return dask_client -def get_lowest_node_name() -> str: - return get_base_string_node_list() + str(get_lowest_node_id()) + else: + cls.setup_nannies_workers_for_slurm() + return None + + @classmethod + def _get_dask_paths_for_slurm(cls) -> Tuple[str, str, str]: + """Gets dask-file paths for SLURM setup. + + This needs to be a function, to enable the `FileHandler` lazy path-loading, + hence allowing path-changes at run-time. + + Returns: + dask_info_dir, dask-info-address, dask-run-status + """ + slurm_job_id = cls.get_job_id() + prefix = f"-dask-info-slurm-{slurm_job_id}-" + dask_info_dir = FileHandler().get_tmp_dir( + prefix=prefix, + purpose=f"dask-info-slurm-{slurm_job_id} disk-cache", + mkdir=True, + seed=slurm_job_id, + ) + dask_info_address = os.path.join(dask_info_dir, "dask_info.json") + dask_run_status = os.path.join(dask_info_dir, "dask_run_status.txt") + return dask_info_dir, dask_info_address, dask_run_status + + @classmethod + def extract_node_ids_from_node_list(cls) -> List[int]: + slurm_job_nodelist = cls.get_job_nodelist() + if cls.get_number_of_nodes() == 1: + # Node name will be something like "psanagpu115" + return [extract_digit_from_string(slurm_job_nodelist)] + node_list = slurm_job_nodelist.split("[")[1].split("]")[0] + id_ranges = node_list.split(",") + node_ids = [] + for id_range in id_ranges: + if "-" in id_range: + min_id, max_id = id_range.split("-") + node_ids += [i for i in range(int(min_id), int(max_id) + 1)] + else: + node_ids.append(int(id_range)) + + return node_ids + @classmethod + def dask_cleanup(cls, client: Client) -> None: + dask_info_dir, _, _ = cls._get_dask_paths_for_slurm() + if os.path.exists(dask_info_dir) and os.path.isdir(dask_info_dir): + shutil.rmtree(dask_info_dir) -def get_number_of_nodes() -> int: - n_nodes = check_env_var(var="SLURM_JOB_NUM_NODES", fun=get_number_of_nodes) - return int(n_nodes) + super(DaskSlurmHandler, cls).dask_cleanup(client=client) + @classmethod + def should_dask_be_used(cls, override: Optional[bool] = None) -> bool: + if override is not None: + return override + elif cls.use_dask is not None: + return cls.use_dask + elif cls.dask_client is not None: + return True + elif cls.is_on_slurm_cluster() and cls.get_number_of_nodes() > 1: + return True + else: + return 
False -def get_node_id() -> int: - # Attention, often the node id starts with a 0. - slurmd_nodename = check_env_var(var="SLURMD_NODENAME", fun=get_node_id) - len_id = len(get_base_string_node_list()) - return int(slurmd_nodename[-len_id:]) + @classmethod + def get_min_max_of_node_id(cls) -> Tuple[int, int]: + """ + Returns the min max from SLURM_JOB_NODELIST. + Works if it's run only on two nodes (separated with a comma) + of if it runs on more than two nodes (separated with a dash). + """ + node_list = cls.extract_node_ids_from_node_list() + return min(node_list), max(node_list) + + @classmethod + def get_lowest_node_id(cls) -> int: + return cls.get_min_max_of_node_id()[0] + + @classmethod + def get_base_string_node_list(cls) -> str: + slurm_job_nodelist = cls.get_job_nodelist() + if cls.get_number_of_nodes() == 1: + return extract_chars_from_string(slurm_job_nodelist) + else: + return slurm_job_nodelist.split("[")[0] + @classmethod + def get_lowest_node_name(cls) -> str: + return cls.get_base_string_node_list() + str(cls.get_lowest_node_id()) -def get_node_name() -> str: - return check_env_var(var="SLURMD_NODENAME", fun=get_node_id) + @classmethod + def get_number_of_nodes(cls) -> int: + n_nodes = os.environ["SLURM_JOB_NUM_NODES"] + return int(n_nodes) + @classmethod + def get_node_id(cls) -> int: + # Attention, often the node id starts with a 0. + slurmd_nodename = cls.get_node_name() + len_id = len(cls.get_base_string_node_list()) + return int(slurmd_nodename[-len_id:]) -def is_first_node() -> bool: - return get_node_id() == get_lowest_node_id() + @classmethod + def get_node_name(cls) -> str: + return os.environ["SLURMD_NODENAME"] + @classmethod + def is_first_node(cls) -> bool: + return cls.get_node_id() == cls.get_lowest_node_id() -def is_on_slurm_cluster() -> bool: - return "SLURM_JOB_ID" in os.environ + @classmethod + def get_job_nodelist(cls) -> str: + return os.environ["SLURM_JOB_NODELIST"] + @classmethod + def get_job_id(cls) -> str: + return os.environ["SLURM_JOB_ID"] -def check_env_var(var: str, fun: Optional[Callable[..., Any]] = None) -> str: - value = os.getenv(var) - if value is None: - suffix = "" - if fun is not None: - suffix = f" before calling `{fun.__name__}`" - error_msg = f"Environment variable '{var}' must be set" + suffix + "." 
- raise KaraboDaskError(error_msg) - return value + @staticmethod + def is_on_slurm_cluster() -> bool: + return "SLURM_JOB_ID" in os.environ diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 3d6e70a8..fc60e92c 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -149,13 +149,13 @@ class FileHandler: @classmethod @property def ltm(cls) -> str: - """Gives LTM (long-term-memory) path.""" + """LTM (long-term-memory) path.""" return os.path.join(cls.root, _get_cache_dir(term="long")) @classmethod @property def stm(cls) -> str: - """Gives the STM (short-term-memory) path.""" + """STM (short-term-memory) path.""" return os.path.join(cls.root, _get_cache_dir(term="short")) def __init__( From 133296363a3739c3992ecced2dc4f66a13696471 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 24 Jan 2024 13:28:17 +0100 Subject: [PATCH 193/207] adapted other karabo-files to new DaskHandler setup :rotating_light: --- doc/src/examples/example_structure.md | 33 +++--- karabo/imaging/imager.py | 24 ++-- karabo/simulation/interferometer.py | 51 ++++---- karabo/sourcedetection/result.py | 7 +- karabo/test/test_dask.py | 162 +++++++++++++------------- karabo/util/dask.py | 23 +++- karabo/util/plotting_util.py | 28 ----- 7 files changed, 154 insertions(+), 174 deletions(-) delete mode 100644 karabo/util/plotting_util.py diff --git a/doc/src/examples/example_structure.md b/doc/src/examples/example_structure.md index e1e3c3d6..4daa7914 100644 --- a/doc/src/examples/example_structure.md +++ b/doc/src/examples/example_structure.md @@ -51,7 +51,7 @@ Following these guidelines will help ensure that you get the most out of Karabo' - tuple: A tuple containing the results of the iterate_function for each element in the iterable. Results are gathered using Dask's compute function. ### Additional Notes -It's important when working on a `Slurm Cluster` to call DaskHandler.setup() at the beginning. +It's important when working on a `Slurm Cluster` to call DaskSlurmHandler.setup() at the beginning. If 'verbose' is specified in kwargs and is set to True, progress messages will be printed during processing. @@ -62,57 +62,52 @@ Leverage the `parallelize_with_dask` utility in Karabo to harness the power of p ### Function Signature ```python -def parallelize_with_dask( - iterate_function: Callable[..., Any], - iterable: Iterable[Any], - *args: Any, - **kwargs: Any, -) -> Union[Any, Tuple[Any, ...], List[Any]]: +from karabo.util.dask import DaskHandler # Example def my_function(element, *args, **kwargs): # Do something with element return result -parallelize_with_dask(my_function, my_iterable, *args, **kwargs) # The current element of the iterable is passed as the first argument to my_function +DaskHandler.parallelize_with_dask(my_function, my_iterable, *args, **kwargs) # The current element of the iterable is passed as the first argument to my_function >>> (result1, result2, result3, ...) ``` ## Use Karabo on a SLURM cluster -Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. The `DaskHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables, which when altered, modify the behavior of the Dask client. +Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. 
The `DaskSlurmHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables that modify the behavior of a Dask client if they are changed before the client is created.
 
-While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client has to be initialized at the beginning of your script with `DaskHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes.
+While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client should be initialized at the beginning of your script with `DaskSlurmHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes.
 
-If you need the client yourself, then no `setup()` is needed.
+If you just need the client itself, then no `setup()` is needed.
 
 ```python
-from karabo.util.dask import DaskHandler
+from karabo.util.dask import DaskSlurmHandler
 
 if __name__ == "__main__":
     # Get the Dask client
-    client = DaskHandler.get_dask_client() # Not needed anymore to call .setup()
+    client = DaskSlurmHandler.get_dask_client()  # no need to call .setup() first
 
     # Use the client as needed
     result = client.submit(my_function, *args)
 ```
 
 ```python
-from karabo.util.dask import DaskHandler
+from karabo.util.dask import DaskSlurmHandler
 
 if __name__ == "__main__":
-    DaskHandler.setup()
+    DaskSlurmHandler.setup()
 
     result = (*args)
 ```
 
 Disable the usage of Dask by Karabo.
 
 ```python
-from karabo.util.dask import DaskHandler
+from karabo.util.dask import DaskSlurmHandler
 
 # Modify the static variables
-DaskHandler.use_dask = False
+DaskSlurmHandler.use_dask = False
 ```
 
-Please also check out the `DaskHandler` under `karabo.util.dask` for more information.
+Please also check out the `DaskSlurmHandler` under `karabo.util.dask` for more information.
 
 ### Dask Dashboard
-The link for the Dask Dashboard is written into a .txt file called `karabo-dask-dashboard.txt`. This file is located in the same directory as where the run was started. This URL can then be pasted into a browser to access the Dask Dashboard. If you run Karabo on a VM without access to a browser and internet, you can use `port forwarding` to access the Dask Dashboard from your local machine. In `VSCODE`, this can be done directly when using the "PORTS" tab; just paste the IP address and port number from the .txt file into the Port column and click on "Open in Browser" in the Local Adress column.
+The Dask dashboard link is printed to stdout. Copy the link into your browser to observe the running Dask processes. If you run Karabo on a VM without access to a browser and internet, you can use `port forwarding` to access the Dask Dashboard from your local machine. In `VSCODE`, this can be done directly when using the "PORTS" tab.
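A minimal sketch of how the pieces described above could be combined in a driver script (`process_chunk` and `scale` are made-up placeholder names). `fetch_dask_handler` is the helper added to `karabo/util/dask.py` in this commit, which returns `DaskSlurmHandler` inside a SLURM job and `DaskHandler` otherwise:

```python
from karabo.util.dask import fetch_dask_handler


def process_chunk(chunk_id: int, scale: float = 1.0) -> float:
    # stand-in for real per-element work (e.g. one frequency channel)
    return chunk_id * scale


if __name__ == "__main__":
    handler = fetch_dask_handler()  # DaskSlurmHandler on SLURM, DaskHandler otherwise
    handler.setup()  # create the client early, as recommended above for SLURM runs
    results = handler.parallelize_with_dask(process_chunk, range(8), scale=2.0)
    print(results)  # tuple with one entry per element of the iterable
```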
diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py index 491d454b..2937c672 100644 --- a/karabo/imaging/imager.py +++ b/karabo/imaging/imager.py @@ -27,7 +27,7 @@ from karabo.simulation.sky_model import SkyModel from karabo.simulation.visibility import Visibility from karabo.util._types import FilePathType -from karabo.util.dask import DaskHandler +from karabo.util.dask import fetch_dask_handler from karabo.util.file_handler import FileHandler, check_ending ImageContextType = Literal["awprojection", "2d", "ng", "wg"] @@ -350,23 +350,31 @@ def imaging_rascil( residual_fits_path = os.path.join(tmp_dir, "residual.fits") if client and not use_dask: - raise EnvironmentError("Client passed but use_dask is False") + raise RuntimeError("Client passed but use_dask is False") if use_dask: - client = DaskHandler.get_dask_client() - if client: + if not client: + dask_handler = fetch_dask_handler() + client = dask_handler.get_dask_client() print(client.cluster) + rsexecute.set_client(use_dask=use_dask, client=client, use_dlg=False) # Set CUDA parameters if use_cuda: + if img_context != "wg": + print( + f"Changing imaging_rascil.img_context` from '{img_context}' " + + f"to 'wg' because {use_cuda=}" + ) img_context = "wg" - rsexecute.set_client(use_dask=use_dask, client=client, use_dlg=False) if self.ingest_vis_nchan is None: - raise ValueError("`ingest_vis_nchan` is None but must be of type 'int'.") + raise ValueError( + "`self.ingest_vis_nchan` is None but must set, but is None" + ) blockviss = create_visibility_from_ms_rsexecute( msname=str(self.visibility.ms_file_path), nchan_per_vis=self.ingest_chan_per_vis, - nout=self.ingest_vis_nchan // self.ingest_chan_per_vis, # pyright: ignore + nout=self.ingest_vis_nchan // self.ingest_chan_per_vis, dds=self.ingest_dd, average_channels=True, ) @@ -386,8 +394,6 @@ def imaging_rascil( ) for bvis in blockviss ] - if img_context == "wg": - raise NotImplementedError("WAGG support for rascil does currently not work") result = continuum_imaging_skymodel_list_rsexecute_workflow( vis_list=blockviss, diff --git a/karabo/simulation/interferometer.py b/karabo/simulation/interferometer.py index 5d627c35..787393ef 100644 --- a/karabo/simulation/interferometer.py +++ b/karabo/simulation/interferometer.py @@ -31,10 +31,9 @@ OskarSettingsTreeType, PrecisionType, ) -from karabo.util.dask import DaskHandler +from karabo.util.dask import fetch_dask_handler from karabo.util.file_handler import FileHandler from karabo.util.gpu_util import is_cuda_available -from karabo.warning import KaraboWarning class CorrelationType(enum.Enum): @@ -236,31 +235,32 @@ def __init__( self.beam_polY = beam_polY # set use_gpu if use_gpus is None: + use_gpus = is_cuda_available() print( - KaraboWarning( - "Parameter 'use_gpus' is None! Using function " - "'karabo.util.is_cuda_available()' to overwrite parameter " - f"'use_gpu' to {is_cuda_available()}." - ) + "Parameter 'use_gpus' is None! Using function " + + "'karabo.util.is_cuda_available()'. To overwrite, set " + + "'use_gpu' True or False." ) - self.use_gpus = is_cuda_available() - else: - self.use_gpus = use_gpus - - self.use_dask = use_dask - if use_dask is None and not client: - print( - KaraboWarning( - "Parameter 'use_dask' is None! Using function " - "'karabo.util.dask.DaskHandler.should_dask_be_used()' " - "to overwrite parameter 'use_dask' to " - f"{DaskHandler.should_dask_be_used()}." 
+ self.use_gpus = use_gpus + + if use_dask is True or client: + if client and use_dask is None: + use_dask = True + elif client and use_dask is False: + raise RuntimeError( + "Providing `client` and `use_dask`=False is not allowed." ) - ) - self.use_dask = DaskHandler.should_dask_be_used() - - if self.use_dask and not client: - client = DaskHandler.get_dask_client() + elif not client and use_dask is True: + dask_handler = fetch_dask_handler() + client = dask_handler.get_dask_client() + else: + pass + elif use_dask is None and client is None: + dask_handler = fetch_dask_handler() + use_dask = dask_handler.should_dask_be_used() + if use_dask: + client = dask_handler.get_dask_client() + self.use_dask = use_dask self.client = client self.split_observation_by_channels = split_observation_by_channels @@ -381,7 +381,8 @@ def __run_simulation_parallized_observation( # Check if there is a dask client if self.client is None: - self.client = DaskHandler.get_dask_client() + dask_handler = fetch_dask_handler() + self.client = dask_handler.get_dask_client() if array_sky is None: raise KaraboInterferometerSimulationError( diff --git a/karabo/sourcedetection/result.py b/karabo/sourcedetection/result.py index 743a97f3..a23409f8 100644 --- a/karabo/sourcedetection/result.py +++ b/karabo/sourcedetection/result.py @@ -16,7 +16,7 @@ from karabo.imaging.image import Image, ImageMosaicker from karabo.imaging.imager import Imager -from karabo.util.dask import DaskHandler +from karabo.util.dask import fetch_dask_handler from karabo.util.data_util import read_CSV_to_ndarray from karabo.util.file_handler import FileHandler from karabo.warning import KaraboWarning @@ -477,7 +477,8 @@ def detect_sources_in_images( for image in images ] # Check if there is a dask client - if DaskHandler.dask_client is not None: + dask_handler = fetch_dask_handler() + if dask_handler.dask_client is not None: func = delayed(PyBDSFSourceDetectionResult.detect_sources_in_image) else: func = PyBDSFSourceDetectionResult.detect_sources_in_image @@ -490,7 +491,7 @@ def detect_sources_in_images( **kwargs, ) results.append(result) - if DaskHandler.dask_client is not None: + if dask_handler.dask_client is not None: results = compute(*results, scheduler="distributed") # Keep only results that are not None results = [result for result in results if result is not None] diff --git a/karabo/test/test_dask.py b/karabo/test/test_dask.py index 7f5d83ca..606ea94a 100644 --- a/karabo/test/test_dask.py +++ b/karabo/test/test_dask.py @@ -1,33 +1,27 @@ import os +from typing import Dict, Type, Union from unittest.mock import patch import dask import pytest from dask import compute # type: ignore[attr-defined] -from karabo.util.dask import ( - DaskHandler, - extract_node_ids_from_node_list, - get_base_string_node_list, - get_lowest_node_id, - get_lowest_node_name, - get_min_max_of_node_id, - get_node_id, - get_node_name, - get_number_of_nodes, - is_first_node, - is_on_slurm_cluster, - parallelize_with_dask, -) +from karabo.util.dask import DaskHandler, DaskSlurmHandler, fetch_dask_handler + +_DaskHandlerType = Union[Type[DaskHandler], Type[DaskSlurmHandler]] @pytest.fixture(scope="module") -def setup_dask(): - DaskHandler.setup() +def setup_dask(dask_handler: _DaskHandlerType) -> None: + dask_handler = fetch_dask_handler() + dask_handler.setup() + + +_EnvVarsType = Dict[str, str] @pytest.fixture -def env_vars(): +def env_vars() -> Dict[str, str]: return { "SLURM_JOB_NODELIST": "nid0[4397-4406]", "SLURM_JOB_NUM_NODES": "10", @@ -36,55 +30,55 @@ def env_vars(): 
} -def test_get_min_max_of_node_id(env_vars): +def test_get_min_max_of_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_min_max_of_node_id() == (4397, 4406) + assert DaskSlurmHandler.get_min_max_of_node_id() == (4397, 4406) -def test_get_lowest_node_id(env_vars): +def test_get_lowest_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_lowest_node_id() == 4397 + assert DaskSlurmHandler.get_lowest_node_id() == 4397 -def test_get_base_string_node_list(env_vars): +def test_get_base_string_node_list(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_base_string_node_list() == "nid0" + assert DaskSlurmHandler.get_base_string_node_list() == "nid0" -def test_get_lowest_node_name(env_vars): +def test_get_lowest_node_name(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_lowest_node_name() == "nid04397" + assert DaskSlurmHandler.get_lowest_node_name() == "nid04397" -def test_get_number_of_nodes(env_vars): +def test_get_number_of_nodes(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_number_of_nodes() == 10 + assert DaskSlurmHandler.get_number_of_nodes() == 10 -def test_get_node_id(env_vars): +def test_get_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_node_id() == 4397 + assert DaskSlurmHandler.get_node_id() == 4397 -def test_get_node_name(env_vars): +def test_get_node_name(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert get_node_name() == "nid04397" + assert DaskSlurmHandler.get_node_name() == "nid04397" -def test_is_first_node(env_vars): +def test_is_first_node(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert is_first_node() is True + assert DaskSlurmHandler.is_first_node() is True -def test_is_on_slurm_cluster(env_vars): +def test_is_on_slurm_cluster(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert is_on_slurm_cluster() is True + assert DaskSlurmHandler.is_on_slurm_cluster() is True # repeat the tests for other values of environment variables -def test_multiple_nodes_and_ranges(): +def test_multiple_nodes_and_ranges() -> None: env_vars = { "SLURM_JOB_NODELIST": "nid0[2780-2781,4715]", "SLURM_JOB_NUM_NODES": "3", @@ -92,46 +86,46 @@ def test_multiple_nodes_and_ranges(): "SLURM_JOB_ID": "123456", } with patch.dict(os.environ, env_vars): - assert get_min_max_of_node_id() == (2780, 4715) - assert get_lowest_node_id() == 2780 - assert get_base_string_node_list() == "nid0" - assert get_lowest_node_name() == "nid02780" - assert get_number_of_nodes() == 3 - assert get_node_id() == 2780 - assert get_node_name() == "nid02780" - assert is_first_node() is True - assert is_on_slurm_cluster() is True + assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) + assert DaskSlurmHandler.get_lowest_node_id() == 2780 + assert DaskSlurmHandler.get_base_string_node_list() == "nid0" + assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" + assert DaskSlurmHandler.get_number_of_nodes() == 3 + assert DaskSlurmHandler.get_node_id() == 2780 + assert DaskSlurmHandler.get_node_name() == "nid02780" + assert DaskSlurmHandler.is_first_node() is True + assert DaskSlurmHandler.is_on_slurm_cluster() is True # test for a different node env_vars["SLURMD_NODENAME"] = "nid04715" with patch.dict(os.environ, env_vars): - assert get_min_max_of_node_id() == (2780, 4715) - assert 
get_lowest_node_id() == 2780 - assert get_base_string_node_list() == "nid0" - assert get_lowest_node_name() == "nid02780" - assert get_number_of_nodes() == 3 - assert get_node_id() == 4715 - assert get_node_name() == "nid04715" - assert is_first_node() is False - assert is_on_slurm_cluster() is True + assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) + assert DaskSlurmHandler.get_lowest_node_id() == 2780 + assert DaskSlurmHandler.get_base_string_node_list() == "nid0" + assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" + assert DaskSlurmHandler.get_number_of_nodes() == 3 + assert DaskSlurmHandler.get_node_id() == 4715 + assert DaskSlurmHandler.get_node_name() == "nid04715" + assert DaskSlurmHandler.is_first_node() is False + assert DaskSlurmHandler.is_on_slurm_cluster() is True # test for a different node env_vars["SLURMD_NODENAME"] = "nid02781" with patch.dict(os.environ, env_vars): - assert get_min_max_of_node_id() == (2780, 4715) - assert get_lowest_node_id() == 2780 - assert get_base_string_node_list() == "nid0" - assert get_lowest_node_name() == "nid02780" - assert get_number_of_nodes() == 3 - assert get_node_id() == 2781 - assert get_node_name() == "nid02781" - assert is_first_node() is False - assert is_on_slurm_cluster() is True - - -def test_extreme_range_of_nodes(): + assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) + assert DaskSlurmHandler.get_lowest_node_id() == 2780 + assert DaskSlurmHandler.get_base_string_node_list() == "nid0" + assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" + assert DaskSlurmHandler.get_number_of_nodes() == 3 + assert DaskSlurmHandler.get_node_id() == 2781 + assert DaskSlurmHandler.get_node_name() == "nid02781" + assert DaskSlurmHandler.is_first_node() is False + assert DaskSlurmHandler.is_on_slurm_cluster() is True + + +def test_extreme_range_of_nodes() -> None: env_vars = { "SLURM_JOB_NODELIST": "nid0[2780-2781,3213-4313,4441,4443,4715]", "SLURM_JOB_NUM_NODES": "1106", @@ -139,32 +133,32 @@ def test_extreme_range_of_nodes(): "SLURM_JOB_ID": "123456", } with patch.dict(os.environ, env_vars): - assert get_min_max_of_node_id() == (2780, 4715) - assert get_lowest_node_id() == 2780 - assert get_base_string_node_list() == "nid0" - assert get_lowest_node_name() == "nid02780" - assert get_number_of_nodes() == 1106 - assert get_node_id() == 3333 - assert get_node_name() == "nid03333" - assert is_first_node() is False - assert is_on_slurm_cluster() is True - assert len(extract_node_ids_from_node_list()) == 1106 - - -def test_single_node(): + assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) + assert DaskSlurmHandler.get_lowest_node_id() == 2780 + assert DaskSlurmHandler.get_base_string_node_list() == "nid0" + assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" + assert DaskSlurmHandler.get_number_of_nodes() == 1106 + assert DaskSlurmHandler.get_node_id() == 3333 + assert DaskSlurmHandler.get_node_name() == "nid03333" + assert DaskSlurmHandler.is_first_node() is False + assert DaskSlurmHandler.is_on_slurm_cluster() is True + assert len(DaskSlurmHandler.extract_node_ids_from_node_list()) == 1106 + + +def test_single_node() -> None: env_vars = { "SLURM_JOB_NODELIST": "nid03038", "SLURM_JOB_NUM_NODES": "1", "SLURMD_NODENAME": "nid03038", } with patch.dict(os.environ, env_vars): - min_node_id, max_node_id = get_min_max_of_node_id() + min_node_id, max_node_id = DaskSlurmHandler.get_min_max_of_node_id() assert min_node_id == 3038 assert max_node_id == 3038 - assert get_base_string_node_list() 
== "nid" + assert DaskSlurmHandler.get_base_string_node_list() == "nid" -def test_dask_job(): +def test_dask_job() -> None: DaskHandler.setup() client = DaskHandler.get_dask_client() @@ -201,8 +195,8 @@ def simple_function(x: int, multiplier: int = 1) -> int: return x * multiplier -def test_parallelize_with_dask(setup_dask): +def test_parallelize_with_dask(setup_dask) -> None: iterable = [1, 2, 3, 4, 5] - results = parallelize_with_dask(simple_function, iterable, multiplier=2) + results = DaskHandler.parallelize_with_dask(simple_function, iterable, multiplier=2) expected_results = tuple([x * 2 for x in iterable]) assert results == expected_results diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 17d48336..235802b5 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -8,7 +8,7 @@ import sys import time from collections.abc import Iterable -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Tuple, Type, Union from warnings import warn import psutil @@ -23,6 +23,17 @@ from karabo.warning import KaraboWarning +def fetch_dask_handler() -> Union[Type[DaskHandler], Type[DaskSlurmHandler]]: + """Utility function to automatically choose a Handler. + + Returns: + The chosen Handler. + """ + if DaskSlurmHandler.is_on_slurm_cluster(): + return DaskSlurmHandler + return DaskHandler + + class DaskHandler: """ A class for managing a Dask client. This class is a singleton, meaning that @@ -220,8 +231,8 @@ def parallelize_with_dask( messages will be printed. - This function utilizes the distributed scheduler of Dask. """ - if not DaskHandler._setup_called: - DaskHandler.setup() + if not cls._setup_called: + cls.setup() delayed_results = [] @@ -272,7 +283,7 @@ def get_dask_client(cls) -> Client: ) ) if cls.get_number_of_nodes() > 1: - cls.dask_client = DaskSlurmHandler.setup_dask_for_slurm( + cls.dask_client = cls.setup_dask_for_slurm( cls.n_workers_scheduler_node, cls.memory_limit, ) @@ -535,6 +546,6 @@ def get_job_nodelist(cls) -> str: def get_job_id(cls) -> str: return os.environ["SLURM_JOB_ID"] - @staticmethod - def is_on_slurm_cluster() -> bool: + @classmethod + def is_on_slurm_cluster(cls) -> bool: return "SLURM_JOB_ID" in os.environ diff --git a/karabo/util/plotting_util.py b/karabo/util/plotting_util.py deleted file mode 100644 index 8e25cc6f..00000000 --- a/karabo/util/plotting_util.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import List - -from astropy.wcs import WCS - - -def get_slices(wcs: WCS) -> List[str]: - slices: List[str] = [] - for i in range(wcs.pixel_n_dim): - if i == 0: - slices.append("x") - elif i == 1: - slices.append("y") - else: - slices.append(0) # type: ignore - return slices - - -class Font: - PURPLE = "\033[95m" - CYAN = "\033[96m" - DARKCYAN = "\033[36m" - BLUE = "\033[94m" - GREEN = "\033[92m" - YELLOW = "\033[93m" - RED = "\033[91m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - END = "\033[0m" From aa21e3e4c09aecbcafd40f7746acf075a52f929d Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 24 Jan 2024 13:38:17 +0100 Subject: [PATCH 194/207] readded accidentally removed plotting-util.py :sandal: --- karabo/util/plotting_util.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 karabo/util/plotting_util.py diff --git a/karabo/util/plotting_util.py b/karabo/util/plotting_util.py new file mode 100644 index 00000000..8e25cc6f --- /dev/null +++ b/karabo/util/plotting_util.py @@ -0,0 +1,28 @@ +from typing import List + +from astropy.wcs import WCS + + +def 
get_slices(wcs: WCS) -> List[str]: + slices: List[str] = [] + for i in range(wcs.pixel_n_dim): + if i == 0: + slices.append("x") + elif i == 1: + slices.append("y") + else: + slices.append(0) # type: ignore + return slices + + +class Font: + PURPLE = "\033[95m" + CYAN = "\033[96m" + DARKCYAN = "\033[36m" + BLUE = "\033[94m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + RED = "\033[91m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + END = "\033[0m" From d89e91fa52d7e8d9b32ed596098142cfd3e99e7c Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 24 Jan 2024 15:21:02 +0100 Subject: [PATCH 195/207] bugfix dask-usage :four_leaf_clover: --- karabo/simulation/line_emission.py | 5 +-- karabo/test/test_dask.py | 5 ++- karabo/util/dask.py | 56 +++++++++++------------------- 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/karabo/simulation/line_emission.py b/karabo/simulation/line_emission.py index a14ddcb3..5dc93fba 100644 --- a/karabo/simulation/line_emission.py +++ b/karabo/simulation/line_emission.py @@ -24,7 +24,7 @@ from karabo.util._types import DirPathType, FilePathType, IntFloat, NPFloatLikeStrict # from dask.delayed import Delayed -from karabo.util.dask import parallelize_with_dask +from karabo.util.dask import fetch_dask_handler from karabo.util.plotting_util import get_slices @@ -694,7 +694,8 @@ def process_channel( # type: ignore[no-untyped-def] verbose=verbose, ) - result = parallelize_with_dask( + dask_handler = fetch_dask_handler() + result = dask_handler.parallelize_with_dask( process_channel, range(num_bins), outpath=outpath, diff --git a/karabo/test/test_dask.py b/karabo/test/test_dask.py index 606ea94a..fddb719b 100644 --- a/karabo/test/test_dask.py +++ b/karabo/test/test_dask.py @@ -197,6 +197,9 @@ def simple_function(x: int, multiplier: int = 1) -> int: def test_parallelize_with_dask(setup_dask) -> None: iterable = [1, 2, 3, 4, 5] - results = DaskHandler.parallelize_with_dask(simple_function, iterable, multiplier=2) + dask_handler = fetch_dask_handler() + results = dask_handler.parallelize_with_dask( + simple_function, iterable, multiplier=2 + ) expected_results = tuple([x * 2 for x in iterable]) assert results == expected_results diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 235802b5..c76bc8d6 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -8,7 +8,7 @@ import sys import time from collections.abc import Iterable -from typing import Any, Callable, List, Optional, Tuple, Type, Union +from typing import Any, Callable, List, Optional, Tuple, Type, Union, cast from warnings import warn import psutil @@ -64,25 +64,6 @@ class DaskHandler: TIMEOUT: int The timeout in seconds for the Dask scheduler to wait for all the workers to connect. - - - Methods - ------- - setup() -> None: - Sets up the Dask client. If the client does not exist, and the - current node is a SLURM node and there are more than 1 node, a - Dask client will be created but not returned. Then, when a function - can make use of dask, it will make use of dask automatically. This - function need to be only called once at the beginning of the script. - It stops the processing of the script if the script is not running on the - main node. - get_dask_client() -> Client: - Returns a Dask client object. If the client does not exist, and - the current node is a SLURM node and there are more than 1 node, - a Dask client will be created. 
- - - """ dask_client: Optional[Client] = None @@ -271,22 +252,25 @@ def dask_run_status(cls) -> str: @classmethod def get_dask_client(cls) -> Client: - if cls.dask_client is not None: - return cls.dask_client + dask_client = cls.dask_client + if dask_client is not None: + return dask_client if not cls._setup_called and cls.is_first_node(): - cls_name = cls.__name__ - warn( - KaraboWarning( - f"{cls_name}.setup() has to be called at the beginning " - + "of the script. This could lead to unexpected behaviour " - + "on a SLURM cluster if not (see documentation)." - ) - ) + cls.setup() if cls.get_number_of_nodes() > 1: - cls.dask_client = cls.setup_dask_for_slurm( - cls.n_workers_scheduler_node, - cls.memory_limit, + dask_client = cast( # hacky workaround + Client, + cls.setup_dask_for_slurm( + cls.n_workers_scheduler_node, + cls.memory_limit, + ), ) + if dask_client is not None: + cls.dask_client = dask_client + return dask_client + else: + cls.dask_client = super(DaskSlurmHandler, cls).get_dask_client() + return cls.dask_client @classmethod def prepare_slurm_nodes_for_dask(cls) -> None: @@ -338,7 +322,7 @@ async def start_worker(scheduler_address: str) -> Worker: memory_limit=memory_limit, ) await worker.finished() - return worker # type: ignore + return worker async def start_nanny(scheduler_address: str) -> Nanny: nanny = await Nanny( @@ -347,7 +331,7 @@ async def start_nanny(scheduler_address: str) -> Nanny: memory_limit=memory_limit, ) await nanny.finished() - return nanny # type: ignore + return nanny scheduler_address = str(dask_info["scheduler_address"]) n_workers = int(str(dask_info["n_workers_per_node"])) @@ -385,7 +369,7 @@ def setup_dask_for_slurm( cls, n_workers_scheduler_node: int, memory_limit: Optional[IntFloat], - ) -> Client: + ) -> Optional[Client]: if cls.is_first_node(): _, dask_info_address, dask_run_status = cls._get_dask_paths_for_slurm() # Create file to show that the run is still ongoing From a905395bea3e7715f99fb66b97ab864328bf09a1 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Wed, 24 Jan 2024 15:57:51 +0100 Subject: [PATCH 196/207] minor bugfix in test-dask :smoking: --- karabo/test/test_dask.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/karabo/test/test_dask.py b/karabo/test/test_dask.py index fddb719b..dc2d5e0c 100644 --- a/karabo/test/test_dask.py +++ b/karabo/test/test_dask.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Type, Union +from typing import Dict from unittest.mock import patch import dask @@ -8,11 +8,9 @@ from karabo.util.dask import DaskHandler, DaskSlurmHandler, fetch_dask_handler -_DaskHandlerType = Union[Type[DaskHandler], Type[DaskSlurmHandler]] - @pytest.fixture(scope="module") -def setup_dask(dask_handler: _DaskHandlerType) -> None: +def setup_dask() -> None: dask_handler = fetch_dask_handler() dask_handler.setup() From ff53cbc50c360dac8ba4847f899a306808abccbb Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 29 Jan 2024 15:36:31 +0100 Subject: [PATCH 197/207] refactored create_baseline_cut_telelescope to improved disk-caching & be less error-prone :mask: --- karabo/simulation/telescope.py | 200 ++++++++++++++++++------ karabo/test/test_telescope_baselines.py | 16 +- 2 files changed, 161 insertions(+), 55 deletions(-) diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 0e95d11c..6978b8d9 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -1,14 +1,16 @@ from __future__ import annotations import enum +import glob import logging 
import os import re import shutil -from math import comb -from typing import Dict, List, Literal, Optional, Type, Union, cast, get_args +from itertools import product +from typing import Dict, List, Literal, Optional, Tuple, Type, Union, cast, get_args import numpy as np +import pandas as pd from numpy.typing import NDArray from oskar.telescope import Telescope as OskarTelescope from rascil.processing_components.simulation.simulation_helpers import ( @@ -624,53 +626,151 @@ def __float_try_parse(cls, value: str) -> float: except ValueError: return 0.0 + @classmethod + def _get_station_infos(cls, tel_path: DirPathType) -> pd.DataFrame: + """Creates a pd.DataFrame with telescope-station infos. + + - "station-nr": Station-number inside the .tm file. + - "station-path": Path of the according station. + - "x": x-position + - "y": y-position + + Args: + tel_path: .tm dir-path to get infos from. + + Returns: + pd.DataFrame with the according infos. + """ + station_paths = glob.glob(f"{tel_path}{os.path.sep}station[0-9]*") + if len(station_paths) <= 0: + raise FileNotFoundError(f"No stations found in {tel_path}") + station_numbers = list() + for station_path in station_paths: + station_number = os.path.split(station_path)[-1].split("station")[1] + station_numbers.append(int(station_number)) + df_tel = ( + pd.DataFrame( + { + "station-nr": station_numbers, + "station-path": station_paths, + } + ) + .sort_values(by="station-nr") + .reset_index(drop=True) + ) + if not np.all(df_tel["station-nr"].to_numpy() == np.arange(0, df_tel.shape[0])): + raise KaraboError( + f"Stations found in {tel_path} are not ascending from station<0 - n>. " + ) + stations = np.loadtxt(os.path.join(tel_path, "layout.txt")) + if (n_stations_layout := stations.shape[0]) != (n_stations := df_tel.shape[0]): + raise KaraboError( + f"Number of stations missmatch of {n_stations_layout=} & {n_stations=}" + ) + df_tel["x"] = stations[:, 0] + df_tel["y"] = stations[:, 1] + return df_tel + + @classmethod + def _get_number_str(cls, num: int, n_digits: int) -> str: + """Transforms a number `num` to str with `n_digits` digits. + + Args: + num: Number to transform. + n_digits: Number of digits to represent `num` as str. + + Returns: + Transformed `num`. 
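+
+        Example:
+            >>> Telescope._get_number_str(num=7, n_digits=3)
+            '007'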
+ """ + num_str = str(num) + n_digit_values = len(num_str) + if n_digit_values < n_digits: + n_zeros = n_digits - n_digit_values + num_str = "0" * n_zeros + num_str + elif n_digit_values > n_digits: + raise ValueError(f"{num=} has more digits than {n_digits=}") + return num_str -def create_baseline_cut_telelescope( - lcut: NPFloatLike, - hcut: NPFloatLike, - tel: Telescope, -) -> str: - if tel.path is None: - raise KaraboError("`tel.path` None is not allowed.") - stations = np.loadtxt(os.path.join(tel.path, "layout.txt")) - station_x = stations[:, 0] - station_y = stations[:, 1] - nb = comb(stations.shape[0], 2) - k = 0 - baseline = np.zeros(nb) - baseline_x = np.zeros(nb) - baseline_y = np.zeros(nb) - for i in range(stations.shape[0]): - for j in range(i): - baseline[k] = np.linalg.norm(station_x[i] - station_y[j]) - baseline_x[k] = i - baseline_y[k] = j - k = k + 1 - cut_idx = np.where((baseline > lcut) & (baseline < hcut)) - cut_baseline_x = baseline_x[cut_idx] - cut_baseline_y = baseline_y[cut_idx] - cut_station_list = np.unique(np.hstack((cut_baseline_x, cut_baseline_y))) - output_path_prefix = str(tel.path).split(f"data{os.path.sep}")[0] - output_path = os.path.join(output_path_prefix, "data", "tel_baseline_cut.tm") - if os.path.exists(output_path): - shutil.rmtree(output_path) - os.mkdir(output_path) - count = 0 - for ns in cut_station_list: - source_path = os.path.join(str(tel.path), f"station0{str(int(ns))}") - os.system(f"cp -r {source_path} {output_path}") - os.system( - "mv " - + output_path - + f"{os.path.sep}station0" - + str(int(ns)) - + " " - + output_path - + f"{os.path.sep}station0" - + "%02d" % (int(count)) + @classmethod + def create_baseline_cut_telelescope( + cls, + lcut: NPFloatLike, + hcut: NPFloatLike, + tel: Telescope, + tm_path: Optional[DirPathType] = None, + ) -> Tuple[str, Dict[str, str]]: + """Cut telescope `tel` for baseline-lengths. + + Args: + lcut: Lower cut + hcut: Higher cut + tel: Telescope to cut off + tm_path: .tm file-path to save the cut-telescope. + `tm_path` will get overwritten if it already exists. + + Returns: + .tm file-path & station-name conversion (e.g. station055 -> station009) + """ + if tel.path is None: + raise KaraboError( + "`tel.path` None indicates that there is not telescope.tm file " + + "available for `tel`, which is not allowed here." 
+ ) + if tm_path is not None and not str(tm_path).endswith(".tm"): + raise KaraboError(f"{tm_path=} must end with '.tm'.") + df_tel = Telescope._get_station_infos(tel_path=tel.path) + n_stations = df_tel.shape[0] + station_x = df_tel["x"].to_numpy() + station_y = df_tel["y"].to_numpy() + baselines: List[Tuple[int, int]] = sorted( + [ # each unique combination-idx a station with another station + tuple(station_idx) + for station_idx in set( + map( + frozenset, product(np.arange(n_stations), np.arange(n_stations)) + ) + ) + if len(station_idx) > 1 + ] + ) + n_baselines = len(baselines) + baseline_dist = np.zeros(n_baselines) + for i, (x, y) in enumerate(baselines): + baseline_dist[i] = np.linalg.norm(station_x[x] - station_y[y]) + cut_idx = np.where((baseline_dist > lcut) & (baseline_dist < hcut)) + cut_station_list = np.unique(np.array(baselines)[cut_idx]) + df_tel = df_tel[df_tel["station-nr"].isin(cut_station_list)].reset_index( + drop=True + ) + + if cut_station_list.shape[0] == 0: + raise KaraboError("All telescope-stations were cut off.") + + if tm_path is None: + disk_cache = FileHandler().get_tmp_dir( + prefix="telescope-baseline-cut-", + mkdir=False, + ) + tm_path = os.path.join(disk_cache, "telescope-baseline-cut.tm") + else: + if os.path.exists(tm_path): + shutil.rmtree(tm_path) + os.makedirs(tm_path, exist_ok=False) + + conversions: Dict[str, str] = dict() + for i in enumerate(df_tel.shape[0]): + source_path = df_tel.iloc[i]["station-path"] + number_str = cls._get_number_str(num=i, n_digits=3) + target_station = f"station{number_str}" + target_path = os.path.join(tm_path, target_station) + source_station = os.path.split(source_path)[-1] + conversions[source_station] = target_station + shutil.copyfile(src=source_path, dst=target_path) + + shutil.copyfile( + src=os.path.join(tel.path, "position.txt"), + dst=os.path.join(tm_path, "position.txt"), ) - count = count + 1 - cut_stations = stations[cut_station_list.astype(int)] - os.system(f"cp -r {tel.path}{os.path.sep}position.txt {output_path}") - np.savetxt(os.path.join(output_path, "layout.txt"), cut_stations) - return output_path + cut_stations = df_tel[["x", "y"]].to_numpy() + np.savetxt(os.path.join(tm_path, "layout.txt"), cut_stations) + return tm_path, conversions diff --git a/karabo/test/test_telescope_baselines.py b/karabo/test/test_telescope_baselines.py index 5280b8cc..45a592cc 100644 --- a/karabo/test/test_telescope_baselines.py +++ b/karabo/test/test_telescope_baselines.py @@ -9,18 +9,24 @@ from karabo.simulation.interferometer import InterferometerSimulation from karabo.simulation.observation import Observation from karabo.simulation.sky_model import SkyModel -from karabo.simulation.telescope import Telescope, create_baseline_cut_telelescope +from karabo.simulation.telescope import Telescope def test_baselines_based_cutoff(sky_data: NDArray[np.float64]): lcut = 5000 hcut = 10000 # Lower cut off and higher cut-off in meters parant_tel = Telescope.constructor("MeerKAT") - telescope_path = create_baseline_cut_telelescope(lcut, hcut, parant_tel) - telescope = Telescope.read_OSKAR_tm_file(telescope_path) - sky = SkyModel() - sky.add_point_sources(sky_data) with tempfile.TemporaryDirectory() as tmpdir: + tm_path = os.path.join(tmpdir, "tel-cut.tm") + telescope_path, _ = Telescope.create_baseline_cut_telelescope( + lcut, + hcut, + parant_tel, + tm_path=tm_path, + ) + telescope = Telescope.read_OSKAR_tm_file(telescope_path) + sky = SkyModel() + sky.add_point_sources(sky_data) ms_path = os.path.join(tmpdir, "out.ms") vis_path = 
os.path.join(tmpdir, "out.vis") simulation = InterferometerSimulation( From 932a8b155c784f4e7cae14c3d6ef4f530df3ea2f Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 29 Jan 2024 15:42:19 +0100 Subject: [PATCH 198/207] bugfixes in Telescope.create-baseline-cut-telescope :key: --- karabo/simulation/telescope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 6978b8d9..33bedd50 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -758,14 +758,14 @@ def create_baseline_cut_telelescope( os.makedirs(tm_path, exist_ok=False) conversions: Dict[str, str] = dict() - for i in enumerate(df_tel.shape[0]): + for i in range(df_tel.shape[0]): source_path = df_tel.iloc[i]["station-path"] number_str = cls._get_number_str(num=i, n_digits=3) target_station = f"station{number_str}" target_path = os.path.join(tm_path, target_station) source_station = os.path.split(source_path)[-1] conversions[source_station] = target_station - shutil.copyfile(src=source_path, dst=target_path) + shutil.copytree(src=source_path, dst=target_path) shutil.copyfile( src=os.path.join(tel.path, "position.txt"), From 490d41ad2ee1bbb141dc9a441735c9826d0c616e Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 29 Jan 2024 16:06:37 +0100 Subject: [PATCH 199/207] addressed mypy-issues :neutral_face: --- karabo/imaging/imager.py | 2 +- karabo/simulation/telescope.py | 4 ++-- karabo/test/test_filehandler.py | 26 +++++++++++++------------- karabo/util/dask.py | 6 ++---- karabo/util/file_handler.py | 10 ++++------ 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py index 2937c672..055179ab 100644 --- a/karabo/imaging/imager.py +++ b/karabo/imaging/imager.py @@ -330,7 +330,7 @@ def imaging_rascil( if deconvolved_fits_path is not None: check_ending(path=deconvolved_fits_path, ending=".fits") if restored_fits_path is not None: - check_ending(path=deconvolved_fits_path, ending=".fits") + check_ending(path=restored_fits_path, ending=".fits") if residual_fits_path is not None: check_ending(path=residual_fits_path, ending=".fits") if ( diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 33bedd50..42e190c9 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -698,7 +698,7 @@ def create_baseline_cut_telelescope( hcut: NPFloatLike, tel: Telescope, tm_path: Optional[DirPathType] = None, - ) -> Tuple[str, Dict[str, str]]: + ) -> Tuple[DirPathType, Dict[str, str]]: """Cut telescope `tel` for baseline-lengths. 
Args: @@ -724,7 +724,7 @@ def create_baseline_cut_telelescope( station_y = df_tel["y"].to_numpy() baselines: List[Tuple[int, int]] = sorted( [ # each unique combination-idx a station with another station - tuple(station_idx) + tuple(station_idx) # type: ignore[misc] for station_idx in set( map( frozenset, product(np.arange(n_stations), np.arange(n_stations)) diff --git a/karabo/test/test_filehandler.py b/karabo/test/test_filehandler.py index 3e6a7e6d..d74a1c77 100644 --- a/karabo/test/test_filehandler.py +++ b/karabo/test/test_filehandler.py @@ -19,7 +19,7 @@ def test_file_handler(): ) assert len(os.listdir(tmpdir)) == 1 assert not FileHandler.is_dir_empty(dirname=tmpdir) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 json_path = os.path.join(tmpdir_fh1, "my_json.json") with open(json_path, "w") as outfile1: json.dump({"A": "B"}, outfile1) @@ -28,11 +28,11 @@ def test_file_handler(): _ = fh_instance.get_tmp_dir( prefix="dummy-", ) - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.stm())) == 2 _ = fh_instance.get_tmp_dir( mkdir=False, ) - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.stm())) == 2 with pytest.raises(RuntimeError): _ = FileHandler().get_tmp_dir( term="long", @@ -42,16 +42,16 @@ def test_file_handler(): prefix="dummy-ltm-name", ) assert len(os.listdir(tmpdir)) == 2 - assert len(os.listdir(FileHandler.ltm)) == 1 - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.ltm())) == 1 + assert len(os.listdir(FileHandler.stm())) == 2 fh_instance.clean_instance() - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 _ = FileHandler().get_tmp_dir() fh_empty = FileHandler() empty_path = fh_empty.get_tmp_dir() - assert len(os.listdir(FileHandler.stm)) == 3 + assert len(os.listdir(FileHandler.stm())) == 3 assert len(os.listdir(empty_path)) == 0 json_empty_path = os.path.join(empty_path, "my_json.json") with open(json_empty_path, "w") as outfile2: @@ -59,12 +59,12 @@ def test_file_handler(): assert len(os.listdir(empty_path)) == 1 FileHandler.empty_dir(dir_path=empty_path) assert len(os.listdir(empty_path)) == 0 - assert len(os.listdir(FileHandler.stm)) == 3 + assert len(os.listdir(FileHandler.stm())) == 3 fh_empty.clean_instance() - assert len(os.listdir(FileHandler.stm)) == 2 + assert len(os.listdir(FileHandler.stm())) == 2 FileHandler.clean() - assert not os.path.exists(FileHandler.stm) + assert not os.path.exists(FileHandler.stm()) def test_object_bound_file_handler(): @@ -76,9 +76,9 @@ class MyClass: with tempfile.TemporaryDirectory() as tmpdir: FileHandler.root = tmpdir my_obj = MyClass() - assert not os.path.exists(FileHandler.stm) + assert not os.path.exists(FileHandler.stm()) tmpdir_fh1 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 tmpdir_fh2 = FileHandler().get_tmp_dir(unique=my_obj) - assert len(os.listdir(FileHandler.stm)) == 1 + assert len(os.listdir(FileHandler.stm())) == 1 assert tmpdir_fh1 == tmpdir_fh2 diff --git a/karabo/util/dask.py b/karabo/util/dask.py index c76bc8d6..3ffa1233 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -237,14 +237,12 @@ class DaskSlurmHandler(DaskHandler): """Dask & Slurm related functionality resides here.""" @classmethod - @property def dask_info_address(cls) -> str: """dask_info.json path""" _, info_address, _ = cls._get_dask_paths_for_slurm() return 
info_address @classmethod - @property def dask_run_status(cls) -> str: """dask_run_status.txt path""" _, _, run_status = cls._get_dask_paths_for_slurm() @@ -322,7 +320,7 @@ async def start_worker(scheduler_address: str) -> Worker: memory_limit=memory_limit, ) await worker.finished() - return worker + return worker # type: ignore[no-any-return] async def start_nanny(scheduler_address: str) -> Nanny: nanny = await Nanny( @@ -331,7 +329,7 @@ async def start_nanny(scheduler_address: str) -> Nanny: memory_limit=memory_limit, ) await nanny.finished() - return nanny + return nanny # type: ignore[no-any-return] scheduler_address = str(dask_info["scheduler_address"]) n_workers = int(str(dask_info["n_workers_per_node"])) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index fc60e92c..b9e6362d 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -138,8 +138,8 @@ class FileHandler: ├── └── - LTM stand for long-term-memory (FileHandler.ltm) and STM for short-term-memory - (FileHandler.stm). The data-products usually get into in the STM directory. + LTM stand for long-term-memory (FileHandler.ltm()) and STM for short-term-memory + (FileHandler.stm()). The data-products usually get into in the STM directory. FileHanlder can be used the same way as `tempfile.TemporaryDirectory` using `with`. """ @@ -147,13 +147,11 @@ class FileHandler: root: str = _get_tmp_dir() @classmethod - @property def ltm(cls) -> str: """LTM (long-term-memory) path.""" return os.path.join(cls.root, _get_cache_dir(term="long")) @classmethod - @property def stm(cls) -> str: """STM (short-term-memory) path.""" return os.path.join(cls.root, _get_cache_dir(term="short")) @@ -168,9 +166,9 @@ def __init__( @staticmethod def _get_term_dir(term: _LongShortTermType) -> str: if term == "short": - dir_ = FileHandler.stm + dir_ = FileHandler.stm() elif term == "long": - dir_ = FileHandler.ltm + dir_ = FileHandler.ltm() else: assert_never(term) return dir_ From 5906ecc8d352acec827dea310fa7899cacb99369 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Mon, 29 Jan 2024 17:24:30 +0100 Subject: [PATCH 200/207] updated singularity-doc :strawberry: --- doc/src/container.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/src/container.md b/doc/src/container.md index a8e6faf1..f63efe48 100644 --- a/doc/src/container.md +++ b/doc/src/container.md @@ -28,8 +28,6 @@ This will start a server on the same port as forwarded. Then copy the url which ## Singularity Containers -**Note:** Currently, building a Singularity container from our docker-registry and run karabo within it doesn't work properly. This is work in progress. Therfore, the following doc regarding Singularity are not relevant. - Singularity containers are often standard on HPC clusters, which do not require special permissions (unlike Docker). We do not provide ready-made [Singularity containers](https://sylabs.io/). However, they can be easily created from Docker images with the following command (may take a while). You may first have to load the module if it's not available `module load singularity`: @@ -37,7 +35,7 @@ We do not provide ready-made [Singularity containers](https://sylabs.io/). Howev singularity pull docker://ghcr.io/i4ds/karabo-pipeline ``` -This creates a `.sif` file which acts as a singularity image and can be used to launch your application. How to use Singularity containers (e.g. 
mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). Be aware that Singularity mounts the home-directory by default if start a container from your home-directory, which may not be desirable (e.g. `conda init` is done through .bashrc of the image). Be sure to disable this behavior by setting the `--no-home` flag when starting a container.
+This creates a `.sif` file which acts as a singularity image and can be used to launch your application. How to use Singularity containers (e.g. mount directories or enable gpu-support) can be seen in the [Singularity documentation](https://docs.sylabs.io/guides/3.1/user-guide/cli.html). Be aware that Singularity mounts the home-directory by default if you start a container from your home-directory, which may or may not be desirable (e.g. `conda init` of your home instead of the container could get executed if you run a Singularity container interactively). Therefore, for interactive use of a Karabo Singularity container, we suggest using the `--no-home` flag.
 
 ## Sarus Containers
 
From 3ecb100919c004b38129728e5fa813b1e8881664 Mon Sep 17 00:00:00 2001
From: "lukas.gehrig" 
Date: Tue, 30 Jan 2024 13:08:46 +0100
Subject: [PATCH 201/207] addressed PR-requests :bamboo:

---
 .github/workflows/test-user-package.yml | 1 +
 doc/src/development.md                  | 2 +-
 karabo/__init__.py                      | 2 +-
 karabo/util/file_handler.py             | 4 ++--
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-user-package.yml b/.github/workflows/test-user-package.yml
index 760fe100..47a90ac0 100644
--- a/.github/workflows/test-user-package.yml
+++ b/.github/workflows/test-user-package.yml
@@ -50,6 +50,7 @@ jobs:
           export RUN_NOTEBOOK_TESTS=false
           conda install -y -n base conda-libmamba-solver
           conda config --set solver libmamba
+          conda config --env --set channel_priority true
           conda create -y -n karabo-env python=3.9
           conda activate karabo-env
           conda install -y -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline=$KARABO_VERSION
diff --git a/doc/src/development.md b/doc/src/development.md
index 1843fd65..33f8e49e 100644
--- a/doc/src/development.md
+++ b/doc/src/development.md
@@ -28,7 +28,7 @@ Then create a local development environment with the provided `environment.yaml`
 conda env create -n -f environment.yaml
 ```
 
-Then install karabo as a package and the development dependencies.
+Then install Karabo as a package and the corresponding dev-dependencies.
 
 ```shell
 conda activate
diff --git a/karabo/__init__.py b/karabo/__init__.py
index b4bc8844..e42a041c 100644
--- a/karabo/__init__.py
+++ b/karabo/__init__.py
@@ -28,7 +28,7 @@
 
 # Setup dask for slurm
 if "SLURM_JOB_ID" in os.environ:
-    # ugly workaraound to not import stuff not available at build-time, but on import.
+    # ugly workaraound to not import stuff not available at build-time
    from karabo.util.dask import prepare_slurm_nodes_for_dask
 
     prepare_slurm_nodes_for_dask()
diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py
index 230296d2..9fa6b04e 100644
--- a/karabo/util/file_handler.py
+++ b/karabo/util/file_handler.py
@@ -19,7 +19,7 @@ def _get_tmp_dir() -> str:
     Defined env-var-dir > scratch-dir > tmp-dir
 
     Honors TMPDIR and TMP environment variable(s).
-    The only thing not allowed is a collision between the mentioned env-vars.
+    Setting 'TMPDIR' & 'TMP' differently is ambiguous, thus it's not allowed.
Returns: path of tmpdir @@ -41,7 +41,7 @@ def _get_tmp_dir() -> str: if TMP != env_check: raise RuntimeError( f"Environment variables collision: TMP={TMP} != " - + f"{environment_varname}={env_check}" + + f"{environment_varname}={env_check} which is ambiguous." ) else: tmpdir = os.path.abspath(TMP) From d001308db4eb4cd66ccfb61c247e9376b86e2fee Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 1 Feb 2024 14:01:26 +0100 Subject: [PATCH 202/207] addressed PR526 requests :four_leaf_clover: --- conda/meta.yaml | 4 ++-- doc/src/installation_user.md | 21 ++++++++++++--------- environment.yaml | 4 ++-- karabo/__init__.py | 9 +++++++-- karabo/test/test_utils.py | 7 +++++-- karabo/util/file_handler.py | 13 ++++++++----- 6 files changed, 36 insertions(+), 22 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 11e84e9c..408144f4 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -28,12 +28,12 @@ requirements: - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 - cuda-cudart - - dask >=2022.12.1 + - dask =2022.12.1 - dask-mpi - distributed - eidos =1.1.0=*_0 - healpy - - h5py + - h5py =*=mpi_mpich* - ipython - katbeam =0.1.0=*_0 - libcufft diff --git a/doc/src/installation_user.md b/doc/src/installation_user.md index c370532c..e1566198 100644 --- a/doc/src/installation_user.md +++ b/doc/src/installation_user.md @@ -4,25 +4,25 @@ - Linux or Windows with WSL. For macOS we recommend you use [Docker](container.md), starting with version 0.18.1 of the image. - 8GB RAM - 10GB disk space -- GPU-acceleration requires proprietary nVidia drivers/CUDA >= 11.7 +- GPU-acceleration requires proprietary nVidia drivers/CUDA >= 11 ## Install Karabo The following steps will install Karabo and its prerequisites (miniconda): -**Install conda** ```shell +# install conda & solver wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh bash Miniconda3-latest-Linux-x86_64.sh -source .bashrc +source ~/miniconda3/bin/activate +conda init bash conda install -n base conda-libmamba-solver conda config --set solver libmamba -conda config --env --set channel_priority true -``` - -**Install Package** -```shell -conda create -n karabo +# setup virtual environment +conda create -n karabo python=3.9 conda activate karabo +conda config --env --set solver libmamba +conda config --env --set channel_priority true +# install karabo conda install -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline ``` @@ -30,6 +30,9 @@ Karabo versions older than `v0.15.0` are deprecated and therefore installation w ## Update to latest Karabo version A Karabo installation can be updated the following way: + +Note: Even though we care about not introducing API-breaking changes through different minor releases of Karabo, we don't guarantee it. 
+ ``` conda update -c nvidia/label/cuda-11.7.0 -c i4ds -c conda-forge karabo-pipeline ``` diff --git a/environment.yaml b/environment.yaml index 738bd1c8..36fa6581 100644 --- a/environment.yaml +++ b/environment.yaml @@ -9,12 +9,12 @@ dependencies: # package-version & build-number of Karabo-Feedstock deps should - bdsf =1.10.2=*_0 - bluebild =0.1.0=*_0 - cuda-cudart - - dask >=2022.12.1 + - dask =2022.12.1 - dask-mpi - distributed - eidos =1.1.0=*_0 - healpy - - h5py # has mpich-wheel, but is not compatible with apt-compiled binary (see PR #526) + - h5py =*=mpi_mpich* - ipython - katbeam =0.1.0=*_0 - libcufft diff --git a/karabo/__init__.py b/karabo/__init__.py index e42a041c..e57eb831 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -26,9 +26,14 @@ # https://stackoverflow.com/questions/6543847/setting-ld-library-path-from-inside-python os.execv(sys.executable, ["python"] + sys.argv) -# Setup dask for slurm if "SLURM_JOB_ID" in os.environ: - # ugly workaraound to not import stuff not available at build-time + # if-statement is an ugly workaraound to not import pkgs not available at + # build/install-time. This is something which is happening if you install the + # dependencies of Karabo through pip. Then, `versioneer`` determines the current + # version of Karabo automatically, which is done through this root-init-file. + # But because this is happening at build/install-time, the dependencies of Karabo + # are not yet available in the venv, and therefore the installation of the + # dependencies will fail. from karabo.util.dask import prepare_slurm_nodes_for_dask prepare_slurm_nodes_for_dask() diff --git a/karabo/test/test_utils.py b/karabo/test/test_utils.py index 759ae9cb..f6fdebb6 100644 --- a/karabo/test/test_utils.py +++ b/karabo/test/test_utils.py @@ -12,14 +12,17 @@ def test_is_cuda_available(): CUDA_AVAILABLE = is_cuda_available() -@pytest.mark.skipif(CUDA_AVAILABLE, reason="Doesn't make sense if cuda is available") +@pytest.mark.skipif( + CUDA_AVAILABLE, + reason="get-gpu-memory thorows a RuntimeError only if cuda is not available", +) def test_gpu_memory_error(): with pytest.raises(RuntimeError): get_gpu_memory() @pytest.mark.skipif( - not CUDA_AVAILABLE, reason="Test doesn't make sense if cuda is not available" + not CUDA_AVAILABLE, reason="get-gpu-memory works only if cuda is available" ) def test_get_gpu_memory(): memory = get_gpu_memory() diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 9fa6b04e..5d75410b 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -13,13 +13,15 @@ from karabo.util.plotting_util import Font -def _get_tmp_dir() -> str: - """Gets the according tmpdir. +def _get_disk_cache_root() -> str: + """Gets the root-directory of the disk-cache. Defined env-var-dir > scratch-dir > tmp-dir Honors TMPDIR and TMP environment variable(s). - Setting 'TMPDIR' & 'TMP' differently is ambiguous, thus it's not allowed. + + Raises: + RuntimeError: If 'TMPDIR' & 'TMP' are set differently which is ambiguous. Returns: path of tmpdir @@ -51,19 +53,20 @@ def _get_tmp_dir() -> str: def _get_cache_dir() -> str: - """Gets a default cache-dir. + """Creates a deterministic & user-specific cache-dir-name. 
dir-name: karabo-($USER-)<10-rnd-asci-letters-and-digits> Returns: path of cache-dir """ - tmpdir = _get_tmp_dir() + tmpdir = _get_disk_cache_root() delimiter = "-" prefix = "karabo" user = os.environ.get("USER") if user is not None: prefix = delimiter.join((prefix, user)) + random.seed(prefix) suffix = "".join(random.choices(string.ascii_letters + string.digits, k=10)) cache_dir_name = delimiter.join((prefix, suffix)) cache_dir = os.path.join(tmpdir, cache_dir_name) From 745d0185597bc239a2cde88fb1b7c78ffbdb3d14 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 1 Feb 2024 15:27:33 +0100 Subject: [PATCH 203/207] bugfix removed set libmamba-solver globally in installation-user.md :package: --- doc/src/installation_user.md | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/src/installation_user.md b/doc/src/installation_user.md index e1566198..76f9a015 100644 --- a/doc/src/installation_user.md +++ b/doc/src/installation_user.md @@ -16,7 +16,6 @@ bash Miniconda3-latest-Linux-x86_64.sh source ~/miniconda3/bin/activate conda init bash conda install -n base conda-libmamba-solver -conda config --set solver libmamba # setup virtual environment conda create -n karabo python=3.9 conda activate karabo From 266c883a19a83238b7c0445105b2928d687d8750 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 1 Feb 2024 15:39:51 +0100 Subject: [PATCH 204/207] made plot-function api more consistent :cold_sweat: --- karabo/imaging/image.py | 10 +++------- karabo/simulation/beam.py | 2 +- karabo/test/test_long_observation.py | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py index 0a128005..e7de02b8 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -2,7 +2,6 @@ import logging import os -import uuid from typing import ( Any, Callable, @@ -476,7 +475,7 @@ def plot_power_spectrum( self, resolution: float = 5.0e-4, signal_channel: Optional[int] = None, - save_png: bool = False, + path: Optional[FilePathType] = None, ) -> None: """ Plot the power spectrum of this image. @@ -501,11 +500,8 @@ def plot_power_spectrum( plt.gca().set_ylim(1e-6 * max_profile, 2.0 * max_profile) plt.tight_layout() - if save_png: - power_spectrum_name = ( - self._fname if self._fname is not None else uuid.uuid4() - ) - plt.savefig(f"./power_spectrum_{power_spectrum_name}") + if path is not None: + plt.savefig(path) plt.show(block=False) plt.pause(1) diff --git a/karabo/simulation/beam.py b/karabo/simulation/beam.py index 98ab344b..07023468 100644 --- a/karabo/simulation/beam.py +++ b/karabo/simulation/beam.py @@ -233,7 +233,7 @@ def show_eidos_beam( B_ah: NDArray[np.complex_], path: Optional[str] = None, ) -> None: - f, ax = plt.subplots(2, 2) + _, ax = plt.subplots(2, 2) log10_notzero = 10 ** (-10) ax00 = ax[0, 0] ax01 = ax[0, 1] diff --git a/karabo/test/test_long_observation.py b/karabo/test/test_long_observation.py index 32a6475d..7d94b18e 100644 --- a/karabo/test/test_long_observation.py +++ b/karabo/test/test_long_observation.py @@ -133,5 +133,3 @@ def test_long_observations(tobject: TFiles, sky_data: NDArray[np.float64]): # imaging cellsize is over-written in the Imager based on max uv dist. 
     imager = Imager(visibility, imaging_npixel=4096, imaging_cellsize=1.0e-5)
     imager.get_dirty_image()
-    # dirty.write_to_file("./test/result/beam/beam_vis.fits",overwrite=True)
-    # dirty.plot(colobar_label="Flux Density (Jy)", filename="combine_vis.png")

From 02268ab7beca5db72c9cfe8acc08dda3a024cd82 Mon Sep 17 00:00:00 2001
From: "lukas.gehrig" 
Date: Mon, 5 Feb 2024 14:36:49 +0100
Subject: [PATCH 205/207] refactored DaskHandler to use just a single DaskHandler class for function-calling purpose :musical_score:

---
 doc/src/examples/example_structure.md |  20 +-
 karabo/__init__.py                    |   4 +-
 karabo/test/test_dask.py              | 112 +++--
 karabo/util/dask.py                   | 596 +++++++++++++++++---------
 4 files changed, 468 insertions(+), 264 deletions(-)

diff --git a/doc/src/examples/example_structure.md b/doc/src/examples/example_structure.md
index 4daa7914..c6e15795 100644
--- a/doc/src/examples/example_structure.md
+++ b/doc/src/examples/example_structure.md
@@ -51,7 +51,7 @@ Following these guidelines will help ensure that you get the most out of Karabo'
     - tuple: A tuple containing the results of the iterate_function for each element in the iterable. Results are gathered using Dask's compute function.
 
 ### Additional Notes
-It's important when working on a `Slurm Cluster` to call DaskSlurmHandler.setup() at the beginning.
+It's important when working on a `Slurm Cluster` to call DaskHandler.setup() at the beginning.
 
 If 'verbose' is specified in kwargs and is set to True, progress messages will be printed during processing.
 
@@ -75,39 +75,39 @@ DaskHandler.parallelize_with_dask(my_function, my_iterable, *args, **kwargs) # T
 
 ## Use Karabo on a SLURM cluster
 
-Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. The `DaskSlurmHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables to midify the behavior of a Dask client, if they've changed before creating a client.
+Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. The `DaskHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables to modify the behavior of a Dask client, if they've been changed before creating a client.
 
-While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client should to be initialized at the beginning of your script with `DaskSlurmHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes.
+While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client should be initialized at the beginning of your script with `DaskHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes. If you just need the client itself, then no `setup()` is needed.
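+
+As an illustrative sketch (not an official Karabo example), a SLURM job script could combine the two calls documented above: `DaskHandler.setup()` once at the top and `parallelize_with_dask` for the per-item work. `process_item` and `my_items` are placeholder names.
+
+```python
+from karabo.util.dask import DaskHandler
+
+
+def process_item(item: str, multiplier: int = 1) -> int:
+    # placeholder for your actual per-item workload
+    return len(item) * multiplier
+
+
+if __name__ == "__main__":
+    DaskHandler.setup()  # initialize the Dask client at the beginning of the script
+    my_items = ["a", "bb", "ccc"]
+    # keyword arguments (here `multiplier`) are forwarded to `process_item`
+    results = DaskHandler.parallelize_with_dask(process_item, my_items, multiplier=2)
+    print(results)
+```
+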
```python -from karabo.util.dask import DaskSlurmHandler +from karabo.util.dask import DaskHandler if __name__ == "__main__": # Get the Dask client - client = DaskSlurmHandler.get_dask_client() # Not needed anymore to call .setup() + client = DaskHandler.get_dask_client() # Not needed anymore to call .setup() # Use the client as needed result = client.submit(my_function, *args) ``` ```python -from karabo.util.dask import DaskSlurmHandler +from karabo.util.dask import DaskHandler if __name__ == "__main__": - DaskSlurmHandler.setup() + DaskHandler.setup() result = (*args) ``` Disable the usage of Dask by Karabo. ```python -from karabo.util.dask import DaskSlurmHandler +from karabo.util.dask import DaskHandler # Modify the static variables -DaskSlurmHandler.use_dask = False +DaskHandler.use_dask = False ``` -Please also check out the `DaskSlurmHandler` under `karabo.util.dask` for more information. +Please also check out the `DaskHandler` under `karabo.util.dask` for more information. ### Dask Dashboard The Dask dashboard link should be printed in stdout. Just copy the link into your browser, and then you're able to observe the current dask-process. If you run Karabo on a VM without access to a browser and internet, you can use `port forwarding` to access the Dask Dashboard from your local machine. In `VSCODE`, this can be done directly when using the "PORTS" tab. diff --git a/karabo/__init__.py b/karabo/__init__.py index 5dc3acd0..af92a5e9 100644 --- a/karabo/__init__.py +++ b/karabo/__init__.py @@ -34,9 +34,9 @@ # But because this is happening at build/install-time, the dependencies of Karabo # are not yet available in the venv, and therefore the installation of the # dependencies will fail. - from karabo.util.dask import DaskSlurmHandler + from karabo.util.dask import DaskHandlerSlurm - DaskSlurmHandler.prepare_slurm_nodes_for_dask() + DaskHandlerSlurm._prepare_slurm_nodes_for_dask() # set rascil data directory environment variable # see https://ska-telescope.gitlab.io/external/rascil/RASCIL_install.html diff --git a/karabo/test/test_dask.py b/karabo/test/test_dask.py index dc2d5e0c..37dc89ea 100644 --- a/karabo/test/test_dask.py +++ b/karabo/test/test_dask.py @@ -6,14 +6,7 @@ import pytest from dask import compute # type: ignore[attr-defined] -from karabo.util.dask import DaskHandler, DaskSlurmHandler, fetch_dask_handler - - -@pytest.fixture(scope="module") -def setup_dask() -> None: - dask_handler = fetch_dask_handler() - dask_handler.setup() - +from karabo.util.dask import DaskHandler, DaskHandlerSlurm _EnvVarsType = Dict[str, str] @@ -30,47 +23,47 @@ def env_vars() -> Dict[str, str]: def test_get_min_max_of_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_min_max_of_node_id() == (4397, 4406) + assert DaskHandlerSlurm._get_min_max_of_node_id() == (4397, 4406) def test_get_lowest_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_lowest_node_id() == 4397 + assert DaskHandlerSlurm._get_lowest_node_id() == 4397 def test_get_base_string_node_list(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_base_string_node_list() == "nid0" + assert DaskHandlerSlurm._get_base_string_node_list() == "nid0" def test_get_lowest_node_name(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_lowest_node_name() == "nid04397" + assert DaskHandlerSlurm._get_lowest_node_name() == "nid04397" def 
test_get_number_of_nodes(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_number_of_nodes() == 10 + assert DaskHandlerSlurm.get_number_of_nodes() == 10 def test_get_node_id(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_node_id() == 4397 + assert DaskHandlerSlurm.get_node_id() == 4397 def test_get_node_name(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_node_name() == "nid04397" + assert DaskHandlerSlurm.get_node_name() == "nid04397" def test_is_first_node(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.is_first_node() is True + assert DaskHandlerSlurm.is_first_node() is True def test_is_on_slurm_cluster(env_vars: _EnvVarsType) -> None: with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.is_on_slurm_cluster() is True + assert DaskHandlerSlurm.is_on_slurm_cluster() is True # repeat the tests for other values of environment variables @@ -84,43 +77,43 @@ def test_multiple_nodes_and_ranges() -> None: "SLURM_JOB_ID": "123456", } with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) - assert DaskSlurmHandler.get_lowest_node_id() == 2780 - assert DaskSlurmHandler.get_base_string_node_list() == "nid0" - assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" - assert DaskSlurmHandler.get_number_of_nodes() == 3 - assert DaskSlurmHandler.get_node_id() == 2780 - assert DaskSlurmHandler.get_node_name() == "nid02780" - assert DaskSlurmHandler.is_first_node() is True - assert DaskSlurmHandler.is_on_slurm_cluster() is True + assert DaskHandlerSlurm._get_min_max_of_node_id() == (2780, 4715) + assert DaskHandlerSlurm._get_lowest_node_id() == 2780 + assert DaskHandlerSlurm._get_base_string_node_list() == "nid0" + assert DaskHandlerSlurm._get_lowest_node_name() == "nid02780" + assert DaskHandlerSlurm.get_number_of_nodes() == 3 + assert DaskHandlerSlurm.get_node_id() == 2780 + assert DaskHandlerSlurm.get_node_name() == "nid02780" + assert DaskHandlerSlurm.is_first_node() is True + assert DaskHandlerSlurm.is_on_slurm_cluster() is True # test for a different node env_vars["SLURMD_NODENAME"] = "nid04715" with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) - assert DaskSlurmHandler.get_lowest_node_id() == 2780 - assert DaskSlurmHandler.get_base_string_node_list() == "nid0" - assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" - assert DaskSlurmHandler.get_number_of_nodes() == 3 - assert DaskSlurmHandler.get_node_id() == 4715 - assert DaskSlurmHandler.get_node_name() == "nid04715" - assert DaskSlurmHandler.is_first_node() is False - assert DaskSlurmHandler.is_on_slurm_cluster() is True + assert DaskHandlerSlurm._get_min_max_of_node_id() == (2780, 4715) + assert DaskHandlerSlurm._get_lowest_node_id() == 2780 + assert DaskHandlerSlurm._get_base_string_node_list() == "nid0" + assert DaskHandlerSlurm._get_lowest_node_name() == "nid02780" + assert DaskHandlerSlurm.get_number_of_nodes() == 3 + assert DaskHandlerSlurm.get_node_id() == 4715 + assert DaskHandlerSlurm.get_node_name() == "nid04715" + assert DaskHandlerSlurm.is_first_node() is False + assert DaskHandlerSlurm.is_on_slurm_cluster() is True # test for a different node env_vars["SLURMD_NODENAME"] = "nid02781" with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) - assert 
DaskSlurmHandler.get_lowest_node_id() == 2780 - assert DaskSlurmHandler.get_base_string_node_list() == "nid0" - assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" - assert DaskSlurmHandler.get_number_of_nodes() == 3 - assert DaskSlurmHandler.get_node_id() == 2781 - assert DaskSlurmHandler.get_node_name() == "nid02781" - assert DaskSlurmHandler.is_first_node() is False - assert DaskSlurmHandler.is_on_slurm_cluster() is True + assert DaskHandlerSlurm._get_min_max_of_node_id() == (2780, 4715) + assert DaskHandlerSlurm._get_lowest_node_id() == 2780 + assert DaskHandlerSlurm._get_base_string_node_list() == "nid0" + assert DaskHandlerSlurm._get_lowest_node_name() == "nid02780" + assert DaskHandlerSlurm.get_number_of_nodes() == 3 + assert DaskHandlerSlurm.get_node_id() == 2781 + assert DaskHandlerSlurm.get_node_name() == "nid02781" + assert DaskHandlerSlurm.is_first_node() is False + assert DaskHandlerSlurm.is_on_slurm_cluster() is True def test_extreme_range_of_nodes() -> None: @@ -131,16 +124,16 @@ def test_extreme_range_of_nodes() -> None: "SLURM_JOB_ID": "123456", } with patch.dict(os.environ, env_vars): - assert DaskSlurmHandler.get_min_max_of_node_id() == (2780, 4715) - assert DaskSlurmHandler.get_lowest_node_id() == 2780 - assert DaskSlurmHandler.get_base_string_node_list() == "nid0" - assert DaskSlurmHandler.get_lowest_node_name() == "nid02780" - assert DaskSlurmHandler.get_number_of_nodes() == 1106 - assert DaskSlurmHandler.get_node_id() == 3333 - assert DaskSlurmHandler.get_node_name() == "nid03333" - assert DaskSlurmHandler.is_first_node() is False - assert DaskSlurmHandler.is_on_slurm_cluster() is True - assert len(DaskSlurmHandler.extract_node_ids_from_node_list()) == 1106 + assert DaskHandlerSlurm._get_min_max_of_node_id() == (2780, 4715) + assert DaskHandlerSlurm._get_lowest_node_id() == 2780 + assert DaskHandlerSlurm._get_base_string_node_list() == "nid0" + assert DaskHandlerSlurm._get_lowest_node_name() == "nid02780" + assert DaskHandlerSlurm.get_number_of_nodes() == 1106 + assert DaskHandlerSlurm.get_node_id() == 3333 + assert DaskHandlerSlurm.get_node_name() == "nid03333" + assert DaskHandlerSlurm.is_first_node() is False + assert DaskHandlerSlurm.is_on_slurm_cluster() is True + assert len(DaskHandlerSlurm._extract_node_ids_from_node_list()) == 1106 def test_single_node() -> None: @@ -150,10 +143,10 @@ def test_single_node() -> None: "SLURMD_NODENAME": "nid03038", } with patch.dict(os.environ, env_vars): - min_node_id, max_node_id = DaskSlurmHandler.get_min_max_of_node_id() + min_node_id, max_node_id = DaskHandlerSlurm._get_min_max_of_node_id() assert min_node_id == 3038 assert max_node_id == 3038 - assert DaskSlurmHandler.get_base_string_node_list() == "nid" + assert DaskHandlerSlurm._get_base_string_node_list() == "nid" def test_dask_job() -> None: @@ -193,11 +186,8 @@ def simple_function(x: int, multiplier: int = 1) -> int: return x * multiplier -def test_parallelize_with_dask(setup_dask) -> None: +def test_parallelize_with_dask() -> None: iterable = [1, 2, 3, 4, 5] - dask_handler = fetch_dask_handler() - results = dask_handler.parallelize_with_dask( - simple_function, iterable, multiplier=2 - ) + results = DaskHandler.parallelize_with_dask(simple_function, iterable, multiplier=2) expected_results = tuple([x * 2 for x in iterable]) assert results == expected_results diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 3ffa1233..2a3b92bf 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -1,3 +1,4 @@ +"""Module for dask-related functionality.""" 
from __future__ import annotations import asyncio @@ -8,7 +9,7 @@ import sys import time from collections.abc import Iterable -from typing import Any, Callable, List, Optional, Tuple, Type, Union, cast +from typing import Any, Callable, List, Literal, Optional, Tuple, Type, Union, cast from warnings import warn import psutil @@ -16,77 +17,64 @@ from dask.distributed import Client, LocalCluster, Nanny, Worker from dask_mpi import initialize from mpi4py import MPI +from typing_extensions import assert_never -from karabo.util._types import IntFloat from karabo.util.data_util import extract_chars_from_string, extract_digit_from_string from karabo.util.file_handler import FileHandler from karabo.warning import KaraboWarning -def fetch_dask_handler() -> Union[Type[DaskHandler], Type[DaskSlurmHandler]]: - """Utility function to automatically choose a Handler. - - Returns: - The chosen Handler. - """ - if DaskSlurmHandler.is_on_slurm_cluster(): - return DaskSlurmHandler - return DaskHandler - - -class DaskHandler: - """ - A class for managing a Dask client. This class is a singleton, meaning that - only one instance of this class can exist at any given time. This also - allows you to create your own client and pass it to this class. +class DaskHandlerBasic: + """Base-class for dask-handler functionality. Attributes ---------- - dask_client: Optional[Client] + dask_client: The Dask client object. If None, a new client will be created. - n_workers_scheduler_node : int - The number of workers to start on the scheduler node. - memory_limit : Optional[float] + memory_limit: The memory_limit per worker in GB. If None, the memory limit will be set to the maximum available memory on the node (see documentation) in dask for `memory_limit`. - n_threads_per_worker : int + n_threads_per_worker: The number of threads to use per worker. Standard is None, which means that the number of threads will be equal to the number of cores. - use_dask: Optional[bool] - Whether to use Dask or not. If None, Dask will be used if the - current node is a SLURM node and there are more than 1 node. - use_workers_or_nannies: Optional[str] - Whether to use workers or nannies. If None, nannies will be used. - This could lead to more processing (see documentation for dask usage - in Karabo). - TIMEOUT: int - The timeout in seconds for the Dask scheduler to wait for all the - workers to connect. + use_dask: + Whether to use Dask or not. If None, then Karabo will decide whether + to use dask or not for certain tasks. + use_processes: + Use processes instead of threads? + Threads: + - Fast to initiate. + - No need to transfer data to them. + - Limited by the GIL, which allows one thread to read the code at once. + Processes: + - Take time to set up. + - Slow to transfer data to. + - Each have their own GIL and so don't need to take turns reading the code. """ dask_client: Optional[Client] = None - n_workers_scheduler_node: int = 1 - memory_limit: Optional[int] = None + memory_limit: Optional[float] = None n_threads_per_worker: Optional[int] = None use_dask: Optional[bool] = None - use_workers_or_nannies: Optional[str] = "nannies" use_proccesses: bool = False # Some packages, such as pybdsf, do not work - # with processes because they spawn subprocesses. 
- TIMEOUT: int = 60 - # Some internal variables - _nodes_prepared: bool = False _setup_called: bool = False @classmethod def setup(cls) -> None: + """Calls `get_dask_client`.""" _ = cls.get_dask_client() cls._setup_called = True @classmethod def get_dask_client(cls) -> Client: + """Get (create if not exists) a dask-client. + + Returns: + Dask-client. + """ if cls.dask_client is not None: return cls.dask_client if MPI.COMM_WORLD.Get_size() > 1: # TODO: testing of whole if-block @@ -98,16 +86,24 @@ def get_dask_client(cls) -> Client: cls.dask_client = Client(processes=cls.use_proccesses) # TODO: testing if MPI.COMM_WORLD.rank == 0: print(f"Dashboard link: {cls.dask_client.dashboard_link}", flush=True) - atexit.register(cls.dask_cleanup, cls.dask_client) + atexit.register(cls._dask_cleanup) else: - cls.dask_client = cls.get_local_dask_client(cls.memory_limit) + cls.dask_client = cls._get_local_dask_client() # Register cleanup function print(f"Dashboard link: {cls.dask_client.dashboard_link}", flush=True) - atexit.register(cls.dask_cleanup, cls.dask_client) + atexit.register(cls._dask_cleanup) return cls.dask_client @classmethod def should_dask_be_used(cls, override: Optional[bool] = None) -> bool: + """Util function to decide whether dask should be used or not. + + Args: + override: Override? Has highest priority. + + Returns: + Decision whether dask should be used or not. + """ if override is not None: return override elif cls.use_dask is not None: @@ -118,18 +114,61 @@ def should_dask_be_used(cls, override: Optional[bool] = None) -> bool: return False @classmethod - def calc_num_of_workers( + def parallelize_with_dask( cls, - memory_limit: Optional[IntFloat], - ) -> int: - """Estimates the number of workers considering settings and availability. + iterate_function: Callable[..., Any], + iterable: Iterable[Any], + *args: Any, + **kwargs: Any, + ) -> Union[Any, Tuple[Any, ...], List[Any]]: + """ + Run a function over an iterable in parallel using dask, and gather the results. + + args & kwargs will get passed to `Delayed`. Args: - memory_limit: Memory constraint. + iterate_function: The function to be applied to each element of `iterable`. + The function takes the current element of the iterable as its first + argument, followed by any positional arguments, and then any keyword + arguments. + + iterable + The iterable over which the function will be applied. Each element of + `iterable` will be passed to `iterate_function`. + + Returns: A tuple containing the results of the `iterate_function` for each + element in the iterable. The results are gathered using dask's `compute` + function. + """ + if not cls._setup_called: + cls.setup() + + delayed_results = [] + + for element in iterable: + if "verbose" in kwargs and kwargs["verbose"]: + print(f"Processing element {element}...\nExtracting data...") + + delayed_result = delayed(iterate_function)(element, *args, **kwargs) + delayed_results.append(delayed_result) + + return compute(*delayed_results, scheduler="distributed") + + @classmethod + def _dask_cleanup(cls) -> None: + """Shutdown & close `cls.dask_client`.""" + if cls.dask_client is not None: + cls.dask_client.shutdown() + cls.dask_client.close() + + @classmethod + def _calc_num_of_workers(cls) -> int: + """Estimates the number of workers considering settings and availability. Returns: Etimated number of workers. 
""" + memory_limit = cls.memory_limit if memory_limit is None: return 1 # Calculate number of workers @@ -158,12 +197,13 @@ def calc_num_of_workers( return n_workers @classmethod - def get_local_dask_client( - cls, - memory_limit: Optional[IntFloat], - ) -> Client: - # Calculate number of workers per node - n_workers = cls.calc_num_of_workers(memory_limit) + def _get_local_dask_client(cls) -> Client: + """Creates a local dask-client. + + Returns: + Created dask-client. + """ + n_workers = cls._calc_num_of_workers() client = Client( n_workers=n_workers, threads_per_worker=cls.n_threads_per_worker, @@ -171,116 +211,112 @@ def get_local_dask_client( ) return client - @classmethod - def parallelize_with_dask( - cls, - iterate_function: Callable[..., Any], - iterable: Iterable[Any], - *args: Any, - **kwargs: Any, - ) -> Union[Any, Tuple[Any, ...], List[Any]]: - """ - Run a function over an iterable in parallel using Dask, and gather the results. - - Parameters - ---------- - iterate_function : callable - The function to be applied to each element of the iterable. The function - should take the current element of the iterable as its first argument, - followed by any positional arguments, and then any keyword arguments. - - iterable : iterable - The iterable over which the function will be applied. Each element of this - iterable will be passed to the `iterate_function`. - - *args : tuple - Positional arguments that will be passed to the `iterate_function` after the - current element of the iterable. - - **kwargs : dict - Keyword arguments that will be passed to the `iterate_function`. - - Returns - ------- - tuple - A tuple containing the results of the `iterate_function` for each element in - the iterable. The results are gathered using Dask's compute function. - - Notes - ----- - - If 'verbose' is present in **kwargs and is set to True, additional progress - messages will be printed. - - This function utilizes the distributed scheduler of Dask. - """ - if not cls._setup_called: - cls.setup() - - delayed_results = [] - - for element in iterable: - if "verbose" in kwargs and kwargs["verbose"]: - print(f"Processing element {element}...\nExtracting data...") - delayed_result = delayed(iterate_function)(element, *args, **kwargs) - delayed_results.append(delayed_result) - - return compute(*delayed_results, scheduler="distributed") +class DaskHandlerSlurm(DaskHandlerBasic): + """Dask-handler for slurm-based jobs. - @classmethod - def dask_cleanup(cls, client: Client) -> None: - if client is not None: - client.shutdown() - client.close() - - -class DaskSlurmHandler(DaskHandler): - """Dask & Slurm related functionality resides here.""" + Attributes + ---------- + use_workers_or_nannies: + Whether to use workers or nannies (default). + This could lead to more processing (see documentation for dask usage + in Karabo). + n_workers_scheduler_node : int + The number of workers to start on the scheduler node. + timeout: int + Timeout in seconds for the dask-scheduler to wait for all the + workers to connect. + """ - @classmethod - def dask_info_address(cls) -> str: - """dask_info.json path""" - _, info_address, _ = cls._get_dask_paths_for_slurm() - return info_address + use_workers_or_nannies: Literal["workers", "nannies"] = "nannies" + n_workers_scheduler_node: int = 1 + # with processes because they spawn subprocesses. 
+ timeout: int = 60 - @classmethod - def dask_run_status(cls) -> str: - """dask_run_status.txt path""" - _, _, run_status = cls._get_dask_paths_for_slurm() - return run_status + _nodes_prepared: bool = False @classmethod def get_dask_client(cls) -> Client: + """Get (create if not exists) a dask-client for a SLURM environment. + + Returns: + Dask-client. + """ dask_client = cls.dask_client if dask_client is not None: return dask_client if not cls._setup_called and cls.is_first_node(): cls.setup() if cls.get_number_of_nodes() > 1: - dask_client = cast( # hacky workaround - Client, - cls.setup_dask_for_slurm( - cls.n_workers_scheduler_node, - cls.memory_limit, - ), + dask_client = cast( # dask_client is None if not first-node + Client, # however, needed workaround to keep api-compatibility + cls._setup_dask_for_slurm(), ) if dask_client is not None: cls.dask_client = dask_client return dask_client else: - cls.dask_client = super(DaskSlurmHandler, cls).get_dask_client() + cls.dask_client = super(DaskHandlerSlurm, cls).get_dask_client() return cls.dask_client @classmethod - def prepare_slurm_nodes_for_dask(cls) -> None: - # Detect if we are on a slurm cluster + def should_dask_be_used(cls, override: Optional[bool] = None) -> bool: + """Util function to decide whether dask should be used or not. + + This implementation differs a bit from the basic-class, where + on SLURM-systems, additional checks are taken into consideration. + + Args: + override: Override? Has highest priority. + + Returns: + Decision whether dask should be used or not. + """ + if override is not None: + return override + elif cls.use_dask is not None: + return cls.use_dask + elif cls.dask_client is not None: + return True + elif cls.is_on_slurm_cluster() and cls.get_number_of_nodes() > 1: + return True + else: + return False + + @classmethod + def _dask_cleanup(cls) -> None: + """Shutdown & close `cls.dask_client`. + + In addition, `dask_info_dir` will get removed if exists. 
+ """ + dask_info_dir, _, _ = cls._get_dask_paths_for_slurm() + if os.path.exists(dask_info_dir) and os.path.isdir(dask_info_dir): + shutil.rmtree(dask_info_dir) + + super(DaskHandlerSlurm, cls)._dask_cleanup() + + @classmethod + def _dask_info_address(cls) -> str: + """dask_info.json path.""" + _, info_address, _ = cls._get_dask_paths_for_slurm() + return info_address + + @classmethod + def _dask_run_status(cls) -> str: + """dask_run_status.txt path.""" + _, _, run_status = cls._get_dask_paths_for_slurm() + return run_status + + @classmethod + def _prepare_slurm_nodes_for_dask(cls) -> None: + """Prepares slurm-nodes for dask-usage.""" if not cls.is_on_slurm_cluster() or cls.get_number_of_nodes() <= 1: cls.use_dask = False - return elif ( cls.is_first_node() and cls.dask_client is None and not cls._nodes_prepared ): cls._nodes_prepared = True - slurm_job_nodelist = cls.get_job_nodelist() + slurm_job_nodelist = cls._get_job_nodelist() slurm_node_name = cls.get_node_name() print( f""" @@ -297,7 +333,8 @@ def prepare_slurm_nodes_for_dask(cls) -> None: pass @classmethod - def setup_nannies_workers_for_slurm(cls) -> None: + def _setup_nannies_workers_for_slurm(cls) -> None: + """Setup nannies & workers.""" # Wait until dask info file is created _, dask_info_address, dask_run_status = cls._get_dask_paths_for_slurm() while not os.path.exists(dask_info_address): @@ -340,9 +377,11 @@ async def start_nanny(scheduler_address: str) -> Nanny: if cls.use_workers_or_nannies == "workers": worker = asyncio.run(start_worker(scheduler_address)) workers_or_nannies.append(worker) - else: + elif cls.use_workers_or_nannies == "nannies": nanny = asyncio.run(start_nanny(scheduler_address)) workers_or_nannies.append(nanny) + else: + assert_never(cls.use_workers_or_nannies) # Keep the process alive while os.path.exists(dask_run_status): @@ -354,20 +393,25 @@ async def start_nanny(scheduler_address: str) -> Nanny: if result == "OK": pass else: + if isinstance(worker_or_nanny, Worker): + instance = "worker" + else: + instance = "nanny" print( - "There was an issue closing the worker or nanny at " - + f"{worker_or_nanny.address}" + f"There was an issue closing {instance} {worker_or_nanny.address}", + file=sys.stderr, ) # Stop the script successfully sys.exit(0) @classmethod - def setup_dask_for_slurm( - cls, - n_workers_scheduler_node: int, - memory_limit: Optional[IntFloat], - ) -> Optional[Client]: + def _setup_dask_for_slurm(cls) -> Optional[Client]: + """Setup dask for slurm. + + Returns: + A dask-client if it's the first node, otherwise None. 
+ """ if cls.is_first_node(): _, dask_info_address, dask_run_status = cls._get_dask_paths_for_slurm() # Create file to show that the run is still ongoing @@ -377,13 +421,13 @@ def setup_dask_for_slurm( # Create client and scheduler cluster = LocalCluster( ip=cls.get_node_name(), - n_workers=n_workers_scheduler_node, + n_workers=cls.n_workers_scheduler_node, threads_per_worker=cls.n_threads_per_worker, ) dask_client = Client(cluster, proccesses=cls.use_proccesses) # Calculate number of workers per node - n_workers_per_node = cls.calc_num_of_workers(memory_limit) + n_workers_per_node = cls._calc_num_of_workers() # Create dictionary with the information dask_info = { @@ -398,10 +442,10 @@ def setup_dask_for_slurm( # Wait until all workers are connected n_workers_requested = ( cls.get_number_of_nodes() - 1 - ) * n_workers_per_node + n_workers_scheduler_node + ) * n_workers_per_node + cls.n_workers_scheduler_node dask_client.wait_for_workers( - n_workers=n_workers_requested, timeout=cls.TIMEOUT + n_workers=n_workers_requested, timeout=cls.timeout ) print( @@ -410,12 +454,12 @@ def setup_dask_for_slurm( return dask_client else: - cls.setup_nannies_workers_for_slurm() + cls._setup_nannies_workers_for_slurm() return None @classmethod def _get_dask_paths_for_slurm(cls) -> Tuple[str, str, str]: - """Gets dask-file paths for SLURM setup. + """Gets dask-file paths for slurm setup. This needs to be a function, to enable the `FileHandler` lazy path-loading, hence allowing path-changes at run-time. @@ -423,7 +467,7 @@ def _get_dask_paths_for_slurm(cls) -> Tuple[str, str, str]: Returns: dask_info_dir, dask-info-address, dask-run-status """ - slurm_job_id = cls.get_job_id() + slurm_job_id = cls._get_job_id() prefix = f"-dask-info-slurm-{slurm_job_id}-" dask_info_dir = FileHandler().get_tmp_dir( prefix=prefix, @@ -436,14 +480,19 @@ def _get_dask_paths_for_slurm(cls) -> Tuple[str, str, str]: return dask_info_dir, dask_info_address, dask_run_status @classmethod - def extract_node_ids_from_node_list(cls) -> List[int]: - slurm_job_nodelist = cls.get_job_nodelist() + def _extract_node_ids_from_node_list(cls) -> List[int]: + """Extracts all node-ids of the current slurm-job as a list. + + Returns: + Node-ids. + """ + slurm_job_nodelist = cls._get_job_nodelist() if cls.get_number_of_nodes() == 1: # Node name will be something like "psanagpu115" return [extract_digit_from_string(slurm_job_nodelist)] node_list = slurm_job_nodelist.split("[")[1].split("]")[0] id_ranges = node_list.split(",") - node_ids = [] + node_ids: List[int] = [] for id_range in id_ranges: if "-" in id_range: min_id, max_id = id_range.split("-") @@ -454,80 +503,245 @@ def extract_node_ids_from_node_list(cls) -> List[int]: return node_ids @classmethod - def dask_cleanup(cls, client: Client) -> None: - dask_info_dir, _, _ = cls._get_dask_paths_for_slurm() - if os.path.exists(dask_info_dir) and os.path.isdir(dask_info_dir): - shutil.rmtree(dask_info_dir) + def _get_min_max_of_node_id(cls) -> Tuple[int, int]: + """Returns the min max from SLURM_JOB_NODELIST. - super(DaskSlurmHandler, cls).dask_cleanup(client=client) + Returns: + Min & Max node-ids. 
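+
+        Example (illustrative):
+            For `SLURM_JOB_NODELIST="nid0[4397-4406]"` this returns
+            `(4397, 4406)`.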
+        """
+        node_list = cls._extract_node_ids_from_node_list()
+        return min(node_list), max(node_list)
 
     @classmethod
-    def should_dask_be_used(cls, override: Optional[bool] = None) -> bool:
-        if override is not None:
-            return override
-        elif cls.use_dask is not None:
-            return cls.use_dask
-        elif cls.dask_client is not None:
-            return True
-        elif cls.is_on_slurm_cluster() and cls.get_number_of_nodes() > 1:
-            return True
-        else:
-            return False
+    def _get_lowest_node_id(cls) -> int:
+        """Get the lowest slurm node-id.
 
-    @classmethod
-    def get_min_max_of_node_id(cls) -> Tuple[int, int]:
-        """
-        Returns the min max from SLURM_JOB_NODELIST.
-        Works if it's run only on two nodes (separated with a comma)
-        of if it runs on more than two nodes (separated with a dash).
+        Returns:
+            Lowest node-id.
         """
-        node_list = cls.extract_node_ids_from_node_list()
-        return min(node_list), max(node_list)
+        return cls._get_min_max_of_node_id()[0]
 
     @classmethod
-    def get_lowest_node_id(cls) -> int:
-        return cls.get_min_max_of_node_id()[0]
+    def _get_base_string_node_list(cls) -> str:
+        """Gets the node-list base-string.
 
-    @classmethod
-    def get_base_string_node_list(cls) -> str:
-        slurm_job_nodelist = cls.get_job_nodelist()
+        Returns:
+            Node-list base-string.
+        """
+        slurm_job_nodelist = cls._get_job_nodelist()
         if cls.get_number_of_nodes() == 1:
             return extract_chars_from_string(slurm_job_nodelist)
         else:
             return slurm_job_nodelist.split("[")[0]
 
     @classmethod
-    def get_lowest_node_name(cls) -> str:
-        return cls.get_base_string_node_list() + str(cls.get_lowest_node_id())
+    def _get_lowest_node_name(cls) -> str:
+        """Gets the lowest node-name.
+
+        Returns:
+            Lowest node-name.
+        """
+        return cls._get_base_string_node_list() + str(cls._get_lowest_node_id())
 
     @classmethod
     def get_number_of_nodes(cls) -> int:
+        """Gets the number of nodes of the slurm-job.
+
+        Returns:
+            Number of nodes.
+        """
         n_nodes = os.environ["SLURM_JOB_NUM_NODES"]
         return int(n_nodes)
 
     @classmethod
     def get_node_id(cls) -> int:
+        """Gets the current node-id.
+
+        Returns:
+            Node-id.
+        """
         # Attention, often the node id starts with a 0.
         slurmd_nodename = cls.get_node_name()
-        len_id = len(cls.get_base_string_node_list())
+        len_id = len(cls._get_base_string_node_list())
         return int(slurmd_nodename[-len_id:])
 
     @classmethod
     def get_node_name(cls) -> str:
+        """Gets the current node-name.
+
+        Returns:
+            Node-name.
+        """
         return os.environ["SLURMD_NODENAME"]
 
     @classmethod
     def is_first_node(cls) -> bool:
-        return cls.get_node_id() == cls.get_lowest_node_id()
+        """Util function to check if the current node is the first node.
+
+        Returns:
+            Check-result.
+        """
+        return cls.get_node_id() == cls._get_lowest_node_id()
 
     @classmethod
-    def get_job_nodelist(cls) -> str:
+    def _get_job_nodelist(cls) -> str:
+        """Gets the nodelist of the current job as a `str`.
+
+        Returns:
+            Nodelist of current job.
+        """
         return os.environ["SLURM_JOB_NODELIST"]
 
     @classmethod
-    def get_job_id(cls) -> str:
+    def _get_job_id(cls) -> str:
+        """Gets the current job-id as a `str`.
+
+        Returns:
+            Job-id.
+        """
         return os.environ["SLURM_JOB_ID"]
 
     @classmethod
     def is_on_slurm_cluster(cls) -> bool:
+        """Util function to check if code is running in a slurm-job.
+
+        Returns:
+            Check-result.
+        """
         return "SLURM_JOB_ID" in os.environ
+
+
+def _select_dask_handler() -> Type[DaskHandlerBasic]:
+    """Selects a dask-handler class.
+
+    Returns:
+        Chosen dask-handler class.
+    """
+    if DaskHandlerSlurm.is_on_slurm_cluster():
+        return DaskHandlerSlurm
+    return DaskHandlerBasic
+
+
+class DaskHandler(DaskHandlerBasic):
+    """Public & dev API for dask-associated functionality.
+
+    This is the public dask-api for Karabo, where you don't have to worry about
+    which dask-handler of this module to use. You can do almost everything through
+    this class. The only exception is if you want to adjust the default settings
+    on a slurm-system (customization through `DaskHandlerSlurm`).
+
+    Attributes
+    ----------
+    dask_client:
+        The Dask client object. If None, a new client will be created.
+    memory_limit:
+        The memory_limit per worker in GB. If None, the memory limit will
+        be set to the maximum available memory on the node (see the dask
+        documentation for `memory_limit`).
+    n_threads_per_worker:
+        The number of threads to use per worker. Default is None, which
+        means that the number of threads will be equal to the number of
+        cores.
+    use_dask:
+        Whether to use Dask or not. If None, then Karabo will decide whether
+        to use dask or not for certain tasks.
+    use_processes:
+        Use processes instead of threads?
+        Threads:
+        - Fast to initiate.
+        - No need to transfer data to them.
+        - Limited by the GIL, so only one thread runs Python code at a time.
+        Processes:
+        - Take time to set up.
+        - Slow to transfer data to.
+        - Each has its own GIL, so they don't need to take turns running code.
+    """
+
+    # Important: API-functions of `DaskHandler` should redirect ALL functions defined
+    # in `DaskHandlerBasic` through `_handler`. This ensures that, in case `_handler`
+    # is a more specific implementation of `DaskHandlerBasic`, the corresponding
+    # overridden functions are used instead.
+    _handler = _select_dask_handler()
+
+    @classmethod
+    def setup(cls) -> None:
+        """Calls `get_dask_client`."""
+        return cls._handler.setup()
+
+    @classmethod
+    def get_dask_client(cls) -> Client:
+        """Get (create if not exists) a dask-client.
+
+        Returns:
+            Dask-client.
+        """
+        return cls._handler.get_dask_client()
+
+    @classmethod
+    def should_dask_be_used(cls, override: Optional[bool] = None) -> bool:
+        """Util function to decide whether dask should be used or not.
+
+        Args:
+            override: Override? Has highest priority.
+
+        Returns:
+            Decision whether dask should be used or not.
+        """
+        return cls._handler.should_dask_be_used(override)
+
+    @classmethod
+    def parallelize_with_dask(
+        cls,
+        iterate_function: Callable[..., Any],
+        iterable: Iterable[Any],
+        *args: Any,
+        **kwargs: Any,
+    ) -> Union[Any, Tuple[Any, ...], List[Any]]:
+        """
+        Run a function over an iterable in parallel using dask, and gather the results.
+
+        args & kwargs will get passed to `Delayed`.
+
+        Args:
+            iterate_function: The function to be applied to each element of `iterable`.
+                The function takes the current element of the iterable as its first
+                argument, followed by any positional arguments, and then any keyword
+                arguments.
+
+            iterable:
+                The iterable over which the function will be applied. Each element of
+                `iterable` will be passed to `iterate_function`.
+
+        Returns: A tuple containing the results of the `iterate_function` for each
+            element in the iterable. The results are gathered using dask's `compute`
+            function.
+        """
+        return cls._handler.parallelize_with_dask(
+            iterate_function,
+            iterable,
+            *args,
+            **kwargs,
+        )
+
+    @classmethod
+    def _dask_cleanup(cls) -> None:
+        """Shutdown & close `cls.dask_client`."""
+        return cls._handler._dask_cleanup()
+
+    @classmethod
+    def _calc_num_of_workers(cls) -> int:
+        """Estimates the number of workers considering settings and availability.
+
+        Returns:
+            Estimated number of workers.
+        """
+        return cls._handler._calc_num_of_workers()
+
+    @classmethod
+    def _get_local_dask_client(cls) -> Client:
+        """Creates a local dask-client.
+
+        Returns:
+            Created dask-client.
+        """
+        return cls._handler._get_local_dask_client()

From e96a929c32cba15053d00de608446a07eec5dccd Mon Sep 17 00:00:00 2001
From: "lukas.gehrig" 
Date: Mon, 5 Feb 2024 14:40:41 +0100
Subject: [PATCH 206/207] bugfix removed fetch-dask-handler function from
 karabo :gift:

---
 karabo/imaging/imager.py            |  5 ++---
 karabo/simulation/interferometer.py | 13 +++++--------
 karabo/simulation/line_emission.py  |  5 ++---
 karabo/sourcedetection/result.py    |  7 +++----
 4 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py
index 055179ab..f4365386 100644
--- a/karabo/imaging/imager.py
+++ b/karabo/imaging/imager.py
@@ -27,7 +27,7 @@
 from karabo.simulation.sky_model import SkyModel
 from karabo.simulation.visibility import Visibility
 from karabo.util._types import FilePathType
-from karabo.util.dask import fetch_dask_handler
+from karabo.util.dask import DaskHandler
 from karabo.util.file_handler import FileHandler, check_ending
 
 ImageContextType = Literal["awprojection", "2d", "ng", "wg"]
@@ -353,8 +353,7 @@ def imaging_rascil(
             raise RuntimeError("Client passed but use_dask is False")
         if use_dask:
             if not client:
-                dask_handler = fetch_dask_handler()
-                client = dask_handler.get_dask_client()
+                client = DaskHandler.get_dask_client()
             print(client.cluster)
         rsexecute.set_client(use_dask=use_dask, client=client, use_dlg=False)
         # Set CUDA parameters
diff --git a/karabo/simulation/interferometer.py b/karabo/simulation/interferometer.py
index 787393ef..b8cfc25b 100644
--- a/karabo/simulation/interferometer.py
+++ b/karabo/simulation/interferometer.py
@@ -31,7 +31,7 @@
     OskarSettingsTreeType,
     PrecisionType,
 )
-from karabo.util.dask import fetch_dask_handler
+from karabo.util.dask import DaskHandler
 from karabo.util.file_handler import FileHandler
 from karabo.util.gpu_util import is_cuda_available
 
@@ -251,15 +251,13 @@ def __init__(
                 "Providing `client` and `use_dask`=False is not allowed."
) elif not client and use_dask is True: - dask_handler = fetch_dask_handler() - client = dask_handler.get_dask_client() + client = DaskHandler.get_dask_client() else: pass elif use_dask is None and client is None: - dask_handler = fetch_dask_handler() - use_dask = dask_handler.should_dask_be_used() + use_dask = DaskHandler.should_dask_be_used() if use_dask: - client = dask_handler.get_dask_client() + client = DaskHandler.get_dask_client() self.use_dask = use_dask self.client = client @@ -381,8 +379,7 @@ def __run_simulation_parallized_observation( # Check if there is a dask client if self.client is None: - dask_handler = fetch_dask_handler() - self.client = dask_handler.get_dask_client() + self.client = DaskHandler.get_dask_client() if array_sky is None: raise KaraboInterferometerSimulationError( diff --git a/karabo/simulation/line_emission.py b/karabo/simulation/line_emission.py index 5dc93fba..ad5011b3 100644 --- a/karabo/simulation/line_emission.py +++ b/karabo/simulation/line_emission.py @@ -24,7 +24,7 @@ from karabo.util._types import DirPathType, FilePathType, IntFloat, NPFloatLikeStrict # from dask.delayed import Delayed -from karabo.util.dask import fetch_dask_handler +from karabo.util.dask import DaskHandler from karabo.util.plotting_util import get_slices @@ -694,8 +694,7 @@ def process_channel( # type: ignore[no-untyped-def] verbose=verbose, ) - dask_handler = fetch_dask_handler() - result = dask_handler.parallelize_with_dask( + result = DaskHandler.parallelize_with_dask( process_channel, range(num_bins), outpath=outpath, diff --git a/karabo/sourcedetection/result.py b/karabo/sourcedetection/result.py index a23409f8..743a97f3 100644 --- a/karabo/sourcedetection/result.py +++ b/karabo/sourcedetection/result.py @@ -16,7 +16,7 @@ from karabo.imaging.image import Image, ImageMosaicker from karabo.imaging.imager import Imager -from karabo.util.dask import fetch_dask_handler +from karabo.util.dask import DaskHandler from karabo.util.data_util import read_CSV_to_ndarray from karabo.util.file_handler import FileHandler from karabo.warning import KaraboWarning @@ -477,8 +477,7 @@ def detect_sources_in_images( for image in images ] # Check if there is a dask client - dask_handler = fetch_dask_handler() - if dask_handler.dask_client is not None: + if DaskHandler.dask_client is not None: func = delayed(PyBDSFSourceDetectionResult.detect_sources_in_image) else: func = PyBDSFSourceDetectionResult.detect_sources_in_image @@ -491,7 +490,7 @@ def detect_sources_in_images( **kwargs, ) results.append(result) - if dask_handler.dask_client is not None: + if DaskHandler.dask_client is not None: results = compute(*results, scheduler="distributed") # Keep only results that are not None results = [result for result in results if result is not None] From a4a599a836d38cc84dfdc5a2f7ec96907ee229d5 Mon Sep 17 00:00:00 2001 From: "lukas.gehrig" Date: Thu, 8 Feb 2024 13:51:10 +0100 Subject: [PATCH 207/207] addressed PR540 requests :bear: --- .gitignore | 1 - doc/src/examples/example_structure.md | 8 +++--- karabo/imaging/image.py | 4 +-- karabo/imaging/imager.py | 14 +++++----- karabo/simulation/interferometer.py | 12 ++++----- karabo/simulation/telescope.py | 22 +--------------- karabo/util/dask.py | 3 ++- karabo/util/file_handler.py | 37 +++++++++++++++------------ 8 files changed, 41 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index 2cb7b240..4627b2ab 100644 --- a/.gitignore +++ b/.gitignore @@ -78,7 +78,6 @@ visibilities_task_stream.html # Dask dask-worker-space **/dask_info.json 
-**/karabo-dask-dashboard.txt
 
 # Karabo folder
 **/karabo_folder/**
diff --git a/doc/src/examples/example_structure.md b/doc/src/examples/example_structure.md
index c6e15795..2c91ab94 100644
--- a/doc/src/examples/example_structure.md
+++ b/doc/src/examples/example_structure.md
@@ -75,11 +75,9 @@ DaskHandler.parallelize_with_dask(my_function, my_iterable, *args, **kwargs) # T
 
 ## Use Karabo on a SLURM cluster
 
-Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. The `DaskHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables to midify the behavior of a Dask client, if they've changed before creating a client.
+Karabo manages all available nodes through Dask, making the computational power conveniently accessible for the user. The `DaskHandler` class streamlines the creation of a Dask client and offers a user-friendly interface for interaction. This class contains static variables that modify the behavior of the Dask client, provided they are changed before a client is created.
 
-While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client should to be initialized at the beginning of your script with `DaskHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes.
-
-If you just need the client itself, then no `setup()` is needed.
+While users are not required to interact with Dask directly - thanks to the background processes managed by Karabo - the Dask client should be initialized at the beginning of your script with `DaskHandler.setup` (see example below). This has to do with the spawning of new processes when creating `Nanny` processes.
 
 ```python
 from karabo.util.dask import DaskHandler
@@ -110,4 +108,4 @@ DaskHandler.use_dask = False
 Please also check out the `DaskHandler` under `karabo.util.dask` for more information.
 
 ### Dask Dashboard
-The Dask dashboard link should be printed in stdout. Just copy the link into your browser, and then you're able to observe the current dask-process. If you run Karabo on a VM without access to a browser and internet, you can use `port forwarding` to access the Dask Dashboard from your local machine. In `VSCODE`, this can be done directly when using the "PORTS" tab.
+The Dask dashboard link should be printed in stdout. Just copy the link into your browser, and then you're able to observe the current dask-process. If you run Karabo on a VM without access to a browser and internet, you can use ssh `port forwarding` to access the Dask Dashboard from your local machine (e.g. `ssh -N -L <local-port>:<node-name>:<dashboard-port> <username>@<host>`). Don't forget to use the `<local-port>` in the browser-link if you used port-forwarding. In `VSCODE`, this can be done directly when using the "PORTS" tab; just paste the IP address and port number from stdout into the "Port" column and click on "Open in Browser" in the "Local Address" column.
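To make the dashboard section above concrete, here is a minimal sketch (not part of the patch) of how the link can also be retrieved in a script. `DaskHandler.setup` and `get_dask_client` come from the patch; the URL and port in the comment are illustrative placeholders and may differ on your system.

```python
from karabo.util.dask import DaskHandler

DaskHandler.setup()  # creates the client; the dashboard link is also printed to stdout
client = DaskHandler.get_dask_client()
print(client.dashboard_link)  # e.g. http://<node-ip>:8787/status (port may differ)
```
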
diff --git a/karabo/imaging/image.py b/karabo/imaging/image.py index e7de02b8..2571e105 100644 --- a/karabo/imaging/image.py +++ b/karabo/imaging/image.py @@ -28,7 +28,7 @@ from scipy.interpolate import RegularGridInterpolator from karabo.util._types import FilePathType -from karabo.util.file_handler import FileHandler, check_ending +from karabo.util.file_handler import FileHandler, assert_valid_ending from karabo.util.plotting_util import get_slices # store and restore the previously set matplotlib backend, @@ -117,7 +117,7 @@ def write_to_file( overwrite: bool = False, ) -> None: """Write an `Image` to `path` as .fits""" - check_ending(path=path, ending=".fits") + assert_valid_ending(path=path, ending=".fits") dir_name = os.path.abspath(os.path.dirname(path)) os.makedirs(dir_name, exist_ok=True) fits.writeto( diff --git a/karabo/imaging/imager.py b/karabo/imaging/imager.py index f4365386..995e6fc3 100644 --- a/karabo/imaging/imager.py +++ b/karabo/imaging/imager.py @@ -28,7 +28,7 @@ from karabo.simulation.visibility import Visibility from karabo.util._types import FilePathType from karabo.util.dask import DaskHandler -from karabo.util.file_handler import FileHandler, check_ending +from karabo.util.file_handler import FileHandler, assert_valid_ending ImageContextType = Literal["awprojection", "2d", "ng", "wg"] CleanAlgorithmType = Literal["hogbom", "msclean", "mmclean"] @@ -228,7 +228,7 @@ def get_dirty_image( ) fits_path = os.path.join(tmp_dir, "dirty.fits") else: - check_ending(path=fits_path, ending=".fits") + assert_valid_ending(path=fits_path, ending=".fits") block_visibilities = create_visibility_from_ms( str(self.visibility.ms_file_path) @@ -328,11 +328,11 @@ def imaging_rascil( deconvolved, restored, residual """ if deconvolved_fits_path is not None: - check_ending(path=deconvolved_fits_path, ending=".fits") + assert_valid_ending(path=deconvolved_fits_path, ending=".fits") if restored_fits_path is not None: - check_ending(path=restored_fits_path, ending=".fits") + assert_valid_ending(path=restored_fits_path, ending=".fits") if residual_fits_path is not None: - check_ending(path=residual_fits_path, ending=".fits") + assert_valid_ending(path=residual_fits_path, ending=".fits") if ( deconvolved_fits_path is None or restored_fits_path is None @@ -366,9 +366,7 @@ def imaging_rascil( img_context = "wg" if self.ingest_vis_nchan is None: - raise ValueError( - "`self.ingest_vis_nchan` is None but must set, but is None" - ) + raise ValueError("`self.ingest_vis_nchan` is None but must set.") blockviss = create_visibility_from_ms_rsexecute( msname=str(self.visibility.ms_file_path), diff --git a/karabo/simulation/interferometer.py b/karabo/simulation/interferometer.py index b8cfc25b..37e64de6 100644 --- a/karabo/simulation/interferometer.py +++ b/karabo/simulation/interferometer.py @@ -243,18 +243,18 @@ def __init__( ) self.use_gpus = use_gpus - if use_dask is True or client: - if client and use_dask is None: + if (use_dask is True) or (client is not None): + if (client is not None) and (use_dask is None): use_dask = True elif client and use_dask is False: raise RuntimeError( "Providing `client` and `use_dask`=False is not allowed." 
) - elif not client and use_dask is True: + elif (client is None) and (use_dask is True): client = DaskHandler.get_dask_client() else: pass - elif use_dask is None and client is None: + elif (use_dask is None) and (client is None): use_dask = DaskHandler.should_dask_be_used() if use_dask: client = DaskHandler.get_dask_client() @@ -406,8 +406,8 @@ def __run_simulation_parallized_observation( # Scatter sky array_sky = self.client.scatter(array_sky) tmp_dir = FileHandler().get_tmp_dir( - prefix="simulation-praallezed-observation-", - purpose="disk-cache simulation-praallezed-observation", + prefix="simulation-parallelized-observation-", + purpose="disk-cache simulation-parallelized-observation", ) ms_dir = os.path.join(tmp_dir, "measurements") os.makedirs(ms_dir, exist_ok=False) diff --git a/karabo/simulation/telescope.py b/karabo/simulation/telescope.py index 42e190c9..cdda8545 100644 --- a/karabo/simulation/telescope.py +++ b/karabo/simulation/telescope.py @@ -671,26 +671,6 @@ def _get_station_infos(cls, tel_path: DirPathType) -> pd.DataFrame: df_tel["y"] = stations[:, 1] return df_tel - @classmethod - def _get_number_str(cls, num: int, n_digits: int) -> str: - """Transforms a number `num` to str with `n_digits` digits. - - Args: - num: Number to transform. - n_digits: Number of digits to represent `num` as str. - - Returns: - Transformed `num`. - """ - num_str = str(num) - n_digit_values = len(num_str) - if n_digit_values < n_digits: - n_zeros = n_digits - n_digit_values - num_str = "0" * n_zeros + num_str - elif n_digit_values > n_digits: - raise ValueError(f"{num=} has more digits than {n_digits=}") - return num_str - @classmethod def create_baseline_cut_telelescope( cls, @@ -760,7 +740,7 @@ def create_baseline_cut_telelescope( conversions: Dict[str, str] = dict() for i in range(df_tel.shape[0]): source_path = df_tel.iloc[i]["station-path"] - number_str = cls._get_number_str(num=i, n_digits=3) + number_str = str(i).zfill(3) target_station = f"station{number_str}" target_path = os.path.join(tm_path, target_station) source_station = os.path.split(source_path)[-1] diff --git a/karabo/util/dask.py b/karabo/util/dask.py index 2a3b92bf..d9200583 100644 --- a/karabo/util/dask.py +++ b/karabo/util/dask.py @@ -59,6 +59,7 @@ class DaskHandlerBasic: n_threads_per_worker: Optional[int] = None use_dask: Optional[bool] = None use_proccesses: bool = False # Some packages, such as pybdsf, do not work + # with processes because they spawn subprocesses. _setup_called: bool = False @@ -83,7 +84,7 @@ def get_dask_client(cls) -> Client: initialize(comm=MPI.COMM_WORLD) else: initialize(nthreads=n_threads_per_worker, comm=MPI.COMM_WORLD) - cls.dask_client = Client(processes=cls.use_proccesses) # TODO: testing + cls.dask_client = Client(processes=cls.use_proccesses) if MPI.COMM_WORLD.rank == 0: print(f"Dashboard link: {cls.dask_client.dashboard_link}", flush=True) atexit.register(cls._dask_cleanup) diff --git a/karabo/util/file_handler.py b/karabo/util/file_handler.py index 19aaed1d..209e10df 100644 --- a/karabo/util/file_handler.py +++ b/karabo/util/file_handler.py @@ -68,7 +68,7 @@ def _get_disk_cache_root() -> str: def _get_rnd_str(k: int, seed: _SeedType = None) -> str: """Creates a random ascii+digits string with length=`k`. - Most tmp-file tools are using a sting-length of 10. + Most tmp-file tools are using a string-length of 10. Args: k: Length of random string. 
@@ -168,12 +168,12 @@ def __init__(
         # tmps is an instance bound dirs and/or files registry for STM
         self.tmps: list[str] = list()
 
-    @staticmethod
-    def _get_term_dir(term: _LongShortTermType) -> str:
+    @classmethod
+    def _get_term_dir(cls, term: _LongShortTermType) -> str:
         if term == "short":
-            dir_ = FileHandler.stm()
+            dir_ = cls.stm()
         elif term == "long":
-            dir_ = FileHandler.ltm()
+            dir_ = cls.ltm()
         else:
             assert_never(term)
         return dir_
@@ -295,23 +295,26 @@ def clean_instance(self) -> None:
                 shutil.rmtree(tmp)
                 self.tmps.remove(tmp)
 
-    @staticmethod
+    @classmethod
     def clean(
+        cls,
         term: _LongShortTermType = "short",
     ) -> None:
         """Removes the entire directory specified by `term`.
 
-        Be careful with cleaning, to not mess up dirs of other processes.
+        We strongly suggest NOT to use this function in a workflow. This function
+        removes the entire karabo-disk-cache. So if there's another karabo-process
+        running in parallel, you could mess with its disk-cache as well.
 
         Args:
             term: "long" or "short" term memory
         """
-        dir_ = FileHandler._get_term_dir(term=term)
+        dir_ = cls._get_term_dir(term=term)
         if os.path.exists(dir_):
             shutil.rmtree(dir_)
 
-    @staticmethod
-    def is_dir_empty(dirname: DirPathType) -> bool:
+    @classmethod
+    def is_dir_empty(cls, dirname: DirPathType) -> bool:
         """Checks if `dirname` is empty assuming `dirname` exists.
 
         Args:
@@ -328,8 +331,8 @@ def is_dir_empty(dirname: DirPathType) -> bool:
         is_empty = len(os.listdir(path=dirname)) == 0
         return is_empty
 
-    @staticmethod
-    def remove_empty_dirs(term: _LongShortTermType = "short") -> None:
+    @classmethod
+    def remove_empty_dirs(cls, term: _LongShortTermType = "short") -> None:
         """Removes emtpy directories in the chosen cache-dir.
 
         Args:
@@ -341,8 +344,8 @@ def remove_empty_dirs(term: _LongShortTermType = "short") -> None:
             if os.path.isdir(path) and len(os.listdir(path=path)) == 0:
                 shutil.rmtree(path=path)
 
-    @staticmethod
-    def empty_dir(dir_path: DirPathType) -> None:
+    @classmethod
+    def empty_dir(cls, dir_path: DirPathType) -> None:
         """Deletes all contents of `dir_path`, but not the directory itself.
 
         This function assumes that all filed and directories are owned by
@@ -366,7 +369,9 @@ def __exit__(
         self.clean_instance()
 
 
-def check_ending(path: Union[str, FilePathType, DirPathType], ending: str) -> None:
+def assert_valid_ending(
+    path: Union[str, FilePathType, DirPathType], ending: str
+) -> None:
     """Utility function to check if the ending of `path` is `ending`.
 
     Args:
@@ -379,6 +384,6 @@ def check_ending(path: Union[str, FilePathType, DirPathType], ending: str) -> No
     path_ = str(path)
     if not path_.endswith(ending):
         fname = path_.split(os.path.sep)[-1]
-        raise ValueError(
+        raise AssertionError(
             f"Invalid file-ending, file {fname} must have {ending} extension!"
         )
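
As a closing illustration of the `assert_valid_ending` rename in the hunk above, here is a minimal sketch of how the helper is meant to be called. It is not part of the patch, and the file names are made up; the exception types follow the diff (the pre-rename `check_ending` raised `ValueError`, the renamed helper raises `AssertionError`).

```python
from karabo.util.file_handler import assert_valid_ending

# Matching extension: passes silently.
assert_valid_ending(path="dirty_image.fits", ending=".fits")

# Non-matching extension: raises AssertionError after this patch.
try:
    assert_valid_ending(path="visibilities.ms", ending=".fits")
except AssertionError as error:
    print(error)
```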