From 31d6c5dbd57b85c169564c4b582e578fd6915dba Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Fri, 15 Nov 2024 09:31:27 -0500 Subject: [PATCH 01/37] DOC v25.02 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 6 ++-- .devcontainer/cuda11.8-pip/devcontainer.json | 8 ++--- .../cuda12.5-conda/devcontainer.json | 6 ++-- .devcontainer/cuda12.5-pip/devcontainer.json | 8 ++--- .github/workflows/build.yaml | 16 +++++----- .github/workflows/pr.yaml | 28 ++++++++--------- .github/workflows/test.yaml | 10 +++---- README.md | 2 +- VERSION | 2 +- .../all_cuda-118_arch-aarch64.yaml | 14 ++++----- .../all_cuda-118_arch-x86_64.yaml | 14 ++++----- .../all_cuda-125_arch-aarch64.yaml | 14 ++++----- .../all_cuda-125_arch-x86_64.yaml | 14 ++++----- .../bench_ann_cuda-118_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +-- .../bench_ann_cuda-120_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-120_arch-x86_64.yaml | 4 +-- .../recipes/raft-dask/conda_build_config.yaml | 4 +-- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- dependencies.yaml | 30 +++++++++---------- docs/source/build.md | 2 +- docs/source/developer_guide.md | 6 ++-- docs/source/raft_ann_benchmarks.md | 12 ++++---- python/pylibraft/pyproject.toml | 4 +-- .../raft-dask/cmake/thirdparty/get_ucxx.cmake | 4 +-- python/raft-dask/pyproject.toml | 10 +++---- 26 files changed, 116 insertions(+), 116 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 008bf8730a..8c857961c2 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - 
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 75aed80f9f..c691ed6007 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 240ba02131..dc4fcd02fd 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", 
"PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index c23c79017a..bc43900ef3 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index db379c9d47..7879f22879 100644 
--- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: 
build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: package-name: pylibraft wheel-build-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fe8e730921..e349b25ce6 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,13 +26,13 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -65,27 +65,27 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu16 
conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true @@ -93,20 +93,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -116,14 +116,14 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: [wheel-build-pylibraft, changed-files] secrets: inherit - 
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -131,21 +131,21 @@ jobs: wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: [wheel-build-raft-dask, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2bee8a3d1d..1ae093bc56 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 
with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 8870e9385e..8d16fc5842 100755 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5 ``` -If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.12/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-25.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ### Installing Python through Pip diff --git a/VERSION b/VERSION index af28c42b52..72eefaf7c7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 6098cd12bf..269af03e9f 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -20,8 +20,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - graphviz @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja - numba>=0.57 @@ -44,18 +44,18 @@ dependencies: - nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0fe8fbab39..4c7150264b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,8 +20,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-64=11.* - graphviz @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja - numba>=0.57 @@ -44,18 +44,18 @@ dependencies: - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index dfb9ac0b97..648a5a00f0 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -21,8 +21,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - graphviz @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja - numba>=0.57 @@ -40,18 +40,18 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - 
pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index bf6f5d6462..7d7b9c4454 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,8 +21,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-64=11.* - graphviz @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja - numba>=0.57 @@ -40,18 +40,18 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 39bdf2671d..777d2ddb7f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ 
-30,7 +30,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 56004fa818..7fa432c8d6 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -30,7 +30,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index 5f0599d9ae..0f59fc6090 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -27,7 +27,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -36,7 +36,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 
849e6c1412..273d6a9f9b 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -36,7 +36,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index d7d2f68b42..68140e6bc0 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -17,10 +17,10 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.41.*" + - "0.42.*" ucxx_version: - - "0.41.*" + - "0.42.*" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index 6f4c627ed4..23c8490b40 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.12") +set(RAPIDS_VERSION "25.02") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index 7766481c99..fb58e93f71 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -171,7 +171,7 @@ dependencies: - c-compiler - cxx-compiler - nccl>=2.19 - - libucxx==0.41.*,>=0.0.0a0 + - libucxx==0.42.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -210,7 +210,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -237,12 +237,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - &rmm_cu12 rmm-cu12==24.12.*,>=0.0.0a0 + - &rmm_cu12 rmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - &rmm_cu11 rmm-cu11==24.12.*,>=0.0.0a0 + - &rmm_cu11 rmm-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*rmm_unsuffixed] } checks: common: @@ -514,14 +514,14 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask-cuda==24.12.*,>=0.0.0a0 + - dask-cuda==25.2.*,>=0.0.0a0 - joblib>=0.11 - numba>=0.57 - - rapids-dask-dependency==24.12.*,>=0.0.0a0 + - rapids-dask-dependency==25.2.*,>=0.0.0a0 - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.2.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.42.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -535,14 +535,14 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - &pylibraft_cu12 pylibraft-cu12==24.12.*,>=0.0.0a0 - - 
&ucx_py_cu12 ucx-py-cu12==0.41.*,>=0.0.0a0 + - &pylibraft_cu12 pylibraft-cu12==25.2.*,>=0.0.0a0 + - &ucx_py_cu12 ucx-py-cu12==0.42.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - &pylibraft_cu11 pylibraft-cu11==24.12.*,>=0.0.0a0 - - &ucx_py_cu11 ucx-py-cu11==0.41.*,>=0.0.0a0 + - &pylibraft_cu11 pylibraft-cu11==25.2.*,>=0.0.0a0 + - &ucx_py_cu11 ucx-py-cu11==0.42.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed, *ucx_py_unsuffixed]} test_python_common: common: @@ -562,7 +562,7 @@ dependencies: packages: # UCXX is not currently a hard-dependency thus only installed during tests, # this will change in the future. - - &distributed_ucxx_unsuffixed distributed-ucxx==0.41.*,>=0.0.0a0 + - &distributed_ucxx_unsuffixed distributed-ucxx==0.42.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -575,12 +575,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu12==0.41.*,>=0.0.0a0 + - distributed-ucxx-cu12==0.42.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu11==0.41.*,>=0.0.0a0 + - distributed-ucxx-cu11==0.42.*,>=0.0.0a0 - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} depends_on_ucx_build: common: diff --git a/docs/source/build.md b/docs/source/build.md index b9a1832b02..0c4ab17ed0 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.12/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-25.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c4a099fabb..5cc694dc8f 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. 
### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -230,7 +230,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 12a94e45ce..b7f7cc81d4 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -66,7 +66,7 @@ Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft - The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: ```bash -docker pull rapidsai/raft-ann-bench:24.12a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. 
+docker pull rapidsai/raft-ann-bench:25.02a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. ``` The CUDA and python versions can be changed for the supported values: @@ -87,7 +87,7 @@ You can see the exact versions as well in the dockerhub site: [//]: # () [//]: # (```bash) -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.12-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) +[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:25.02-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) [//]: # (```) @@ -348,7 +348,7 @@ For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder whe export DATA_FOLDER=path/to/store/datasets/and/results docker run --gpus all --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 \ + rapidsai/raft-ann-bench:25.02a-cuda11.8-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ @@ -359,7 +359,7 @@ Usage of the above command is as follows: | Argument | Description | |-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | +| `rapidsai/raft-ann-bench:25.02a-cuda11.8-py3.10` | Image to use. 
Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | | `"--dataset deep-image-96-angular"` | Dataset name | | `"--normalize"` | Whether to normalize the dataset | | `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | @@ -376,7 +376,7 @@ The container arguments in the above section also be used for the CPU-only conta export DATA_FOLDER=path/to/store/datasets/and/results docker run --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.12a-py3.10 \ + rapidsai/raft-ann-bench-cpu:25.02a-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms hnswlib --batch-size 10 -k 10" \ @@ -393,7 +393,7 @@ docker run --gpus all --rm -it -u $(id -u) \ --entrypoint /bin/bash \ --workdir /data/benchmarks \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 + rapidsai/raft-ann-bench:25.02a-cuda11.8-py3.10 ``` This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index bb01602b33..3502d82fd4 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -125,7 +125,7 @@ requires = [ "cuda-python", "cython>=3.0.0,<3.1.0a0", "ninja", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" diff --git a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake index db9b5c6b4d..f5daf70f92 100644 --- a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake +++ b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake @@ -47,9 +47,9 @@ endfunction() # Change pinned tag here to test a commit in CI # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_ucxx(VERSION 0.41 +find_and_configure_ucxx(VERSION 0.42 FORK rapidsai - PINNED_TAG branch-0.41 + PINNED_TAG branch-0.42 EXCLUDE_FROM_ALL YES UCXX_STATIC ${RAFT_DASK_UCXX_STATIC} ) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index a9f4de5dc3..33643c481e 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -31,13 +31,13 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "dask-cuda==24.12.*,>=0.0.0a0", - "distributed-ucxx==0.41.*,>=0.0.0a0", + "dask-cuda==25.2.*,>=0.0.0a0", + "distributed-ucxx==0.42.*,>=0.0.0a0", "joblib>=0.11", "numba>=0.57", - "pylibraft==24.12.*,>=0.0.0a0", - "rapids-dask-dependency==24.12.*,>=0.0.0a0", - "ucx-py==0.41.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", + "ucx-py==0.42.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", From 4cdc1d80aa01c147a94eed9fbc68a38fba29eaf4 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:27:54 -0600 Subject: [PATCH 02/37] Add breaking change workflow trigger (#2482) Adds a workflow that triggers a second workflow which sends a notification to a designated Slack channel on every PR labelled with breaking, whenever any of the following events are triggered on the PR: - closed - reopened - labeled - unlabeled Depends on https://github.com/rapidsai/shared-workflows/pull/257 --- .../trigger-breaking-change-alert.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/trigger-breaking-change-alert.yaml diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 0000000000..3b972f31ca --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 @@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} From adfd2f6f765b5a979742399e10581b75ba5a2834 Mon Sep 17 00:00:00 2001 From: Bradley 
Dice Date: Tue, 26 Nov 2024 16:38:40 -0600 Subject: [PATCH 03/37] Require approval to run CI on draft PRs (#2512) By default, CI runs on draft PRs. This leads to many CI runs that may be unnecessary. With this PR's change to `.github/copy-pr-bot.yaml`, an `/ok to test` comment from a trusted user is required to trigger CI on draft PRs. Non-draft PRs will run CI by default, assuming that all commits are signed by trusted users. Otherwise an `/ok to test` is required (as before) -- see the `copy-pr-bot` docs at https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ for more information. Part of https://github.com/rapidsai/build-planning/issues/123. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/raft/pull/2512 --- .github/copy-pr-bot.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee5..e0ea775aad 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false From c943181c4d48e5050f2b8c40f17e40155bfd9d61 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Nov 2024 19:29:03 -0600 Subject: [PATCH 04/37] Shrink wheel size limit following removal of vector search APIs. (#2509) Following #2498, we can apply this feedback from #2490: https://github.com/rapidsai/raft/pull/2490#discussion_r1841357165 These changes are inspired by https://github.com/rapidsai/cuvs/pull/469. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/raft/pull/2509 --- ci/build_wheel_pylibraft.sh | 2 +- ci/build_wheel_raft_dask.sh | 2 +- ci/validate_wheel.sh | 24 ++++++++++++++++++++++++ python/pylibraft/pyproject.toml | 4 +--- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/ci/build_wheel_pylibraft.sh b/ci/build_wheel_pylibraft.sh index dacaa1190e..dd62ab5399 100755 --- a/ci/build_wheel_pylibraft.sh +++ b/ci/build_wheel_pylibraft.sh @@ -18,4 +18,4 @@ esac export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF${EXTRA_CMAKE_ARGS}" ci/build_wheel.sh pylibraft ${package_dir} -ci/validate_wheel.sh ${package_dir} final_dist +ci/validate_wheel.sh ${package_dir} final_dist pylibraft diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh index e4f3f0a833..d49d131abf 100755 --- a/ci/build_wheel_raft_dask.sh +++ b/ci/build_wheel_raft_dask.sh @@ -9,4 +9,4 @@ package_dir="python/raft-dask" export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF" ci/build_wheel.sh raft-dask ${package_dir} -ci/validate_wheel.sh ${package_dir} final_dist +ci/validate_wheel.sh ${package_dir} final_dist raft-dask diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 5910a5c59f..5ef72ad895 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -5,6 +5,29 @@ set -euo pipefail package_dir=$1 wheel_dir_relative_path=$2 +package_name=$3 + +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + +# some packages are much larger on CUDA 11 than on CUDA 12 +if [[ "${package_name}" == "raft-dask" ]]; then + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '200M' + ) +elif [[ "${package_name}" == "pylibraft" ]]; then + if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '600M' + ) + else + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '100M' + ) + fi +else + echo "Unsupported package 
name: ${package_name}" + exit 1 +fi cd "${package_dir}" @@ -12,6 +35,7 @@ rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ + "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index 3502d82fd4..ba454af591 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -132,12 +132,10 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pydistcheck] select = [ + # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '825M' - [tool.pytest.ini_options] filterwarnings = [ "error", From 0e6d35f7cbb4354641d69868d6cb10dcee21fbca Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Nov 2024 12:36:38 -0800 Subject: [PATCH 05/37] Adapt to rmm logger changes (#2513) This PR adapts to breaking changes in rmm in https://github.com/rapidsai/rmm/pull/1722. This PR is a breaking change because consumers of raft that use any functionality that touches rmm logging will need to link to the rmm::rmm_logger_impl target as well now. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2513 --- cpp/CMakeLists.txt | 13 ++++++++++--- cpp/bench/prims/CMakeLists.txt | 4 ++++ cpp/cmake/thirdparty/get_spdlog.cmake | 6 +++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 780f6f8581..78a4dbb913 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -180,7 +180,10 @@ target_include_directories( ) # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
-target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL) +target_link_libraries( + raft INTERFACE rmm::rmm rmm::rmm_logger spdlog::spdlog_header_only cuco::cuco + nvidia::cutlass::cutlass CCCL::CCCL +) target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( @@ -288,8 +291,10 @@ if(RAFT_COMPILE_LIBRARY) "$<$:${RAFT_CUDA_FLAGS}>" ) - add_library(raft_lib SHARED $) - add_library(raft_lib_static STATIC $) + # Make sure not to add the rmm logger twice since it will be brought in as an interface source by + # the rmm::rmm_logger_impl target. + add_library(raft_lib SHARED $,EXCLUDE,rmm.*logger>) + add_library(raft_lib_static STATIC $,EXCLUDE,rmm.*logger>) set_target_properties( raft_lib raft_lib_static @@ -313,6 +318,8 @@ if(RAFT_COMPILE_LIBRARY) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") endforeach() + target_link_libraries(raft_lib PRIVATE rmm::rmm_logger_impl) + target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl) endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index cf03a36612..edc1af4e02 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -32,6 +32,7 @@ function(ConfigureBench) PRIVATE raft::raft raft_internal $<$:raft::compiled> + $<$>:bench_rmm_logger> ${RAFT_CTK_MATH_DEPENDENCIES} benchmark::benchmark Threads::Threads @@ -73,6 +74,9 @@ function(ConfigureBench) endfunction() +add_library(bench_rmm_logger OBJECT) +target_link_libraries(bench_rmm_logger PRIVATE rmm::rmm_logger_impl) + if(BUILD_PRIMS_BENCH) ConfigureBench(NAME CORE_BENCH PATH core/bitset.cu core/copy.cu main.cpp) diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index 57e38c2638..b1ffbe246f 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ 
b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -16,9 +16,9 @@ function(find_and_configure_spdlog) include(${rapids-cmake-dir}/cpm/spdlog.cmake) - rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) - rapids_export_package(BUILD spdlog rmm-exports) + rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET raft-exports) + rapids_export_package(BUILD spdlog raft-exports) endfunction() -find_and_configure_spdlog() \ No newline at end of file +find_and_configure_spdlog() From fc7818f078a69393e8a0cb27c117b19208c76aaf Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 4 Dec 2024 10:48:49 -0600 Subject: [PATCH 06/37] prefer system install of UCX in devcontainers, update outdated RAPIDS references (#2514) Contributes to https://github.com/rapidsai/build-planning/issues/118 Proposes the following changes for pip devcontainers: * prefer system installation of ucx to the one provided by the `libucx-cu{11,12}` wheels (ref: https://github.com/rapidsai/devcontainers/pull/421#issuecomment-2502324982) And some other related changes noticed while doing that: * update lingering `24.*` references to `25.02` ## Notes for Reviewers ### How I tested this Relying on CI for most things. Double-checked that `update-version.sh` would have caught the one lingering `24.12` reference like this: ```shell ./ci/release/update-version.sh '25.02.00' git grep -E '24\.' 
``` Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2514 --- .devcontainer/Dockerfile | 1 + .github/workflows/trigger-breaking-change-alert.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index dc12ab2ade..0f6a8b46af 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,7 @@ RUN apt update -y \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids +ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true FROM ${BASE} as conda-base diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 3b972f31ca..01dd2436be 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} From 3ce5b6ad45946a9c790711addb7b5d358534d8d9 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 4 Dec 2024 18:14:06 -0600 Subject: [PATCH 07/37] Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 (#2517) Now that some upstream bugs have been fixed, we can allow cuda-python 12.6.2 and 11.8.5. See https://github.com/NVIDIA/cuda-python/issues/226#issuecomment-2472355738 for more information. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2517 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 3 ++- conda/environments/all_cuda-118_arch-x86_64.yaml | 3 ++- conda/environments/all_cuda-125_arch-aarch64.yaml | 3 ++- conda/environments/all_cuda-125_arch-x86_64.yaml | 3 ++- conda/recipes/pylibraft/meta.yaml | 8 ++++---- conda/recipes/raft-dask/meta.yaml | 8 ++++---- cpp/cmake/thirdparty/get_rmm.cmake | 2 +- dependencies.yaml | 7 ++++--- 8 files changed, 21 insertions(+), 16 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 269af03e9f..f8201cbccf 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 @@ -54,6 +54,7 @@ dependencies: - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4c7150264b..66b97854ab 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 @@ -54,6 +54,7 @@ dependencies: - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 
648a5a00f0..1fd6edfb6f 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -16,7 +16,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler @@ -50,6 +50,7 @@ dependencies: - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7d7b9c4454..72108fed48 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -16,7 +16,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler @@ -50,6 +50,7 @@ dependencies: - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml index 01a9d61f0f..f1edf5d767 100644 --- a/conda/recipes/pylibraft/meta.yaml +++ b/conda/recipes/pylibraft/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -61,10 +61,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 {% endif 
%} - libraft {{ version }} - libraft-headers {{ version }} diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index 02a8957b06..14ffa5c092 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -62,10 +62,10 @@ requirements: run: {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.7.1,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.0,<13.0a0 {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - dask-cuda ={{ minor_version }} diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake index 5a7d54ea4a..0e93363039 100644 --- a/cpp/cmake/thirdparty/get_rmm.cmake +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -17,7 +17,7 @@ function(find_and_configure_rmm) include(${rapids-cmake-dir}/cpm/rmm.cmake) rapids_cpm_rmm(BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports) + INSTALL_EXPORT_SET raft-exports) endfunction() find_and_configure_rmm() diff --git a/dependencies.yaml b/dependencies.yaml index daef3ad2ea..80c7f29447 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -143,8 +143,9 @@ dependencies: packages: - c-compiler - cxx-compiler - - nccl>=2.19 - libucxx==0.42.*,>=0.0.0a0 + - nccl>=2.19 + - spdlog>=1.14.1,<1.15 specific: - output_types: conda matrices: @@ -196,11 +197,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 + - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 + - 
&cuda_python11 cuda-python>=11.7.1,<12.0a0 - matrix: packages: - &cuda_python cuda-python From ee45ce786686b54d1972408b927d7fcd8ce0cf20 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sat, 7 Dec 2024 00:35:26 -0600 Subject: [PATCH 08/37] Update cuda-python lower bounds to 12.6.2 / 11.8.5 (#2522) We require a newer cuda-python lower bound for new features and to use the new layout. This will fix a number of errors observed when the runtime version of cuda-python is older than the version used to build packages using Cython features from cuda-python. See https://github.com/rapidsai/build-planning/issues/117#issuecomment-2524250915 for details. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2522 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/pylibraft/meta.yaml | 8 ++++---- conda/recipes/raft-dask/meta.yaml | 8 ++++---- dependencies.yaml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index f8201cbccf..e145aeb92e 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 66b97854ab..75dcffa95d 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 
- cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 1fd6edfb6f..bfa32c80d1 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -16,7 +16,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 72108fed48..98ec334635 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -16,7 +16,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml index f1edf5d767..4a8ed29c85 100644 --- a/conda/recipes/pylibraft/meta.yaml +++ b/conda/recipes/pylibraft/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -61,10 +61,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - libraft {{ version }} - libraft-headers {{ version }} diff --git a/conda/recipes/raft-dask/meta.yaml 
b/conda/recipes/raft-dask/meta.yaml index 14ffa5c092..a8be273f82 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -62,10 +62,10 @@ requirements: run: {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - dask-cuda ={{ minor_version }} diff --git a/dependencies.yaml b/dependencies.yaml index 80c7f29447..37ea223a01 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -197,11 +197,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0 + - &cuda_python12 cuda-python>=12.6.2,<13.0a0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - &cuda_python11 cuda-python>=11.8.5,<12.0a0 - matrix: packages: - &cuda_python cuda-python From 1e5030d1b4f85a9f306c36f8a030494fa59aaaa4 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 11 Dec 2024 23:39:15 +0100 Subject: [PATCH 09/37] Fix rnd bit generation in rmat_rectangular_kernel (#2524) For certain architectures, the compiler always generates a zero destination bit in the following loop https://github.com/rapidsai/raft/blob/ee45ce786686b54d1972408b927d7fcd8ce0cf20/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh#L160-L162 irrespective of the random value that should determine which bit to use for `dst_id`. This PR refactors the loop. This way the `dst_id` number has the desired random distribution for all bits.
Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2524 --- .../detail/rmat_rectangular_generator.cuh | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh index 9ad7c68f87..24207ba6db 100644 --- a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh +++ b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh @@ -151,15 +151,16 @@ RAFT_KERNEL rmat_gen_kernel(IdxT* out, raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0}; auto min_scale = min(r_scale, c_scale); IdxT i = 0; - for (; i < min_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a, a + b, a + b + c, r_scale, c_scale, i, gen); - } - for (; i < r_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a + b, a + b, ProbT(1), r_scale, c_scale, i, gen); - } - for (; i < c_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a + c, ProbT(1), ProbT(1), r_scale, c_scale, i, gen); + // Whether we have more rows than columns. + const bool more_rows = r_scale > c_scale; + + for (; i < max_scale; ++i) { + ProbT A = (i < min_scale) ? a : (more_rows ? a + b : a + c); + ProbT AB = (i < min_scale) ? a + b : (more_rows ? a + b : ProbT(1)); + ProbT ABC = (i < min_scale) ? a + b + c : ProbT(1); + gen_and_update_bits(src_id, dst_id, A, AB, ABC, r_scale, c_scale, i, gen); } + store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges); } From 3720d8e91c21ec95d3dbe8e0d1a4515eb60fa7fa Mon Sep 17 00:00:00 2001 From: rhdong Date: Wed, 11 Dec 2024 14:41:14 -0800 Subject: [PATCH 10/37] [Opt] Optimizing the performance of `bitmap_to_csr` (#2516) This PR optimizes the performance of the `bitmap_to_csr`-related kernels by 14x to 1000x. It could also benefit `bitset_to_csr` in the future.
#### After (Updated Dec 08) ```shell --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- BitmapToCsrBench/0/manual_time 0.161 ms 0.197 ms 4350 rows*cols=1*100000000 sparsity=0.95 BitmapToCsrBench/1/manual_time 0.110 ms 0.147 ms 6363 rows*cols=1*100000000 sparsity=0.99 BitmapToCsrBench/2/manual_time 14.2 ms 14.2 ms 50 rows*cols=100*100000000 sparsity=0.95 BitmapToCsrBench/3/manual_time 8.76 ms 8.80 ms 80 rows*cols=100*100000000 sparsity=0.99 ``` #### Before ```shell --------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------------------- BitmapToCsrBench/0/manual_time 176 ms 176 ms 4 rows*cols=1*100000000 sparsity=0.95 BitmapToCsrBench/1/manual_time 146 ms 146 ms 5 rows*cols=1*100000000 sparsity=0.99 BitmapToCsrBench/2/manual_time 180 ms 180 ms 4 rows*cols=100*100000000 sparsity=0.95 BitmapToCsrBench/3/manual_time 148 ms 148 ms 5 rows*cols=100*100000000 sparsity=0.99 ``` Authors: - rhdong (https://github.com/rhdong) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2516 --- cpp/bench/prims/sparse/bitmap_to_csr.cu | 26 +- .../sparse/convert/detail/bitmap_to_csr.cuh | 358 ++++++++++-------- cpp/include/raft/util/device_loads_stores.cuh | 42 ++ cpp/test/sparse/convert_csr.cu | 78 ++-- 4 files changed, 319 insertions(+), 185 deletions(-) diff --git a/cpp/bench/prims/sparse/bitmap_to_csr.cu b/cpp/bench/prims/sparse/bitmap_to_csr.cu index ed53df3265..71aabb1bf9 100644 --- a/cpp/bench/prims/sparse/bitmap_to_csr.cu +++ b/cpp/bench/prims/sparse/bitmap_to_csr.cu @@ -71,7 +71,7 @@ struct BitmapToCsrBench : public fixture { index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) { index_t total = static_cast(m * n); - index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; for (auto& item : bitmap) { @@ -141,7 +141,27 @@ const std::vector> getInputs() }; const std::vector params_group = raft::util::itertools::product( - {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.01f, 0.1f, 0.2f, 0.5f}); + {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.99f, 0.9f, 0.8f, 0.5f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +template +const std::vector> getLargeInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(1), index_t(100)}, {index_t(100 * 1000000)}, {0.95f, 0.99f}); param_vec.reserve(params_group.size()); for (TestParams params : params_group) { @@ -153,4 +173,6 @@ const std::vector> getInputs() RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); 
+RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getLargeInputs()); + } // namespace raft::bench::sparse diff --git a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh index 769d5de9be..866923d647 100644 --- a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -41,61 +42,68 @@ namespace sparse { namespace convert { namespace detail { -// Threads per block in calc_nnz_by_rows_kernel. -static const constexpr int calc_nnz_by_rows_tpb = 32; +// Threads per block in bitmap_to_csr. +static const constexpr int bitmap_to_csr_tpb = 256; template -RAFT_KERNEL __launch_bounds__(calc_nnz_by_rows_tpb) calc_nnz_by_rows_kernel(const bitmap_t* bitmap, - index_t num_rows, - index_t num_cols, - index_t bitmap_num, - nnz_t* nnz_per_row) +RAFT_KERNEL __launch_bounds__(bitmap_to_csr_tpb) calc_nnz_by_rows_kernel(const bitmap_t* bitmap, + index_t num_rows, + index_t num_cols, + index_t bitmap_num, + nnz_t* sub_col_nnz, + index_t bits_per_sub_col) { - constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); - constexpr bitmap_t ONE = bitmap_t(1u); + using mutable_bitmap_t = typename std::remove_const_t; + using BlockReduce = cub::BlockReduce; + + __shared__ typename BlockReduce::TempStorage reduce_storage; + constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; - auto block = cg::this_thread_block(); - auto tile = cg::tiled_partition<32>(block); + const auto tid = threadIdx.x; + const auto row = blockIdx.x; - int lane_id = threadIdx.x & 0x1f; + const auto num_sub_cols = gridDim.y; + const auto sub_col = blockIdx.y; - for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { - index_t offset = 0; - index_t s_bit = row * num_cols; - index_t e_bit = s_bit + num_cols; - index_t l_sum = 0; + size_t s_bit = size_t(row) * num_cols + sub_col * bits_per_sub_col; + size_t e_bit = min(s_bit + 
bits_per_sub_col, size_t(num_cols) * (row + 1)); - int s_gap = 0; - int e_gap = 0; + nnz_t l_sum = 0; + nnz_t g_sum = 0; - while (offset < num_cols) { - index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; - std::remove_const_t l_bitmap = 0; + index_t s_offset = s_bit % BITS_PER_BITMAP; + size_t bitmap_idx = s_bit / BITS_PER_BITMAP; - if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + if (tid == 0 && s_offset != 0) { + mutable_bitmap_t l_bitmap = bitmap[bitmap_idx]; - offset += BITS_PER_BITMAP * warpSize; + l_bitmap >>= s_offset; - s_gap = s_bit - bitmap_idx * BITS_PER_BITMAP; - if (s_gap > 0) { - l_bitmap >>= s_gap; - l_bitmap <<= s_gap; - offset -= s_gap; - } + size_t remaining_bits = min(size_t(BITS_PER_BITMAP - s_offset), e_bit - s_bit); - e_gap = (bitmap_idx + 1) * BITS_PER_BITMAP - e_bit; - if (e_gap > 0) { - l_bitmap <<= e_gap; - l_bitmap >>= e_gap; - } - l_sum += static_cast(raft::detail::popc(l_bitmap)); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); } + l_sum += static_cast(raft::detail::popc(l_bitmap)); + } + if (s_offset != 0) { s_bit += (BITS_PER_BITMAP - s_offset); } - l_sum = cg::reduce(tile, l_sum, cg::plus()); + for (size_t bit_idx = s_bit; bit_idx < e_bit; bit_idx += BITS_PER_BITMAP * blockDim.x) { + mutable_bitmap_t l_bitmap = 0; + bitmap_idx = bit_idx / BITS_PER_BITMAP + tid; - if (lane_id == 0) { *(nnz_per_row + row) += static_cast(l_sum); } + index_t remaining_bits = min(BITS_PER_BITMAP, index_t(e_bit - bitmap_idx * BITS_PER_BITMAP)); + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + l_sum += static_cast(raft::detail::popc(l_bitmap)); } + g_sum = BlockReduce(reduce_storage).Reduce(l_sum, cub::Sum()); + stg(g_sum, sub_col_nnz + sub_col + row * num_sub_cols, tid == 0); } template @@ -103,144 +111,164 @@ void 
calc_nnz_by_rows(raft::resources const& handle, const bitmap_t* bitmap, index_t num_rows, index_t num_cols, - nnz_t* nnz_per_row) + nnz_t* sub_col_nnz, + size_t& sub_nnz_size, + index_t& bits_per_sub_col) { - auto stream = resource::get_cuda_stream(handle); - const index_t total = num_rows * num_cols; - const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); - - int dev_id, sm_count, blocks_per_sm; + if (sub_nnz_size == 0) { + bits_per_sub_col = bitmap_to_csr_tpb * sizeof(index_t) * 8 * 8; + auto grid_dim_y = (num_cols + bits_per_sub_col - 1) / bits_per_sub_col; + sub_nnz_size = num_rows * ((num_cols + bits_per_sub_col - 1) / bits_per_sub_col); + return; + } + auto stream = resource::get_cuda_stream(handle); + const size_t total = num_rows * num_cols; + const size_t bitmap_num = + (total + index_t(sizeof(bitmap_t) * 8) - 1) / index_t(sizeof(bitmap_t) * 8); - cudaGetDevice(&dev_id); - cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); - cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &blocks_per_sm, calc_nnz_by_rows_kernel, calc_nnz_by_rows_tpb, 0); + auto block_x = num_rows; + auto block_y = sub_nnz_size / num_rows; + dim3 grid(block_x, block_y, 1); - index_t max_active_blocks = sm_count * blocks_per_sm; - auto grid = std::min(max_active_blocks, raft::ceildiv(bitmap_num, index_t(calc_nnz_by_rows_tpb))); - auto block = calc_nnz_by_rows_tpb; + auto block = bitmap_to_csr_tpb; - calc_nnz_by_rows_kernel - <<>>(bitmap, num_rows, num_cols, bitmap_num, nnz_per_row); + calc_nnz_by_rows_kernel<<>>( + bitmap, num_rows, num_cols, bitmap_num, sub_col_nnz, bits_per_sub_col); RAFT_CUDA_TRY(cudaPeekAtLastError()); } -/* - Execute the exclusive_scan within one warp with no inter-warp communication. - This function calculates the exclusive prefix sum of `value` across threads within the same warp. 
- Each thread in the warp will end up with the sum of all the values of the threads with lower IDs - in the same warp, with the first thread always getting a sum of 0. -*/ -template -RAFT_DEVICE_INLINE_FUNCTION value_t warp_exclusive_scan(value_t value) -{ - int lane_id = threadIdx.x & 0x1f; - value_t shifted_value = __shfl_up_sync(0xffffffff, value, 1, warpSize); - if (lane_id == 0) shifted_value = 0; - - value_t sum = shifted_value; - - for (int i = 1; i < warpSize; i *= 2) { - value_t n = __shfl_up_sync(0xffffffff, sum, i, warpSize); - if (lane_id >= i) { sum += n; } - } - return sum; -} - -// Threads per block in fill_indices_by_rows_kernel. -static const constexpr int fill_indices_by_rows_tpb = 32; - template -RAFT_KERNEL __launch_bounds__(fill_indices_by_rows_tpb) +RAFT_KERNEL __launch_bounds__(bitmap_to_csr_tpb) fill_indices_by_rows_kernel(const bitmap_t* bitmap, - const index_t* indptr, - index_t num_rows, - index_t num_cols, + index_t* indptr, + size_t num_rows, + size_t num_cols, nnz_t nnz, - index_t bitmap_num, - index_t* indices) + index_t* indices, + nnz_t* sub_col_nnz, + index_t bits_per_sub_col) { - constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); constexpr bitmap_t ONE = bitmap_t(1u); constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; - int lane_id = threadIdx.x & 0x1f; + using mutable_bitmap_t = typename std::remove_const_t; + using BlockScan = cub::BlockScan; + + __shared__ typename BlockScan::TempStorage scan_storage; + + const auto tid = threadIdx.x; + const auto row = blockIdx.x; + + const auto num_sub_cols = gridDim.y; + const auto sub_col = blockIdx.y; // Ensure the HBM allocated for CSR values is sufficient to handle all non-zero bitmap bits. // An assert will trigger if the allocated HBM is insufficient when `NDEBUG` isn't defined. // Note: Assertion is active only if `NDEBUG` is undefined. 
if constexpr (check_nnz) { - if (lane_id == 0) { assert(nnz < indptr[num_rows]); } + if (tid == 0) { assert(nnz < sub_col_nnz[num_rows * num_sub_cols]); } } + size_t s_bit = size_t(row) * num_cols + sub_col * bits_per_sub_col; + size_t e_bit = min(s_bit + bits_per_sub_col, size_t(num_cols) * (row + 1)); + + size_t l_sum = 0; + __shared__ size_t g_sum; + + index_t s_offset = s_bit % BITS_PER_BITMAP; + size_t bitmap_idx = s_bit / BITS_PER_BITMAP; + + if (tid == 0 && row == 0 && sub_col == 0) { indptr[0] = 0; } + if (tid == 0 && sub_col == 0) { indptr[row + 1] = sub_col_nnz[(row + 1) * num_sub_cols]; } + + size_t g_nnz = sub_col_nnz[sub_col + row * num_sub_cols]; + index_t* sub_cols_indices_addr = indices + g_nnz; + + bool guard[BITS_PER_BITMAP]; + + index_t g_bits = sub_col * bits_per_sub_col + tid * BITS_PER_BITMAP; + + if (tid == 0 && s_offset != 0) { + mutable_bitmap_t l_bitmap = bitmap[bitmap_idx]; + l_bitmap >>= s_offset; + + size_t remaining_bits = min(size_t(BITS_PER_BITMAP - s_offset), e_bit - s_bit); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + +#pragma unroll + for (int i = 0; i < BITS_PER_BITMAP; i++) { + guard[i] = l_bitmap & (ONE << i); + } #pragma unroll - for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { - index_t g_sum = 0; - index_t s_bit = row * num_cols; - index_t e_bit = s_bit + num_cols; - index_t indptr_row = indptr[row]; + for (int i = 0; i < BITS_PER_BITMAP; i++) { + stg(index_t(i + g_bits), sub_cols_indices_addr + l_sum, guard[i]); + l_sum += guard[i]; + } + } + + if (tid == 0) { g_sum = l_sum; } + __syncthreads(); + + if (s_offset != 0) { + s_bit += (BITS_PER_BITMAP - s_offset); + g_bits += (BITS_PER_BITMAP - s_offset); + } + + for (size_t bit_idx = s_bit; bit_idx < e_bit; bit_idx += BITS_PER_BITMAP * blockDim.x) { + mutable_bitmap_t l_bitmap = 0; + bitmap_idx = bit_idx / BITS_PER_BITMAP + tid; + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = 
bitmap[bitmap_idx]; } + + index_t remaining_bits = min(BITS_PER_BITMAP, index_t(e_bit - bitmap_idx * BITS_PER_BITMAP)); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + + int l_bits = raft::detail::popc(l_bitmap); + int l_sum_32b = 0; + BlockScan(scan_storage).InclusiveSum(l_bits, l_sum_32b); + l_sum = l_sum_32b + g_sum - l_bits; + __syncthreads(); #pragma unroll - for (index_t offset = 0; offset < num_cols; offset += BITS_PER_BITMAP * warpSize) { - index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; - std::remove_const_t l_bitmap = 0; - index_t l_offset = offset + lane_id * BITS_PER_BITMAP - (s_bit % BITS_PER_BITMAP); - - if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } - - if (s_bit > bitmap_idx * BITS_PER_BITMAP) { - l_bitmap >>= (s_bit - bitmap_idx * BITS_PER_BITMAP); - l_bitmap <<= (s_bit - bitmap_idx * BITS_PER_BITMAP); - } - - if ((bitmap_idx + 1) * BITS_PER_BITMAP > e_bit) { - l_bitmap <<= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); - l_bitmap >>= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); - } - - index_t l_sum = - g_sum + warp_exclusive_scan(static_cast(raft::detail::popc(l_bitmap))); - - for (int i = 0; i < BITS_PER_BITMAP; i++) { - if (l_bitmap & (ONE << i)) { - indices[indptr_row + l_sum] = l_offset + i; - l_sum++; - } - } - g_sum = __shfl_sync(0xffffffff, l_sum, warpSize - 1); + for (int i = 0; i < BITS_PER_BITMAP; i++) { + guard[i] = l_bitmap & (ONE << i); } +#pragma unroll + for (int i = 0; i < BITS_PER_BITMAP; i++) { + stg(index_t(i + g_bits), sub_cols_indices_addr + l_sum, guard[i]); + l_sum += guard[i]; + } + + if (threadIdx.x == (bitmap_to_csr_tpb - 1)) { g_sum += (l_sum_32b); } + g_bits += BITS_PER_BITMAP * blockDim.x; } } template void fill_indices_by_rows(raft::resources const& handle, const bitmap_t* bitmap, - const index_t* indptr, + index_t* indptr, index_t num_rows, index_t num_cols, nnz_t nnz, - index_t* indices) + index_t* indices, 
+ nnz_t* sub_col_nnz, + index_t bits_per_sub_col, + size_t sub_nnz_size) { - auto stream = resource::get_cuda_stream(handle); - const index_t total = num_rows * num_cols; - const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); - - int dev_id, sm_count, blocks_per_sm; - - cudaGetDevice(&dev_id); - cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); - cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &blocks_per_sm, - fill_indices_by_rows_kernel, - fill_indices_by_rows_tpb, - 0); - - index_t max_active_blocks = sm_count * blocks_per_sm; - auto grid = std::min(max_active_blocks, num_rows); - auto block = fill_indices_by_rows_tpb; - - fill_indices_by_rows_kernel - <<>>(bitmap, indptr, num_rows, num_cols, nnz, bitmap_num, indices); + auto stream = resource::get_cuda_stream(handle); + auto block_x = num_rows; + auto block_y = sub_nnz_size / num_rows; + dim3 grid(block_x, block_y, 1); + + auto block = bitmap_to_csr_tpb; + + fill_indices_by_rows_kernel<<>>( + bitmap, indptr, num_rows, num_cols, nnz, indices, sub_col_nnz, bits_per_sub_col); RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -252,6 +280,7 @@ void bitmap_to_csr(raft::resources const& handle, raft::core::bitmap_view bitmap, csr_matrix_t& csr) { + using nnz_t = typename csr_matrix_t::nnz_type; auto csr_view = csr.structure_view(); if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0 || csr_view.get_nnz() == 0) { @@ -274,25 +303,50 @@ void bitmap_to_csr(raft::resources const& handle, RAFT_CUDA_TRY(cudaMemsetAsync(indptr, 0, (csr_view.get_n_rows() + 1) * sizeof(index_t), stream)); - calc_nnz_by_rows(handle, bitmap.data(), csr_view.get_n_rows(), csr_view.get_n_cols(), indptr); - thrust::exclusive_scan(thrust_policy, indptr, indptr + csr_view.get_n_rows() + 1, indptr); + size_t sub_nnz_size = 0; + index_t bits_per_sub_col = 0; + + // Get buffer size and number of bits per each sub-columns + calc_nnz_by_rows(handle, + bitmap.data(), + csr_view.get_n_rows(), + 
csr_view.get_n_cols(), + static_cast(nullptr), + sub_nnz_size, + bits_per_sub_col); + + rmm::device_async_resource_ref device_memory = resource::get_workspace_resource(handle); + rmm::device_uvector sub_nnz(sub_nnz_size + 1, stream, device_memory); + + calc_nnz_by_rows(handle, + bitmap.data(), + csr_view.get_n_rows(), + csr_view.get_n_cols(), + sub_nnz.data(), + sub_nnz_size, + bits_per_sub_col); + + thrust::exclusive_scan( + thrust_policy, sub_nnz.data(), sub_nnz.data() + sub_nnz_size + 1, sub_nnz.data()); if constexpr (is_device_csr_sparsity_owning_v) { index_t nnz = 0; RAFT_CUDA_TRY(cudaMemcpyAsync( - &nnz, indptr + csr_view.get_n_rows(), sizeof(index_t), cudaMemcpyDeviceToHost, stream)); + &nnz, sub_nnz.data() + sub_nnz_size, sizeof(index_t), cudaMemcpyDeviceToHost, stream)); resource::sync_stream(handle); csr.initialize_sparsity(nnz); } constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; - fill_indices_by_rows( - handle, - bitmap.data(), - indptr, - csr_view.get_n_rows(), - csr_view.get_n_cols(), - csr_view.get_nnz(), - indices); + fill_indices_by_rows(handle, + bitmap.data(), + indptr, + csr_view.get_n_rows(), + csr_view.get_n_cols(), + csr_view.get_nnz(), + indices, + sub_nnz.data(), + bits_per_sub_col, + sub_nnz_size); thrust::fill_n(thrust_policy, csr.get_elements().data(), diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index 2c954ec99a..c1b668fed6 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -739,4 +739,46 @@ DI void block_copy(raft::device_span dst, const raft::device_span src) /** @} */ +/** + * @defgroup GlobalStores Global Store Operations + * @{ + * @brief Perform conditional stores to global memory. + * + * These functions store data to a specified global memory address, + * controlled by a guard flag to enable conditional execution. + * + * @param[in] reg The data to store in global memory. 
+ * The type of `reg` determines the size of the store. + * @param[in] addr The global memory address where the data will be stored. + * @param[in] guard A flag to conditionally enable the store operation. + * If `true`, the store is performed; otherwise, it is skipped + */ +DI void stg(const int& reg, void* addr, bool guard) +{ + asm volatile( + "{\n" + ".reg .pred p;\n" + "setp.ne.b32 p, %2, 0;\n" + "@p st.global.b32 [%0], %1;\n" + "}\n" + : + : "l"(addr), "r"(reg), "r"((int)guard) + : "memory"); +} + +DI void stg(const int64_t& reg, void* addr, bool guard) +{ + asm volatile( + "{\n" + ".reg .pred p;\n" + "setp.ne.b32 p, %2, 0;\n" + "@p st.global.b64 [%0], %1;\n" + "}\n" + : + : "l"(addr), "l"(reg), "r"((int)guard) + : "memory"); +} + +/** @} */ + } // namespace raft diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/test/sparse/convert_csr.cu index 1cd49b0bbd..c1a495ea3d 100644 --- a/cpp/test/sparse/convert_csr.cu +++ b/cpp/test/sparse/convert_csr.cu @@ -249,7 +249,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam& bitmap) { index_t total = static_cast(m * n); - index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; for (auto& item : bitmap) { @@ -257,7 +257,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam dis(0, total - 1); while (num_ones > 0) { @@ -318,8 +318,8 @@ class BitmapToCSRTest : public ::testing::TestWithParam cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); - std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); + std::vector cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); + std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); std::sort(cols1.begin(), cols1.end()); std::sort(cols2.begin(), cols2.end()); @@ -396,9 +396,13 @@ class BitmapToCSRTest : public ::testing::TestWithParam( - values_expected_d.data(), 
values_d.data(), nnz, raft::Compare(), stream)); + EXPECT_TRUE(csr_compare(indptr_h, indices_h, indptr_expected_h, indices_expected_h)) + << " n_row: " << params.n_rows << ", n_cols: " << params.n_cols << ", nnz: " << nnz + << ", random_number: " << random_number; + EXPECT_TRUE(raft::devArrMatch( + values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)) + << " n_row: " << params.n_rows << ", n_cols: " << params.n_cols << ", nnz: " << nnz + << ", random_number: " << random_number; } protected: @@ -418,6 +422,8 @@ class BitmapToCSRTest : public ::testing::TestWithParam indptr_expected_d; rmm::device_uvector indices_expected_d; rmm::device_uvector values_expected_d; + + unsigned int random_number; }; using BitmapToCSRTestI = BitmapToCSRTest; @@ -426,40 +432,50 @@ TEST_P(BitmapToCSRTestI, Result) { Run(); } using BitmapToCSRTestL = BitmapToCSRTest; TEST_P(BitmapToCSRTestL, Result) { Run(); } +using BitmapToCSRTestLOnLargeSize = BitmapToCSRTest; +TEST_P(BitmapToCSRTestLOnLargeSize, Result) { Run(); } + template const std::vector> bitmaptocsr_inputs = { - {0, 0, 0.2, false}, - {10, 32, 0.4, false}, - {10, 3, 0.2, false}, - {32, 1024, 0.4, false}, - {1024, 1048576, 0.01, false}, - {1024, 1024, 0.4, false}, - {64 * 1024 + 10, 2, 0.3, false}, // 64K + 10 is slightly over maximum of blockDim.y - {16, 16, 0.3, false}, // No peeling-remainder - {17, 16, 0.3, false}, // Check peeling-remainder - {18, 16, 0.3, false}, // Check peeling-remainder - {32 + 9, 33, 0.2, false}, // Check peeling-remainder - {2, 33, 0.2, false}, // Check peeling-remainder - {0, 0, 0.2, true}, - {10, 32, 0.4, true}, - {10, 3, 0.2, true}, - {32, 1024, 0.4, true}, - {1024, 1048576, 0.01, true}, - {1024, 1024, 0.4, true}, - {64 * 1024 + 10, 2, 0.3, true}, // 64K + 10 is slightly over maximum of blockDim.y - {16, 16, 0.3, true}, // No peeling-remainder - {17, 16, 0.3, true}, // Check peeling-remainder - {18, 16, 0.3, true}, // Check peeling-remainder - {32 + 9, 33, 0.2, true}, // Check 
peeling-remainder - {2, 33, 0.2, true}, // Check peeling-remainder + {0, 0, 0.8, false}, + {10, 32, 0.6, false}, + {10, 3, 0.8, false}, + {32, 1024, 0.6, false}, + {1024, 1048576, 0.99, false}, + {1024, 1024, 0.6, false}, + {64 * 1024 + 10, 2, 0.7, false}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.7, false}, // No peeling-remainder + {17, 16, 0.7, false}, // Check peeling-remainder + {18, 16, 0.7, false}, // Check peeling-remainder + {32 + 9, 33, 0.8, false}, // Check peeling-remainder + {2, 33, 0.8, false}, // Check peeling-remainder + {0, 0, 0.8, true}, + {10, 32, 0.6, true}, + {10, 3, 0.8, true}, + {32, 1024, 0.6, true}, + {1024, 1048576, 0.99, true}, + {1024, 1024, 0.6, true}, + {64 * 1024 + 10, 2, 0.7, true}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.7, true}, // No peeling-remainder + {17, 16, 0.7, true}, // Check peeling-remainder + {18, 16, 0.7, true}, // Check peeling-remainder + {32 + 9, 33, 0.8, true}, // Check peeling-remainder + {2, 33, 0.8, true}, // Check peeling-remainder }; +template +const std::vector> bitmaptocsr_large_inputs = { + {100, 100000000, 0.99, true}, {100, 100000000, 0.95, false}, {100, 100000000 + 17, 0.95, false}}; + INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, BitmapToCSRTestI, ::testing::ValuesIn(bitmaptocsr_inputs)); INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, BitmapToCSRTestL, ::testing::ValuesIn(bitmaptocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitmapToCSRTestLOnLargeSize, + ::testing::ValuesIn(bitmaptocsr_large_inputs)); } // namespace sparse } // namespace raft From bfd190687ee396374b7106d9ac26add73b57b22a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Dec 2024 12:08:26 -0600 Subject: [PATCH 11/37] reduce duplication, removed unused things in dependencies.yaml (#2529) Proposes some small cleanup for `dependencies.yaml` * removes `rapids_build_setuptools` dependency group - *#2497 removed the last use of `setuptools` here* * breaks `cuda-python` and 
`rmm` out into `depends_on_*` groups to reduce duplication, and for consistency with other RAPIDS projects ([docs explaining this](https://github.com/rapidsai/build-planning/blob/d9e3c606d95c835ee384ac6480a4af0ac6cb024a/docs/docs/packaging.md#L181)) * alphabetizes lists Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2529 --- dependencies.yaml | 156 ++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 96 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 37ea223a01..dc1807fbf9 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,22 +6,22 @@ files: cuda: ["11.8", "12.5"] arch: [x86_64, aarch64] includes: - - rapids_build - - build_pylibraft + - checks - cuda - cuda_version + - depends_on_cuda_python - depends_on_cupy - depends_on_distributed_ucxx + - depends_on_rmm - develop - - checks - - test_libraft - docs - - rapids_build_setuptools + - rapids_build - rapids_build_skbuild - - run_raft_dask - run_pylibraft - - test_python_common + - run_raft_dask + - test_libraft - test_pylibraft + - test_python_common test_cpp: output: none includes: @@ -31,10 +31,10 @@ files: output: none includes: - cuda_version + - depends_on_cupy - py_version - - test_python_common - test_pylibraft - - depends_on_cupy + - test_python_common checks: output: none includes: @@ -62,8 +62,9 @@ files: table: tool.rapids-build-backend key: requires includes: + - depends_on_cuda_python + - depends_on_rmm - rapids_build - - build_pylibraft py_run_pylibraft: output: pyproject pyproject_dir: python/pylibraft @@ -71,6 +72,8 @@ files: table: project includes: - cuda_wheels + - depends_on_cuda_python + - depends_on_rmm - run_pylibraft py_test_pylibraft: output: pyproject @@ -79,9 +82,9 @@ files: table: project.optional-dependencies key: test includes: - - test_python_common - - test_pylibraft - depends_on_cupy + - test_pylibraft + - test_python_common 
py_build_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -96,16 +99,16 @@ files: table: tool.rapids-build-backend key: requires includes: - - rapids_build - depends_on_ucx_build + - rapids_build py_run_raft_dask: output: pyproject pyproject_dir: python/raft-dask extras: table: project includes: - - run_raft_dask - depends_on_distributed_ucxx + - run_raft_dask py_test_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -125,7 +128,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 + - rapids-build-backend>=0.3.0,<0.4.0.dev0 - output_types: [conda] packages: - scikit-build-core>=0.10.0 @@ -180,44 +183,6 @@ dependencies: - matrix: {cuda: "11.2", arch: aarch64} packages: [nvcc_linux-aarch64=11.2] - build_pylibraft: - common: - - output_types: [conda] - packages: - - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. 
- - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - packages: - - &cuda_python12 cuda-python>=12.6.2,<13.0a0 - - matrix: - cuda: "11.*" - packages: - - &cuda_python11 cuda-python>=11.8.5,<12.0a0 - - matrix: - packages: - - &cuda_python cuda-python - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - &rmm_cu12 rmm-cu12==25.2.*,>=0.0.0a0 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - &rmm_cu11 rmm-cu11==25.2.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_unsuffixed] } checks: common: - output_types: [conda, requirements] @@ -398,13 +363,6 @@ dependencies: - recommonmark - sphinx-copybutton - sphinx-markdown-tables - rapids_build_setuptools: - common: - - output_types: [requirements, pyproject] - packages: - - wheel - - setuptools - - *rapids_build_backend py_version: specific: - output_types: conda @@ -429,42 +387,6 @@ dependencies: - output_types: [conda, pyproject] packages: - numpy>=1.23,<3.0a0 - - output_types: [conda] - packages: - - *rmm_unsuffixed - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. 
- - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - packages: - - *cuda_python12 - - matrix: - cuda: "11.*" - packages: - - *cuda_python11 - - matrix: - packages: - - *cuda_python - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - *rmm_cu12 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - *rmm_cu11 - - {matrix: null, packages: [*rmm_unsuffixed]} run_raft_dask: common: - output_types: [conda, pyproject] @@ -511,6 +433,21 @@ dependencies: packages: - scikit-learn - scipy + depends_on_cuda_python: + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - cuda-python>=12.6.2,<13.0a0 + - matrix: + cuda: "11.*" + packages: + - cuda-python>=11.8.5,<12.0a0 + - matrix: + packages: + - cuda-python depends_on_distributed_ucxx: common: - output_types: conda @@ -537,6 +474,33 @@ dependencies: packages: - distributed-ucxx-cu11==0.42.*,>=0.0.0a0 - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} + depends_on_rmm: + common: + - output_types: conda + packages: + - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - rmm-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - rmm-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - *rmm_unsuffixed depends_on_ucx_build: common: - output_types: conda From d7e68f55c58493bc9cf1fbe4eb775a9593891c58 Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:30:22 +0900 Subject: [PATCH 12/37] [DOC] Fix sample codes (#2518) `raft::raft::resources` -> `raft::resources` Authors: - tsuki (https://github.com/enp1s0) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2518 --- cpp/include/raft/cluster/kmeans.cuh | 8 ++++---- cpp/include/raft/comms/std_comms.hpp | 4 ++-- cpp/include/raft/distance/distance-inl.cuh | 2 +- cpp/include/raft/neighbors/epsilon_neighborhood.cuh | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 38318e8ec8..ee1fc83a9b 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -52,7 +52,7 @@ using KeyValueIndexOp = detail::KeyValueIndexOp; * #include * using namespace raft::cluster; * ... 
- * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); @@ -61,7 +61,7 @@ using KeyValueIndexOp = detail::KeyValueIndexOp; * params, * X, * std::nullopt, - * centroids, + * centroids.view(), * raft::make_scalar_view(&inertia), * raft::make_scalar_view(&n_iter)); * @endcode @@ -107,7 +107,7 @@ template * #include * using namespace raft::cluster; * ... - * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); @@ -175,7 +175,7 @@ template * #include * using namespace raft::cluster; * ... - * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 667c8be285..8481360897 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -52,7 +52,7 @@ using std_comms = detail::std_comms; * #include * * ncclComm_t nccl_comm; - * raft::raft::resources handle; + * raft::resources handle; * * build_comms_nccl_only(&handle, nccl_comm, 5, 0); * ... 
@@ -98,7 +98,7 @@ void build_comms_nccl_only(resources* handle, ncclComm_t nccl_comm, int num_rank * #include * * ncclComm_t nccl_comm; - * raft::raft::resources handle; + * raft::resources handle; * ucp_worker_h ucp_worker; * ucp_ep_h *ucp_endpoints_arr; * diff --git a/cpp/include/raft/distance/distance-inl.cuh b/cpp/include/raft/distance/distance-inl.cuh index 13c9d57efd..d5f8d1cfe1 100644 --- a/cpp/include/raft/distance/distance-inl.cuh +++ b/cpp/include/raft/distance/distance-inl.cuh @@ -366,7 +366,7 @@ void pairwise_distance(raft::resources const& handle, * #include * #include * - * raft::raft::resources handle; + * raft::resources handle; * int n_samples = 5000; * int n_features = 50; * diff --git a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh index bade4385fb..c2f531263d 100644 --- a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh +++ b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh @@ -76,7 +76,7 @@ void epsUnexpL2SqNeighborhood(bool* adj, * #include * #include * using namespace raft::neighbors; - * raft::raft::resources handle; + * raft::resources handle; * ... * auto adj = raft::make_device_matrix(handle, m * n); * auto vd = raft::make_device_vector(handle, m+1); @@ -120,4 +120,4 @@ void eps_neighbors_l2sq(raft::resources const& handle, } // namespace raft::neighbors::epsilon_neighborhood -#endif \ No newline at end of file +#endif From dee71f89e4bdf5ce3ff982e53da9c6c81d883608 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 30 Dec 2024 11:44:33 -0800 Subject: [PATCH 13/37] Check if nightlies have succeeded recently enough (#2533) Contributes to https://github.com/rapidsai/build-planning/issues/127 This PR cannot be merged unless nightly CI has passed within the past 7 days, so if it remains unmerged that will itself be an indication that nightly CI needs fixing. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2533 --- .github/workflows/pr.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 965943e726..a270df1dfa 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - check-nightly-ci - changed-files - checks - conda-cpp-build @@ -30,6 +31,18 @@ jobs: if: always() with: needs: ${{ toJSON(needs) }} + check-nightly-ci: + # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # provides at least Python 3.11 (see + # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + runs-on: ubuntu-24.04 + env: + RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: raft changed-files: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 From eef9a4fa9a39d4349ed699b097a3e3ff6c78cbc4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 30 Dec 2024 11:48:07 -0800 Subject: [PATCH 14/37] Switch over to rapids-logger (#2530) This PR removes raft's implementation of a logger in favor of the centralized one in [rapids-logger](https://github.com/rapidsai/rapids-logger). Consumers still get the benefits of a PImpl idiom, but now that is primarily handled by using the appropriate targets (if necessary the impl header is of course still available for direct inclusion). This change paves the way for ensuring consistent fmt/spdlog (lack of) linkage throughout RAPIDS conda and wheel packages. 
This PR requires https://github.com/rapidsai/rapids-logger/pull/1 Contributes to https://github.com/rapidsai/build-planning/issues/104 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2530 --- cpp/CMakeLists.txt | 28 +++- cpp/include/raft/cluster/detail/kmeans.cuh | 8 +- .../raft/cluster/detail/kmeans_balanced.cuh | 1 + cpp/include/raft/cluster/kmeans_types.hpp | 2 +- cpp/include/raft/common/logger.hpp | 24 --- cpp/include/raft/core/cublas_macros.hpp | 3 - cpp/include/raft/core/cusolver_macros.hpp | 7 +- cpp/include/raft/core/cusparse_macros.hpp | 2 - .../raft/core/detail/callback_sink.hpp | 71 -------- .../core/detail/fail_container_policy.hpp | 2 +- cpp/include/raft/core/detail/logger.hpp | 24 --- cpp/include/raft/core/logger-ext.hpp | 152 ----------------- cpp/include/raft/core/logger-inl.hpp | 153 ------------------ cpp/include/raft/core/logger-macros.hpp | 95 ++--------- cpp/include/raft/core/logger.hpp | 23 --- .../raft/neighbors/detail/ivf_flat_build.cuh | 1 + .../neighbors/detail/ivf_flat_search-inl.cuh | 3 +- .../raft/solver/detail/lap_kernels.cuh | 3 +- .../raft/sparse/solver/detail/lanczos.cuh | 2 +- cpp/src/core/logger.cpp | 16 -- cpp/test/CMakeLists.txt | 5 + cpp/test/core/device_resources_manager.cpp | 2 +- cpp/test/core/logger.cpp | 57 +++---- docs/source/developer_guide.md | 4 +- 24 files changed, 87 insertions(+), 601 deletions(-) delete mode 100644 cpp/include/raft/common/logger.hpp delete mode 100644 cpp/include/raft/core/detail/callback_sink.hpp delete mode 100644 cpp/include/raft/core/detail/logger.hpp delete mode 100644 cpp/include/raft/core/logger-ext.hpp delete mode 100644 cpp/include/raft/core/logger-inl.hpp delete mode 100644 cpp/include/raft/core/logger.hpp delete mode 100644 cpp/src/core/logger.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 78a4dbb913..06531941aa 100644 --- 
a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -100,6 +100,17 @@ set_property( ) message(VERBOSE "RAFT: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.") +# Set logging level +set(LIBRAFT_LOGGING_LEVEL + "INFO" + CACHE STRING "Choose the logging level." +) +set_property( + CACHE LIBRAFT_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" + "OFF" +) +message(VERBOSE "RAFT: LIBRAFT_LOGGING_LEVEL = '${LIBRAFT_LOGGING_LEVEL}'.") + # ################################################################################################## # * Conda environment detection ---------------------------------------------- @@ -152,6 +163,13 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() +# Not using rapids-cmake since we never want to find, always download. +CPMAddPackage( + NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG + 4df3ee70c6746fd1b6c0dc14209dae2e2d4378c6 VERSION 4df3ee70c6746fd1b6c0dc14209dae2e2d4378c6 +) +rapids_make_logger(raft LOGGER_HEADER_DIR include/raft/core EXPORT_SET raft-exports) + # CCCL before rmm/cuco so we get the right version of CCCL include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_rmm.cmake) @@ -182,7 +200,7 @@ target_include_directories( # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
target_link_libraries( raft INTERFACE rmm::rmm rmm::rmm_logger spdlog::spdlog_header_only cuco::cuco - nvidia::cutlass::cutlass CCCL::CCCL + nvidia::cutlass::cutlass CCCL::CCCL raft_logger ) target_compile_features(raft INTERFACE cxx_std_17 $) @@ -190,6 +208,9 @@ target_compile_options( raft INTERFACE $<$:--expt-extended-lambda --expt-relaxed-constexpr> ) +target_compile_definitions( + raft INTERFACE "RAFT_LOG_ACTIVE_LEVEL=RAFT_LOG_LEVEL_${LIBRAFT_LOGGING_LEVEL}" +) set(RAFT_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) set(RAFT_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) @@ -265,7 +286,6 @@ set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled) if(RAFT_COMPILE_LIBRARY) add_library( raft_objs OBJECT - src/core/logger.cpp src/linalg/detail/coalesced_reduction.cu src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu @@ -318,8 +338,8 @@ if(RAFT_COMPILE_LIBRARY) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") endforeach() - target_link_libraries(raft_lib PRIVATE rmm::rmm_logger_impl) - target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl) + target_link_libraries(raft_lib PRIVATE rmm::rmm_logger_impl raft_logger_impl) + target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl raft_logger_impl) endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 4efeedcbaa..4203f0969b 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -369,7 +369,7 @@ void kmeans_fit_main(raft::resources const& handle, rmm::device_uvector& workspace) { common::nvtx::range fun_scope("kmeans_fit_main"); - logger::get(RAFT_NAME).set_level(params.verbosity); + 
default_logger().set_level(params.verbosity); cudaStream_t stream = resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); @@ -865,7 +865,7 @@ void kmeans_fit(raft::resources const& handle, params.n_clusters); } - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); // Allocate memory rmm::device_uvector workspace(0, stream); @@ -1010,7 +1010,7 @@ void kmeans_predict(raft::resources const& handle, RAFT_EXPECTS(centroids.extent(1) == n_features, "invalid parameter (centroids.extent(1) != n_features)"); - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); auto metric = params.metric; // Allocate memory @@ -1201,7 +1201,7 @@ void kmeans_transform(raft::resources const& handle, raft::device_matrix_view X_new) { common::nvtx::range fun_scope("kmeans_transform"); - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); cudaStream_t stream = resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 0a5a3ba5aa..5dcd679bd5 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/include/raft/cluster/kmeans_types.hpp b/cpp/include/raft/cluster/kmeans_types.hpp index 4d956ad7a0..fbedd58417 100644 --- a/cpp/include/raft/cluster/kmeans_types.hpp +++ b/cpp/include/raft/cluster/kmeans_types.hpp @@ -82,7 +82,7 @@ struct KMeansParams : kmeans_base_params { /** * verbosity level. */ - int verbosity = RAFT_LEVEL_INFO; + level_enum verbosity = level_enum::info; /** * Seed to the random number generator. 
diff --git a/cpp/include/raft/common/logger.hpp b/cpp/include/raft/common/logger.hpp deleted file mode 100644 index 77483e577d..0000000000 --- a/cpp/include/raft/common/logger.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This file is deprecated and will be removed in release 22.08. - * Please use the include/core/logger.hpp instead. - */ - -#pragma once - -#include \ No newline at end of file diff --git a/cpp/include/raft/core/cublas_macros.hpp b/cpp/include/raft/core/cublas_macros.hpp index b69b121161..6c195d8a6f 100644 --- a/cpp/include/raft/core/cublas_macros.hpp +++ b/cpp/include/raft/core/cublas_macros.hpp @@ -23,9 +23,6 @@ #include -///@todo: enable this once we have logger enabled -// #include - #include #define _CUBLAS_ERR_TO_STR(err) \ diff --git a/cpp/include/raft/core/cusolver_macros.hpp b/cpp/include/raft/core/cusolver_macros.hpp index 74a8b7c36c..beaf2d74dc 100644 --- a/cpp/include/raft/core/cusolver_macros.hpp +++ b/cpp/include/raft/core/cusolver_macros.hpp @@ -19,11 +19,10 @@ #pragma once +#include + #include #include -///@todo: enable this once logging is enabled -// #include -#include #include @@ -135,4 +134,4 @@ inline const char* cusolver_error_to_string(cusolverStatus_t err) #define CUSOLVER_CHECK_NO_THROW(call) CUSOLVER_TRY_NO_THROW(call) #endif -#endif \ No newline at end of file +#endif diff --git 
a/cpp/include/raft/core/cusparse_macros.hpp b/cpp/include/raft/core/cusparse_macros.hpp index 5a1968b529..2a1df14345 100644 --- a/cpp/include/raft/core/cusparse_macros.hpp +++ b/cpp/include/raft/core/cusparse_macros.hpp @@ -19,8 +19,6 @@ #include #include -///@todo: enable this once logging is enabled -// #include #define _CUSPARSE_ERR_TO_STR(err) \ case err: return #err; diff --git a/cpp/include/raft/core/detail/callback_sink.hpp b/cpp/include/raft/core/detail/callback_sink.hpp deleted file mode 100644 index a110af5c76..0000000000 --- a/cpp/include/raft/core/detail/callback_sink.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include - -#define SPDLOG_HEADER_ONLY -#include -#include -#include - -namespace spdlog::sinks { - -typedef void (*LogCallback)(int lvl, const char* msg); - -template -class CallbackSink : public base_sink { - public: - explicit CallbackSink(std::string tag = "spdlog", - LogCallback callback = nullptr, - void (*flush)() = nullptr) - : _callback{callback}, _flush{flush} {}; - - void set_callback(LogCallback callback) { _callback = callback; } - void set_flush(void (*flush)()) { _flush = flush; } - - protected: - void sink_it_(const details::log_msg& msg) override - { - spdlog::memory_buf_t formatted; - base_sink::formatter_->format(msg, formatted); - std::string msg_string = fmt::to_string(formatted); - - if (_callback) { - _callback(static_cast(msg.level), msg_string.c_str()); - } else { - std::cout << msg_string; - } - } - - void flush_() override - { - if (_flush) { - _flush(); - } else { - std::cout << std::flush; - } - } - - LogCallback _callback; - void (*_flush)(); -}; - -using callback_sink_mt = CallbackSink; -using callback_sink_st = CallbackSink; - -} // end namespace spdlog::sinks diff --git a/cpp/include/raft/core/detail/fail_container_policy.hpp b/cpp/include/raft/core/detail/fail_container_policy.hpp index cf9d0887dd..f5f1bfb377 100644 --- a/cpp/include/raft/core/detail/fail_container_policy.hpp +++ b/cpp/include/raft/core/detail/fail_container_policy.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/include/raft/core/detail/logger.hpp b/cpp/include/raft/core/detail/logger.hpp deleted file mode 100644 index f3f52b46ae..0000000000 --- a/cpp/include/raft/core/detail/logger.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#ifndef RAFT_HIDE_DEPRECATION_WARNINGS -#pragma message(__FILE__ \ - " is deprecated and will be removed in future releases." \ - " Please use the version instead.") -#endif - -#include diff --git a/cpp/include/raft/core/logger-ext.hpp b/cpp/include/raft/core/logger-ext.hpp deleted file mode 100644 index 73fe463aba..0000000000 --- a/cpp/include/raft/core/logger-ext.hpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // RAFT_INLINE_CONDITIONAL - -#include // std::unique_ptr -#include // std::string -#include // std::unordered_map - -namespace raft { - -static const std::string RAFT_NAME = "raft"; -static const std::string default_log_pattern("[%L] [%H:%M:%S.%f] %v"); - -namespace detail { -RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...); -} -/** - * @brief The main Logging class for raft library. - * - * This class acts as a thin wrapper over the underlying `spdlog` interface. 
The - * design is done in this way in order to avoid us having to also ship `spdlog` - * header files in our installation. - * - * @todo This currently only supports logging to stdout. Need to add support in - * future to add custom loggers as well [Issue #2046] - */ -class logger { - public: - // @todo setting the logger once per process with - logger(std::string const& name_ = ""); - /** - * @brief Singleton method to get the underlying logger object - * - * @return the singleton logger object - */ - static logger& get(std::string const& name = ""); - - /** - * @brief Set the logging level. - * - * Only messages with level equal or above this will be printed - * - * @param[in] level logging level - * - * @note The log level will actually be set only if the input is within the - * range [RAFT_LEVEL_TRACE, RAFT_LEVEL_OFF]. If it is not, then it'll - * be ignored. See documentation of decisiontree for how this gets used - */ - void set_level(int level); - - /** - * @brief Set the logging pattern - * - * @param[in] pattern the pattern to be set. 
Refer this link - * https://github.com/gabime/spdlog/wiki/3.-Custom-formatting - * to know the right syntax of this pattern - */ - void set_pattern(const std::string& pattern); - - /** - * @brief Register a callback function to be run in place of usual log call - * - * @param[in] callback the function to be run on all logged messages - */ - void set_callback(void (*callback)(int lvl, const char* msg)); - - /** - * @brief Register a flush function compatible with the registered callback - * - * @param[in] flush the function to use when flushing logs - */ - void set_flush(void (*flush)()); - - /** - * @brief Tells whether messages will be logged for the given log level - * - * @param[in] level log level to be checked for - * @return true if messages will be logged for this level, else false - */ - bool should_log_for(int level) const; - /** - * @brief Query for the current log level - * - * @return the current log level - */ - int get_level() const; - - /** - * @brief Get the current logging pattern - * @return the pattern - */ - std::string get_pattern() const; - - /** - * @brief Main logging method - * - * @param[in] level logging level of this message - * @param[in] fmt C-like format string, followed by respective params - */ - void log(int level, const char* fmt, ...); - - /** - * @brief Flush logs by calling flush on underlying logger - */ - void flush(); - - ~logger(); - - private: - logger(); - // pimpl pattern: - // https://learn.microsoft.com/en-us/cpp/cpp/pimpl-for-compile-time-encapsulation-modern-cpp?view=msvc-170 - class impl; - std::unique_ptr pimpl; - static inline std::unordered_map> log_map; -}; // class logger - -/** - * @brief An object used for scoped log level setting - * - * Instances of `raft::log_level_setter` will set RAFT logging to the level - * indicated on construction and will revert to the previous set level on - * destruction. 
- */ -struct log_level_setter { - explicit log_level_setter(int level) - { - prev_level_ = logger::get(RAFT_NAME).get_level(); - logger::get(RAFT_NAME).set_level(level); - } - ~log_level_setter() { logger::get(RAFT_NAME).set_level(prev_level_); } - - private: - int prev_level_; -}; // class log_level_setter - -}; // namespace raft diff --git a/cpp/include/raft/core/logger-inl.hpp b/cpp/include/raft/core/logger-inl.hpp deleted file mode 100644 index ea5f4ea26e..0000000000 --- a/cpp/include/raft/core/logger-inl.hpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "logger-macros.hpp" - -#include - -#include -#include -#include -#include -#include -#include -// The logger-ext.hpp file contains the class declaration of the logger class. -// In this case, it is okay to include the logger-ext.hpp file because it -// contains no RAFT_EXPLICIT template instantiations. 
-#include "logger-ext.hpp" - -#define SPDLOG_HEADER_ONLY -#include -#include // RAFT_INLINE_CONDITIONAL - -#include // NOLINT -#include // NOLINT - -namespace raft { - -namespace detail { - -inline std::string format(const char* fmt, va_list& vl) -{ - va_list vl_copy; - va_copy(vl_copy, vl); - int length = std::vsnprintf(nullptr, 0, fmt, vl_copy); - assert(length >= 0); - std::vector buf(length + 1); - std::vsnprintf(buf.data(), length + 1, fmt, vl); - return std::string(buf.data()); -} - -RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...) -{ - va_list vl; - va_start(vl, fmt); - std::string str = format(fmt, vl); - va_end(vl); - return str; -} - -inline int convert_level_to_spdlog(int level) -{ - level = std::max(RAFT_LEVEL_OFF, std::min(RAFT_LEVEL_TRACE, level)); - return RAFT_LEVEL_TRACE - level; -} - -} // namespace detail - -class logger::impl { // defined privately here - // ... all private data and functions: all of these - // can now change without recompiling callers ... 
- public: - std::shared_ptr sink; - std::shared_ptr spdlogger; - std::string cur_pattern; - int cur_level; - - impl(std::string const& name_ = "") - : sink{std::make_shared()}, - spdlogger{std::make_shared(name_, sink)}, - cur_pattern() - { - } -}; // class logger::impl - -RAFT_INLINE_CONDITIONAL logger::logger(std::string const& name_) : pimpl(new impl(name_)) -{ - set_pattern(default_log_pattern); - set_level(RAFT_ACTIVE_LEVEL); -} - -RAFT_INLINE_CONDITIONAL logger& logger::get(std::string const& name) -{ - if (log_map.find(name) == log_map.end()) { log_map[name] = std::make_shared(name); } - return *log_map[name]; -} - -RAFT_INLINE_CONDITIONAL void logger::set_level(int level) -{ - level = raft::detail::convert_level_to_spdlog(level); - pimpl->spdlogger->set_level(static_cast(level)); -} - -RAFT_INLINE_CONDITIONAL void logger::set_pattern(const std::string& pattern) -{ - pimpl->cur_pattern = pattern; - pimpl->spdlogger->set_pattern(pattern); -} - -RAFT_INLINE_CONDITIONAL void logger::set_callback(void (*callback)(int lvl, const char* msg)) -{ - pimpl->sink->set_callback(callback); -} - -RAFT_INLINE_CONDITIONAL void logger::set_flush(void (*flush)()) { pimpl->sink->set_flush(flush); } - -RAFT_INLINE_CONDITIONAL bool logger::should_log_for(int level) const -{ - level = raft::detail::convert_level_to_spdlog(level); - auto level_e = static_cast(level); - return pimpl->spdlogger->should_log(level_e); -} - -RAFT_INLINE_CONDITIONAL int logger::get_level() const -{ - auto level_e = pimpl->spdlogger->level(); - return RAFT_LEVEL_TRACE - static_cast(level_e); -} - -RAFT_INLINE_CONDITIONAL std::string logger::get_pattern() const { return pimpl->cur_pattern; } - -RAFT_INLINE_CONDITIONAL void logger::log(int level, const char* fmt, ...) 
-{ - level = raft::detail::convert_level_to_spdlog(level); - auto level_e = static_cast(level); - // explicit check to make sure that we only expand messages when required - if (pimpl->spdlogger->should_log(level_e)) { - va_list vl; - va_start(vl, fmt); - auto msg = raft::detail::format(fmt, vl); - va_end(vl); - pimpl->spdlogger->log(level_e, msg); - } -} - -RAFT_INLINE_CONDITIONAL void logger::flush() { pimpl->spdlogger->flush(); } - -RAFT_INLINE_CONDITIONAL logger::~logger() {} - -}; // namespace raft diff --git a/cpp/include/raft/core/logger-macros.hpp b/cpp/include/raft/core/logger-macros.hpp index 5ddb072067..e32440dcce 100644 --- a/cpp/include/raft/core/logger-macros.hpp +++ b/cpp/include/raft/core/logger-macros.hpp @@ -15,92 +15,17 @@ */ #pragma once -/** - * @defgroup logging levels used in raft - * - * @note exactly match the corresponding ones (but reverse in terms of value) - * in spdlog for wrapping purposes - * - * @{ - */ -#define RAFT_LEVEL_TRACE 6 -#define RAFT_LEVEL_DEBUG 5 -#define RAFT_LEVEL_INFO 4 -#define RAFT_LEVEL_WARN 3 -#define RAFT_LEVEL_ERROR 2 -#define RAFT_LEVEL_CRITICAL 1 -#define RAFT_LEVEL_OFF 0 -/** @} */ - -#if !defined(RAFT_ACTIVE_LEVEL) -#define RAFT_ACTIVE_LEVEL RAFT_LEVEL_INFO -#endif - -/** - * @defgroup loggerMacros Helper macros for dealing with logging - * @{ - */ -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE) -#define RAFT_LOG_TRACE(fmt, ...) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - ss << raft::detail::format(fmt, ##__VA_ARGS__); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ - } while (0) -#else -#define RAFT_LOG_TRACE(fmt, ...) 
void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE) -#define RAFT_LOG_TRACE_VEC(ptr, len) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - print_vector(#ptr, ptr, len, ss); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ +#include + +#if (RAFT_LOG_ACTIVE_LEVEL <= RAFT_LOG_LEVEL_TRACE) +#define RAFT_LOG_TRACE_VEC(ptr, len) \ + do { \ + std::stringstream ss; \ + ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ + print_vector(#ptr, ptr, len, ss); \ + raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ + RAFT_LOGGER_CALL(raft::default_logger(), raft::level_enum::trace, __VA_ARGS__) \ } while (0) #else #define RAFT_LOG_TRACE_VEC(ptr, len) void(0) #endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG) -#define RAFT_LOG_DEBUG(fmt, ...) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - ss << raft::detail::format(fmt, ##__VA_ARGS__); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_DEBUG, ss.str().c_str()); \ - } while (0) -#else -#define RAFT_LOG_DEBUG(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_INFO) -#define RAFT_LOG_INFO(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_INFO, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_INFO(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_WARN) -#define RAFT_LOG_WARN(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_WARN, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_WARN(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_ERROR) -#define RAFT_LOG_ERROR(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_ERROR, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_ERROR(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_CRITICAL) -#define RAFT_LOG_CRITICAL(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_CRITICAL, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_CRITICAL(fmt, ...) 
void(0) -#endif -/** @} */ diff --git a/cpp/include/raft/core/logger.hpp b/cpp/include/raft/core/logger.hpp deleted file mode 100644 index e64a0db257..0000000000 --- a/cpp/include/raft/core/logger.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "logger-ext.hpp" -#include "logger-macros.hpp" - -#if !defined(RAFT_COMPILED) -#include "logger-inl.hpp" -#endif diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh index 55184cc615..0e00ef571f 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 388dd60f14..44d55c36de 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -16,7 +16,8 @@ #pragma once -#include // RAFT_LOG_TRACE +#include +#include #include #include // raft::resources #include // is_min_close, DistanceType diff --git a/cpp/include/raft/solver/detail/lap_kernels.cuh b/cpp/include/raft/solver/detail/lap_kernels.cuh index 383c3ab713..3c25852240 100644 --- a/cpp/include/raft/solver/detail/lap_kernels.cuh +++ 
b/cpp/include/raft/solver/detail/lap_kernels.cuh @@ -26,6 +26,7 @@ #include "../linear_assignment_types.hpp" +#include #include #include @@ -552,4 +553,4 @@ RAFT_KERNEL kernel_calcObjValPrimal(weight_t* d_obj_val_primal, } } -} // namespace raft::solver::detail \ No newline at end of file +} // namespace raft::solver::detail diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 02a77a0d99..6f03f77bc0 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/core/logger.cpp b/cpp/src/core/logger.cpp deleted file mode 100644 index 8f81cf2926..0000000000 --- a/cpp/src/core/logger.cpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 621ee6c160..4cd0a32f51 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -55,6 +55,7 @@ function(ConfigureTest) ${RAFT_CTK_MATH_DEPENDENCIES} $ $ + raft_test_logger ) set_target_properties( ${TEST_NAME} @@ -87,6 +88,10 @@ function(ConfigureTest) ) endfunction() +# Create an object library for the logger so that we don't have to recompile it. 
+add_library(raft_test_logger OBJECT) +target_link_libraries(raft_test_logger PRIVATE raft_logger_impl) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## diff --git a/cpp/test/core/device_resources_manager.cpp b/cpp/test/core/device_resources_manager.cpp index c63d5896e5..007b57378f 100644 --- a/cpp/test/core/device_resources_manager.cpp +++ b/cpp/test/core/device_resources_manager.cpp @@ -89,7 +89,7 @@ TEST(DeviceResourcesManager, ObeysSetters) // Suppress the many warnings from testing use of setters after initial // get_device_resources call - auto scoped_log_level = log_level_setter{RAFT_LEVEL_ERROR}; + auto scoped_log_level = log_level_setter{level_enum::error}; omp_set_dynamic(0); #pragma omp parallel for num_threads(5) diff --git a/cpp/test/core/logger.cpp b/cpp/test/core/logger.cpp index 7f31beed71..10adb71dda 100644 --- a/cpp/test/core/logger.cpp +++ b/cpp/test/core/logger.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -// We set RAFT_ACTIVE_LEVEL to a value that would enable testing trace and debug logs +// We set RAFT_LOG_ACTIVE_LEVEL to a value that would enable testing trace and debug logs // (otherwise trace and debug logs are desabled by default). 
-#undef RAFT_ACTIVE_LEVEL -#define RAFT_ACTIVE_LEVEL 6 +#undef RAFT_LOG_ACTIVE_LEVEL +#define RAFT_LOG_ACTIVE_LEVEL RAFT_LOG_LEVEL_TRACE #include @@ -34,15 +34,15 @@ TEST(logger, Test) RAFT_LOG_WARN("This is a warning message"); RAFT_LOG_INFO("This is an info message"); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_WARN); - ASSERT_EQ(RAFT_LEVEL_WARN, logger::get(RAFT_NAME).get_level()); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_INFO); - ASSERT_EQ(RAFT_LEVEL_INFO, logger::get(RAFT_NAME).get_level()); + default_logger().set_level(raft::level_enum::warn); + ASSERT_EQ(raft::level_enum::warn, default_logger().level()); + default_logger().set_level(raft::level_enum::info); + ASSERT_EQ(raft::level_enum::info, default_logger().level()); - ASSERT_FALSE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_TRACE)); - ASSERT_FALSE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_DEBUG)); - ASSERT_TRUE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_INFO)); - ASSERT_TRUE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_WARN)); + ASSERT_FALSE(default_logger().should_log(raft::level_enum::trace)); + ASSERT_FALSE(default_logger().should_log(raft::level_enum::debug)); + ASSERT_TRUE(default_logger().should_log(raft::level_enum::info)); + ASSERT_TRUE(default_logger().should_log(raft::level_enum::warn)); } std::string logged = ""; @@ -57,60 +57,61 @@ class loggerTest : public ::testing::Test { { flushCount = 0; logged = ""; - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_TRACE); + default_logger().set_level(raft::level_enum::trace); } void TearDown() override { - logger::get(RAFT_NAME).set_callback(nullptr); - logger::get(RAFT_NAME).set_flush(nullptr); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_INFO); + default_logger().sinks().pop_back(); + default_logger().set_level(raft::level_enum::info); } }; -// The logging macros depend on `RAFT_ACTIVE_LEVEL` as well as the logger verbosity; -// The verbosity is set to `RAFT_LEVEL_TRACE`, but `RAFT_ACTIVE_LEVEL` is set outside of here. 
-auto check_if_logged(const std::string& msg, int log_level_def) -> bool +// The logging macros depend on `RAFT_LOG_ACTIVE_LEVEL` as well as the logger verbosity; +// The verbosity is set to `RAFT_LOG_LEVEL_TRACE`, but `RAFT_LOG_ACTIVE_LEVEL` is set outside of +// here. +auto check_if_logged(const std::string& msg, raft::level_enum log_level_def) -> bool { bool actually_logged = logged.find(msg) != std::string::npos; - bool should_be_logged = RAFT_ACTIVE_LEVEL >= log_level_def; + bool should_be_logged = RAFT_LOG_ACTIVE_LEVEL <= static_cast(log_level_def); return actually_logged == should_be_logged; } TEST_F(loggerTest, callback) { std::string testMsg; - logger::get(RAFT_NAME).set_callback(exampleCallback); + default_logger().sinks().push_back(std::make_shared(exampleCallback)); testMsg = "This is a critical message"; RAFT_LOG_CRITICAL(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_CRITICAL)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::critical)); testMsg = "This is an error message"; RAFT_LOG_ERROR(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_ERROR)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::error)); testMsg = "This is a warning message"; RAFT_LOG_WARN(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_WARN)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::warn)); testMsg = "This is an info message"; RAFT_LOG_INFO(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_INFO)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::info)); testMsg = "This is a debug message"; RAFT_LOG_DEBUG(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_DEBUG)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::debug)); testMsg = "This is a trace message"; RAFT_LOG_TRACE(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_TRACE)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::trace)); } TEST_F(loggerTest, flush) { - 
logger::get(RAFT_NAME).set_flush(exampleFlush); - logger::get(RAFT_NAME).flush(); + default_logger().sinks().push_back( + std::make_shared(exampleCallback, exampleFlush)); + default_logger().flush(); ASSERT_EQ(1, flushCount); } diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 5cc694dc8f..6240b2638b 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -256,14 +256,14 @@ There are 7 logging levels with each successive level becoming quieter: 7. RAFT_LEVEL_OFF Pass one of these as per your needs into the `set_level()` method as follows: ```cpp -raft::logger::get().set_level(RAFT_LEVEL_WARN); +raft::default_logger().set_level(RAFT_LEVEL_WARN); // From now onwards, this will print only WARN and above kind of messages ``` ### Changing logging pattern Pass the [format string](https://github.com/gabime/spdlog/wiki/3.-Custom-formatting) as follows in order use a different logging pattern than the default. ```cpp -raft::logger::get.set_pattern(YourFavoriteFormat); +raft::default_logger().set_pattern(YourFavoriteFormat); ``` One can also use the corresponding `get_pattern()` method to know the current format as well. From 26f8d06c6699e50fa085763fc8f0d7a6d02c5ceb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 3 Jan 2025 11:29:49 -0800 Subject: [PATCH 15/37] Use rapids-cmake for the logger (#2534) This PR switches raft to use rapids-cmake to fetch rapids-logger so that it uses a consistent version with the rest of RAPIDS to avoid any cases where transitive CPM loads result in multiple packages being built from source that require a different version of rapids-logger. Depends on https://github.com/rapidsai/rapids-cmake/pull/737 and https://github.com/rapidsai/rmm/pull/1776. Contributes to rapidsai/build-planning#104. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2534 --- cpp/CMakeLists.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 06531941aa..621f9fcef2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -163,11 +163,8 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() -# Not using rapids-cmake since we never want to find, always download. -CPMAddPackage( - NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG - 4df3ee70c6746fd1b6c0dc14209dae2e2d4378c6 VERSION 4df3ee70c6746fd1b6c0dc14209dae2e2d4378c6 -) +include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) +rapids_cpm_rapids_logger() rapids_make_logger(raft LOGGER_HEADER_DIR include/raft/core EXPORT_SET raft-exports) # CCCL before rmm/cuco so we get the right version of CCCL From 8fc988e11d82404ef7b52f4c810d4a4ed07cd2a2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 7 Jan 2025 15:49:56 -0600 Subject: [PATCH 16/37] remove unused 'joblib' and 'numba' dependencies, other packaging cleanup (#2532) Proposes some cleanup of packaging details, noticed while I was working on #2531 * removes runtime dependencies on `joblib` and `numba` for `raft-dask` - *`raft-dask` doesn't directly import from these libraries, and the git blame didn't suggest any other reason that they were being pinned here* - *checked with `git grep -E 'joblib|numba'` * removes `setup.cfg` files - *these are currently being ignored by tools, in favor of identical configuration in `pyproject.toml` and `.flake8` files* - e.g. 
https://github.com/rapidsai/raft/blob/bfd190687ee396374b7106d9ac26add73b57b22a/.pre-commit-config.yaml#L16-L19 * packages license files in conda packages - *think these were just missed in the round of PRs like this: https://github.com/rapidsai/cuml/pull/6061* * removes some outdated / inaccurate comments in packaging configs Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2532 --- .pre-commit-config.yaml | 3 +- .../all_cuda-118_arch-aarch64.yaml | 2 - .../all_cuda-118_arch-x86_64.yaml | 2 - .../all_cuda-125_arch-aarch64.yaml | 2 - .../all_cuda-125_arch-x86_64.yaml | 2 - conda/recipes/pylibraft/meta.yaml | 4 +- conda/recipes/raft-dask/meta.yaml | 5 +- dependencies.yaml | 4 -- pyproject.toml | 2 +- python/pylibraft/setup.cfg | 38 ------------- python/raft-dask/pyproject.toml | 2 - setup.cfg | 55 ------------------- 12 files changed, 4 insertions(+), 117 deletions(-) delete mode 100644 python/pylibraft/setup.cfg delete mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3b3c8c440..d5456ba30b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -110,8 +110,7 @@ repos: [.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$| CMakeLists[.]txt$| CMakeLists_standalone[.]txt$| - meta[.]yaml$| - setup[.]cfg$ + meta[.]yaml$ exclude: | (?x) cpp/include/raft/neighbors/detail/faiss_select/| diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index e145aeb92e..793ca8dc67 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -26,7 +26,6 @@ dependencies: - gcc_linux-aarch64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -38,7 +37,6 @@ dependencies: - libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - 
ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 75dcffa95d..a9f839bd03 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,6 @@ dependencies: - gcc_linux-64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -38,7 +37,6 @@ dependencies: - libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index bfa32c80d1..9d7286bb8e 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -27,7 +27,6 @@ dependencies: - gcc_linux-aarch64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -35,7 +34,6 @@ dependencies: - libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - pre-commit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 98ec334635..e4ec074ae5 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -27,7 +27,6 @@ dependencies: - gcc_linux-64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -35,7 +34,6 @@ dependencies: - libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - pre-commit diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml index 4a8ed29c85..0b57432402 100644 --- a/conda/recipes/pylibraft/meta.yaml +++ b/conda/recipes/pylibraft/meta.yaml @@ -1,7 +1,5 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-# Usage: -# conda build . -c conda-forge -c numba -c rapidsai -c pytorch {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} @@ -81,5 +79,5 @@ tests: about: home: https://rapids.ai/ license: Apache-2.0 - # license_file: LICENSE + license_file: LICENSE summary: pylibraft library diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index a8be273f82..19155166af 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -1,7 +1,5 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Usage: -# conda build . -c conda-forge -c numba -c rapidsai -c pytorch {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} @@ -70,7 +68,6 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - dask-cuda ={{ minor_version }} - rapids-dask-dependency ={{ minor_version }} - - joblib >=0.11 - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x @@ -87,5 +84,5 @@ tests: about: home: https://rapids.ai/ license: Apache-2.0 - # license_file: LICENSE + license_file: LICENSE summary: raft-dask library diff --git a/dependencies.yaml b/dependencies.yaml index dc1807fbf9..689cf8414c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -392,8 +392,6 @@ dependencies: - output_types: [conda, pyproject] packages: - dask-cuda==25.2.*,>=0.0.0a0 - - joblib>=0.11 - - numba>=0.57 - rapids-dask-dependency==25.2.*,>=0.0.0a0 - output_types: conda packages: @@ -402,7 +400,6 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: @@ -482,7 +479,6 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. - --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: diff --git a/pyproject.toml b/pyproject.toml index 5042113388..2f23debfbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,6 @@ exclude = [ skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild" # ignore short words, and typename parameters like OffsetT ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -ignore-words-list = "inout,numer" +ignore-words-list = "inout,unparseable,numer" builtin = "clear" quiet-level = 3 diff --git a/python/pylibraft/setup.cfg b/python/pylibraft/setup.cfg deleted file mode 100644 index 7d1a0c9065..0000000000 --- a/python/pylibraft/setup.cfg +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
- -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - nvtext - cudf - cuml - cugraph - dask_cudf - rmm -known_first_party= - raft - pylibraft -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 33643c481e..cabe8e72a6 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -33,8 +33,6 @@ requires-python = ">=3.10" dependencies = [ "dask-cuda==25.2.*,>=0.0.0a0", "distributed-ucxx==0.42.*,>=0.0.0a0", - "joblib>=0.11", - "numba>=0.57", "pylibraft==25.2.*,>=0.0.0a0", "rapids-dask-dependency==25.2.*,>=0.0.0a0", "ucx-py==0.42.*,>=0.0.0a0", diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 94140d4d00..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. - -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like ) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) 
- # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 - -[pydocstyle] -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather -# than include using match-dir. Note that as discussed in -# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, -# unlike the match option above this match-dir will have no effect when -# pydocstyle is invoked from pre-commit. Therefore this exclusion list must -# also be maintained in the pre-commit config file. -match-dir = ^(?!(ci|cpp|conda|docs)).*$ -# Allow missing docstrings for docutils -ignore-decorators = .*(docutils|doc_apply|copy_docstring).* -select = - D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418 - # Would like to enable the following rules in the future: - # D200, D202, D205, D400 - -[mypy] -ignore_missing_imports = True -# If we don't specify this, then mypy will check excluded files if -# they are imported by a checked file. 
-follow_imports = skip - -[codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild -# ignore short words, and typename parameters like OffsetT -ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b -ignore-words-list = inout,unparseable,numer -builtin = clear -quiet-level = 3 From 1b62c4117a35b11ce3c830daae248e32ebf75e3f Mon Sep 17 00:00:00 2001 From: Victor Lafargue Date: Fri, 10 Jan 2025 22:29:03 +0100 Subject: [PATCH 17/37] Fix lanczos solver integer overflow (#2536) Partially answers https://github.com/rapidsai/cuml/issues/6204 Authors: - Victor Lafargue (https://github.com/viclafargue) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2536 --- cpp/include/raft/sparse/detail/coo.cuh | 14 ++++++++----- .../raft/sparse/solver/detail/lanczos.cuh | 21 +++++++++++-------- .../raft/spectral/detail/matrix_wrappers.hpp | 8 +++---- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index 91ba363168..9a38c11a07 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -182,7 +182,7 @@ class COO { * @param n_rows: number of rows in the dense matrix * @param n_cols: number of columns in the dense matrix */ - void setSize(int n_rows, int n_cols) + void setSize(Index_Type n_rows, Index_Type n_cols) { this->n_rows = n_rows; this->n_cols = n_cols; @@ -192,7 +192,7 @@ class COO { * @brief Set the number of rows and cols for a square dense matrix * @param n: number of rows and cols */ - void setSize(int n) + void setSize(Index_Type n) { this->n_rows = n; this->n_cols = n; @@ -204,7 +204,10 @@ class COO { * @param init: should values be 
initialized to 0? * @param stream: CUDA stream to use */ - void allocate(int nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); } + void allocate(Index_Type nnz, bool init, cudaStream_t stream) + { + this->allocate(nnz, 0, init, stream); + } /** * @brief Allocate the underlying arrays @@ -213,7 +216,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(int nnz, int size, bool init, cudaStream_t stream) + void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream) { this->allocate(nnz, size, size, init, stream); } @@ -226,7 +229,8 @@ class COO { * @param init: should values be initialized to 0? * @param stream: stream to use for init */ - void allocate(int nnz, int n_rows, int n_cols, bool init, cudaStream_t stream) + void allocate( + Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { this->n_rows = n_rows; this->n_cols = n_cols; diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 6f03f77bc0..ddfa01731a 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -624,7 +624,7 @@ static int lanczosRestart(raft::resources const& handle, value_type_t* shifts_host; // Orthonormal matrix for similarity transform - value_type_t* V_dev = work_dev + n * iter; + value_type_t* V_dev = work_dev + (size_t)n * (size_t)iter; // ------------------------------------------------------- // Implementation @@ -641,7 +641,7 @@ static int lanczosRestart(raft::resources const& handle, // std::cout < 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); @@ -1395,10 
+1398,10 @@ int computeLargestEigenvectors( unsigned long long seed = 123456) { // Matrix dimension - index_type_t n = A.nrows_; + size_t n = A.nrows_; // Check that parameters are valid - RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 1fe078bd32..db8a5dc9ef 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -39,14 +39,14 @@ // ========================================================= // Get index of matrix entry -#define IDX(i, j, lda) ((i) + (j) * (lda)) +#define IDX(i, j, lda) ((size_t)(i) + (j) * (lda)) namespace raft { namespace spectral { namespace matrix { namespace detail { -using size_type = int; // for now; TODO: move it in appropriate header +using size_type = size_t; // for now; TODO: move it in appropriate header // Apply diagonal matrix to vector: // @@ -326,7 +326,7 @@ struct laplacian_matrix_t : sparse_matrix_t { raft_handle, row_offsets, col_indices, values, nrows, nnz), diagonal_(raft_handle, nrows) { - vector_t ones{raft_handle, nrows}; + vector_t ones{raft_handle, (size_t)nrows}; ones.fill(1.0); sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); } @@ -341,7 +341,7 @@ struct laplacian_matrix_t : sparse_matrix_t { csr_m.nnz_), diagonal_(raft_handle, csr_m.nrows_) { - vector_t ones{raft_handle, csr_m.nrows_}; + vector_t ones{raft_handle, (size_t)csr_m.nrows_}; ones.fill(1.0); sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); } From 5c826d7320486852c30a18f6e039d0cda83c5c62 Mon Sep 17 00:00:00 2001 From: Micka Date: Tue, 14 Jan 2025 00:34:20 +0100 Subject: [PATCH 
18/37] Add support for different data type of bitset (#2535) This PR is useful for Milvus. Previously the `bitset_view` object only supported the data type used to create the bitset. With the proposed modifications, a `bitset_view` object can support any data type used to create the bitset by specifying the `original_nbits` parameter in the class constructor. Authors: - Micka (https://github.com/lowener) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - rhdong (https://github.com/rhdong) URL: https://github.com/rapidsai/raft/pull/2535 --- cpp/include/raft/core/bitmap.hpp | 24 ++++++-- cpp/include/raft/core/bitset.cuh | 53 ++++++++++++++--- cpp/include/raft/core/bitset.hpp | 34 +++++++++-- cpp/test/core/bitset.cu | 98 ++++++++++++++++++++++++++++++-- 4 files changed, 188 insertions(+), 21 deletions(-) diff --git a/cpp/include/raft/core/bitmap.hpp b/cpp/include/raft/core/bitmap.hpp index 86b2d77478..5a6656f572 100644 --- a/cpp/include/raft/core/bitmap.hpp +++ b/cpp/include/raft/core/bitmap.hpp @@ -53,9 +53,18 @@ struct bitmap_view : public bitset_view { * @param bitmap_ptr Device raw pointer * @param rows Number of row in the matrix. * @param cols Number of col in the matrix. + * @param original_nbits Original number of bits used when the bitmap was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitmap was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. 
*/ - _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, index_t rows, index_t cols) - : bitset_view(bitmap_ptr, rows * cols), rows_(rows), cols_(cols) + _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, + index_t rows, + index_t cols, + index_t original_nbits = 0) + : bitset_view(bitmap_ptr, rows * cols, original_nbits), + rows_(rows), + cols_(cols) { } @@ -65,11 +74,18 @@ struct bitmap_view : public bitset_view { * @param bitmap_span Device vector view of the bitmap * @param rows Number of row in the matrix. * @param cols Number of col in the matrix. + * @param original_nbits Original number of bits used when the bitmap was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitmap was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. */ _RAFT_HOST_DEVICE bitmap_view(raft::device_vector_view bitmap_span, index_t rows, - index_t cols) - : bitset_view(bitmap_span, rows * cols), rows_(rows), cols_(cols) + index_t cols, + index_t original_nbits = 0) + : bitset_view(bitmap_span, rows * cols, original_nbits), + rows_(rows), + cols_(cols) { } diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index d1bffdb81e..feaef1a172 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -32,12 +32,41 @@ namespace raft::core { +template +_RAFT_HOST_DEVICE void inline compute_original_nbits_position(const index_t original_nbits, + const index_t nbits, + const index_t sample_index, + index_t& new_bit_index, + index_t& new_bit_offset) +{ + const index_t original_bit_index = sample_index / original_nbits; + const index_t original_bit_offset = sample_index % original_nbits; + new_bit_index = original_bit_index * original_nbits / nbits; + new_bit_offset = 0; + if (original_nbits > nbits) { + new_bit_index += original_bit_offset / nbits; + new_bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = 
nbits / original_nbits; + new_bit_offset += (original_bit_index % ratio) * original_nbits; + new_bit_offset += original_bit_offset % nbits; + } +} + template _RAFT_HOST_DEVICE inline bool bitset_view::test(const index_t sample_index) const { - const bitset_t bit_element = bitset_ptr_[sample_index / bitset_element_size]; - const index_t bit_index = sample_index % bitset_element_size; - const bool is_bit_set = (bit_element & (bitset_t{1} << bit_index)) != 0; + const index_t nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + if (original_nbits_ == 0 || nbits == original_nbits_) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset); + } + const bitset_t bit_element = bitset_ptr_[bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0; return is_bit_set; } @@ -51,14 +80,22 @@ template _RAFT_DEVICE void bitset_view::set(const index_t sample_index, bool set_value) const { - const index_t bit_element = sample_index / bitset_element_size; - const index_t bit_index = sample_index % bitset_element_size; - const bitset_t bitmask = bitset_t{1} << bit_index; + const index_t nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + + if (original_nbits_ == 0 || nbits == original_nbits_) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset); + } + const bitset_t bitmask = bitset_t{1} << bit_offset; if (set_value) { - atomicOr(bitset_ptr_ + bit_element, bitmask); + atomicOr(bitset_ptr_ + bit_index, bitmask); } else { const bitset_t bitmask2 = ~bitmask; - atomicAnd(bitset_ptr_ + bit_element, bitmask2); + atomicAnd(bitset_ptr_ + bit_index, bitmask2); } } diff --git 
a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index be828def87..e4bea2c0c5 100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -42,8 +42,20 @@ template struct bitset_view { static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8; - _RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, index_t bitset_len) - : bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len} + /** + * @brief Create a bitset view from a device pointer to the bitset. + * + * @param bitset_ptr Device pointer to the bitset + * @param bitset_len Number of bits in the bitset + * @param original_nbits Original number of bits used when the bitset was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitset was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. + */ + _RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits = 0) + : bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len}, original_nbits_{original_nbits} { } /** @@ -51,10 +63,17 @@ struct bitset_view { * * @param bitset_span Device vector view of the bitset * @param bitset_len Number of bits in the bitset + * @param original_nbits Original number of bits used when the bitset was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitset was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. 
*/ _RAFT_HOST_DEVICE bitset_view(raft::device_vector_view bitset_span, - index_t bitset_len) - : bitset_ptr_{bitset_span.data_handle()}, bitset_len_{bitset_len} + index_t bitset_len, + index_t original_nbits = 0) + : bitset_ptr_{bitset_span.data_handle()}, + bitset_len_{bitset_len}, + original_nbits_{original_nbits} { } /** @@ -180,9 +199,16 @@ struct bitset_view { return (bitset_len + bits_per_element - 1) / bits_per_element; } + /** + * @brief Get the original number of bits of the bitset. + */ + auto get_original_nbits() const -> index_t { return original_nbits_; } + void set_original_nbits(index_t original_nbits) { original_nbits_ = original_nbits; } + private: bitset_t* bitset_ptr_; index_t bitset_len_; + index_t original_nbits_; }; /** diff --git a/cpp/test/core/bitset.cu b/cpp/test/core/bitset.cu index ac601274c1..f094f60ded 100644 --- a/cpp/test/core/bitset.cu +++ b/cpp/test/core/bitset.cu @@ -24,6 +24,8 @@ #include #include +#include +#include #include namespace raft::core { @@ -73,6 +75,40 @@ void test_cpu_bitset(const std::vector& bitset, } } +template +void test_cpu_bitset_nbits(const bitset_t* bitset, + const std::vector& queries, + std::vector& result, + unsigned original_nbits_) +{ + constexpr size_t nbits = sizeof(bitset_t) * 8; + if (original_nbits_ == nbits) { + for (size_t i = 0; i < queries.size(); i++) { + result[i] = + uint8_t((bitset[queries[i] / nbits] & (bitset_t{1} << (queries[i] % nbits))) != 0); + } + } + for (size_t i = 0; i < queries.size(); i++) { + const index_t sample_index = queries[i]; + const index_t original_bit_index = sample_index / original_nbits_; + const index_t original_bit_offset = sample_index % original_nbits_; + index_t new_bit_index = original_bit_index * original_nbits_ / nbits; + index_t new_bit_offset = 0; + if (original_nbits_ > nbits) { + new_bit_index += original_bit_offset / nbits; + new_bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = nbits / original_nbits_; + new_bit_offset += 
(original_bit_index % ratio) * original_nbits_; + new_bit_offset += original_bit_offset % nbits; + } + const bitset_t bit_element = bitset[new_bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << new_bit_offset)) != 0; + + result[i] = uint8_t(is_bit_set); + } +} + template void flip_cpu_bitset(std::vector& bitset) { @@ -168,11 +204,12 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); - auto query_device = raft::make_device_vector(res, spec.query_len); - auto result_device = raft::make_device_vector(res, spec.query_len); - auto query_cpu = std::vector(spec.query_len); - auto result_cpu = std::vector(spec.query_len); - auto result_ref = std::vector(spec.query_len); + auto query_device = raft::make_device_vector(res, spec.query_len); + auto result_device = raft::make_device_vector(res, spec.query_len); + auto query_cpu = std::vector(spec.query_len); + auto result_cpu = std::vector(spec.query_len); + auto result_ref_nbits = std::vector(spec.query_len); + auto result_ref = std::vector(spec.query_len); // Create queries and verify the test results raft::random::uniformInt(res, rng, query_device.view(), index_t(0), index_t(spec.bitset_len)); @@ -194,6 +231,57 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); + // Reinterpret the bitset as uint8_t, uint32 then uint64_t + { + // Test CPU logic + test_cpu_bitset(bitset_ref, query_cpu, result_ref); + uint8_t* bitset_cpu_uint8 = (uint8_t*)std::malloc(sizeof(bitset_t) * bitset_ref.size()); + std::memcpy(bitset_cpu_uint8, bitset_ref.data(), sizeof(bitset_t) * bitset_ref.size()); + test_cpu_bitset_nbits(bitset_cpu_uint8, query_cpu, result_ref_nbits, sizeof(bitset_t) * 8); + ASSERT_TRUE(hostVecMatch(result_ref, result_ref_nbits, raft::Compare())); + std::free(bitset_cpu_uint8); + + // Test GPU 
uint8_t, uint32_t, uint64_t + auto my_bitset_view_uint8_t = raft::core::bitset_view( + reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint8_t] __device__(index_t query) { + return my_bitset_view_uint8_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + + auto my_bitset_view_uint32_t = raft::core::bitset_view( + reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint32_t] __device__(index_t query) { + return my_bitset_view_uint32_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + + auto my_bitset_view_uint64_t = raft::core::bitset_view( + reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint64_t] __device__(index_t query) { + return my_bitset_view_uint64_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + } + // test sparsity, repeat and eval_n_elements { auto my_bitset_view = my_bitset.view(); From 32e918b37809d132fb71faefa7e06b7eea43565d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 15 Jan 2025 07:38:33 -0800 Subject: [PATCH 19/37] Add missing `#include ` (#2540) This is needed to define `uint64_t` later on. 
Authors: - https://github.com/jakirkham Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2540 --- cpp/include/raft/util/integer_utils.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/util/integer_utils.hpp b/cpp/include/raft/util/integer_utils.hpp index 5224d5ac4c..7ea5a3d212 100644 --- a/cpp/include/raft/util/integer_utils.hpp +++ b/cpp/include/raft/util/integer_utils.hpp @@ -25,6 +25,7 @@ #include +#include #include #include #include From a7f191639575f5bfcccf48d670ed33a34860f763 Mon Sep 17 00:00:00 2001 From: rhdong Date: Wed, 15 Jan 2025 08:04:55 -0800 Subject: [PATCH 20/37] [Feat] Support `bitset_to_csr` (#2523) This API, `bitset_to_csr`, will be utilized to implement the `bitset`-based filter for prefiltered Brute Force. Authors: - rhdong (https://github.com/rhdong) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2523 --- cpp/bench/prims/linalg/masked_matmul.cu | 111 ++++--- cpp/bench/prims/sparse/bitset_to_csr.cu | 178 +++++++++++ cpp/include/raft/core/bitmap.cuh | 8 + cpp/include/raft/core/bitmap.hpp | 20 ++ cpp/include/raft/core/bitset.cuh | 8 + cpp/include/raft/core/bitset.hpp | 63 ++++ cpp/include/raft/sparse/convert/csr.cuh | 78 ++++- .../sparse/convert/detail/bitmap_to_csr.cuh | 12 +- .../sparse/convert/detail/bitset_to_csr.cuh | 184 +++++++++++ .../sparse/linalg/detail/masked_matmul.cuh | 66 +++- .../raft/sparse/linalg/masked_matmul.cuh | 117 +++++++ .../raft/sparse/linalg/masked_matmul.hpp | 61 +--- cpp/test/sparse/convert_csr.cu | 289 +++++++++++++++++- cpp/test/sparse/masked_matmul.cu | 130 ++++++-- 14 files changed, 1199 insertions(+), 126 deletions(-) create mode 100644 cpp/bench/prims/sparse/bitset_to_csr.cu create mode 100644 cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh create mode 100644 cpp/include/raft/sparse/linalg/masked_matmul.cuh diff --git 
a/cpp/bench/prims/linalg/masked_matmul.cu b/cpp/bench/prims/linalg/masked_matmul.cu index eda9cb1710..b96e14a25d 100644 --- a/cpp/bench/prims/linalg/masked_matmul.cu +++ b/cpp/bench/prims/linalg/masked_matmul.cu @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -49,11 +49,14 @@ inline auto operator<<(std::ostream& os, const MaskedMatmulBenchParams& { os << " m*k*n=" << params.m << "*" << params.k << "*" << params.n << "\tsparsity=" << params.sparsity; - if (params.sparsity == 1.0) { os << "<-inner product for comparison"; } + if (params.sparsity == 0.0) { os << "<-inner product for comparison"; } return os; } -template +template struct MaskedMatmulBench : public fixture { MaskedMatmulBench(const MaskedMatmulBenchParams& p) : fixture(true), @@ -64,15 +67,15 @@ struct MaskedMatmulBench : public fixture { c_indptr_d(0, stream), c_indices_d(0, stream), c_data_d(0, stream), - bitmap_d(0, stream), + bits_d(0, stream), c_dense_data_d(0, stream) { - index_t element = raft::ceildiv(index_t(params.m * params.n), index_t(sizeof(bitmap_t) * 8)); - std::vector bitmap_h(element); + index_t element = raft::ceildiv(index_t(params.m * params.n), index_t(sizeof(bits_t) * 8)); + std::vector bits_h(element); a_data_d.resize(params.m * params.k, stream); b_data_d.resize(params.k * params.n, stream); - bitmap_d.resize(element, stream); + bits_d.resize(element, stream); raft::random::RngState rng(2024ULL); raft::random::uniform( @@ -82,7 +85,13 @@ struct MaskedMatmulBench : public fixture { std::vector c_dense_data_h(params.m * params.n); - c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bitmap_h); + if constexpr (bitmap_or_bitset) { + c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bits_h); + } else { + c_true_nnz = create_sparse_matrix(1, params.n, params.sparsity, bits_h); + repeat_cpu_bitset_inplace(bits_h, params.n, params.m - 1); + c_true_nnz *= params.m; + } std::vector values(c_true_nnz); 
std::vector indices(c_true_nnz); @@ -93,24 +102,49 @@ struct MaskedMatmulBench : public fixture { c_indices_d.resize(c_true_nnz, stream); c_dense_data_d.resize(params.m * params.n, stream); - cpu_convert_to_csr(bitmap_h, params.m, params.n, indices, indptr); + cpu_convert_to_csr(bits_h, params.m, params.n, indices, indptr); RAFT_EXPECTS(c_true_nnz == c_indices_d.size(), "Something wrong. The c_true_nnz != c_indices_d.size()!"); update_device(c_data_d.data(), values.data(), c_true_nnz, stream); update_device(c_indices_d.data(), indices.data(), c_true_nnz, stream); update_device(c_indptr_d.data(), indptr.data(), params.m + 1, stream); - update_device(bitmap_d.data(), bitmap_h.data(), element, stream); + update_device(bits_d.data(), bits_h.data(), element, stream); + } + + void repeat_cpu_bitset_inplace(std::vector& inout, size_t input_bits, size_t repeat) + { + size_t output_bit_index = input_bits; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bits_t) * 8); + size_t input_bit_offset = i % (sizeof(bits_t) * 8); + bool bit = (inout[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bits_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bits_t) * 8); + + inout[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } } - index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bits) { index_t total = static_cast(m * n); - index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; - for (auto& item : bitmap) { - item = static_cast(0); + if (sparsity == 0.0f) { + std::fill(bits.begin(), bits.end(), 0xffffffff); + return num_ones; + } + + for (auto& item : bits) { + item = static_cast(0); } 
std::random_device rd; @@ -120,8 +154,8 @@ struct MaskedMatmulBench : public fixture { while (num_ones > 0) { index_t index = dis(gen); - bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; - index_t bit_position = index % (8 * sizeof(bitmap_t)); + bits_t& element = bits[index / (8 * sizeof(bits_t))]; + index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { element |= (static_cast(1) << bit_position); @@ -131,7 +165,7 @@ struct MaskedMatmulBench : public fixture { return res; } - void cpu_convert_to_csr(std::vector& bitmap, + void cpu_convert_to_csr(std::vector& bits, index_t rows, index_t cols, std::vector& indices, @@ -142,14 +176,14 @@ struct MaskedMatmulBench : public fixture { indptr[offset_indptr++] = 0; index_t index = 0; - bitmap_t element = 0; + bits_t element = 0; index_t bit_position = 0; for (index_t i = 0; i < rows; ++i) { for (index_t j = 0; j < cols; ++j) { index = i * cols + j; - element = bitmap[index / (8 * sizeof(bitmap_t))]; - bit_position = index % (8 * sizeof(bitmap_t)); + element = bits[index / (8 * sizeof(bits_t))]; + bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1)) { indices[offset_values] = static_cast(j); @@ -181,13 +215,17 @@ struct MaskedMatmulBench : public fixture { params.n, static_cast(c_indices_d.size())); - auto mask = - raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); - auto c = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - if (params.sparsity < 1.0) { - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + if (params.sparsity > 0.0) { + if constexpr (bitmap_or_bitset) { + auto mask = + raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } else { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } } else { raft::distance::pairwise_distance(handle, 
a_data_d.data(), @@ -201,12 +239,16 @@ struct MaskedMatmulBench : public fixture { } resource::sync_stream(handle); - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); - resource::sync_stream(handle); - - loop_on_state(state, [this, &a, &b, &mask, &c]() { - if (params.sparsity < 1.0) { - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + loop_on_state(state, [this, &a, &b, &c]() { + if (params.sparsity > 0.0) { + if constexpr (bitmap_or_bitset) { + auto mask = + raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } else { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } } else { raft::distance::pairwise_distance(handle, a_data_d.data(), @@ -228,7 +270,7 @@ struct MaskedMatmulBench : public fixture { rmm::device_uvector a_data_d; rmm::device_uvector b_data_d; - rmm::device_uvector bitmap_d; + rmm::device_uvector bits_d; rmm::device_uvector c_dense_data_d; @@ -253,7 +295,7 @@ static std::vector> getInputs() raft::util::itertools::product({size_t(10), size_t(1024)}, {size_t(128), size_t(1024)}, {size_t(1024 * 1024)}, - {0.01f, 0.1f, 0.2f, 0.5f, 1.0f}); + {0.99f, 0.9f, 0.8f, 0.5f, 0.0f}); param_vec.reserve(params_group.size()); for (TestParams params : params_group) { @@ -263,6 +305,7 @@ static std::vector> getInputs() return param_vec; } -RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); +RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); +RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); } // namespace raft::bench::linalg diff --git a/cpp/bench/prims/sparse/bitset_to_csr.cu b/cpp/bench/prims/sparse/bitset_to_csr.cu new file mode 100644 index 0000000000..fef2d44d3e --- /dev/null +++ b/cpp/bench/prims/sparse/bitset_to_csr.cu @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace raft::bench::sparse { + +template +struct bench_param { + index_t n_repeat; + index_t n_cols; + float sparsity; +}; + +template +inline auto operator<<(std::ostream& os, const bench_param& params) -> std::ostream& +{ + os << " rows*cols=" << params.n_repeat << "*" << params.n_cols + << "\tsparsity=" << params.sparsity; + return os; +} + +template +struct BitsetToCsrBench : public fixture { + BitsetToCsrBench(const bench_param& p) + : fixture(true), + params(p), + handle(stream), + bitset_d(0, stream), + nnz(0), + indptr_d(0, stream), + indices_d(0, stream), + values_d(0, stream) + { + index_t element = raft::ceildiv(1 * params.n_cols, index_t(sizeof(bitset_t) * 8)); + std::vector bitset_h(element); + nnz = create_sparse_matrix(1, params.n_cols, params.sparsity, bitset_h); + + bitset_d.resize(bitset_h.size(), stream); + indptr_d.resize(params.n_repeat + 1, stream); + indices_d.resize(nnz, stream); + values_d.resize(nnz, stream); + + update_device(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + + resource::sync_stream(handle); + } + + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitset) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); + index_t res = num_ones; + + for (auto& item : bitset) 
{ + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + while (num_ones > 0) { + index_t index = dis(gen); + + bitset_t& element = bitset[index / (8 * sizeof(bitset_t))]; + index_t bit_position = index % (8 * sizeof(bitset_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void run_benchmark(::benchmark::State& state) override + { + std::ostringstream label_stream; + label_stream << params; + state.SetLabel(label_stream.str()); + + auto bitset = raft::core::bitset_view(bitset_d.data(), 1 * params.n_cols); + + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_repeat, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + raft::sparse::convert::bitset_to_csr(handle, bitset, csr); + + resource::sync_stream(handle); + loop_on_state(state, [this, &bitset, &csr]() { + raft::sparse::convert::bitset_to_csr(handle, bitset, csr); + }); + } + + protected: + const raft::device_resources handle; + + bench_param params; + + rmm::device_uvector bitset_d; + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + index_t nnz; +}; // struct BitsetToCsrBench + +template +const std::vector> getInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.99f, 0.9f, 0.8f, 0.5f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +template +const std::vector> getLargeInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + 
+ const std::vector params_group = raft::util::itertools::product( + {index_t(1), index_t(100)}, {index_t(100 * 1000000)}, {0.95f, 0.99f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getInputs()); +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getInputs()); + +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getLargeInputs()); + +} // namespace raft::bench::sparse diff --git a/cpp/include/raft/core/bitmap.cuh b/cpp/include/raft/core/bitmap.cuh index 024b1244a6..b2c9df436f 100644 --- a/cpp/include/raft/core/bitmap.cuh +++ b/cpp/include/raft/core/bitmap.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -42,4 +43,11 @@ _RAFT_DEVICE void bitmap_view::set(const index_t row, set(row * cols_ + col, new_value); } +template +template +void bitmap_view::to_csr(const raft::resources& res, csr_matrix_t& csr) const +{ + raft::sparse::convert::bitmap_to_csr(res, *this, csr); +} + } // end namespace raft::core diff --git a/cpp/include/raft/core/bitmap.hpp b/cpp/include/raft/core/bitmap.hpp index 5a6656f572..be305152e8 100644 --- a/cpp/include/raft/core/bitmap.hpp +++ b/cpp/include/raft/core/bitmap.hpp @@ -133,6 +133,26 @@ struct bitmap_view : public bitset_view { */ inline _RAFT_HOST_DEVICE index_t get_n_cols() const { return cols_; } + /** + * @brief Converts to a Compressed Sparse Row (CSR) format matrix. + * + * This method transforms a two-dimensional bitmap matrix into a CSR representation, + * where each '1' bit in the bitmap corresponds to a non-zero entry in the CSR matrix. + * The bitmap is interpreted as a row-major matrix, with rows and columns defined by + * the dimensions of the bitmap. + * + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to raft::device_csr_matrix. 
+ * + * @param[in] res RAFT resources for managing CUDA streams and execution policies. + * @param[out] csr Output parameter where the resulting CSR matrix is stored. Each '1' bit in + * the bitmap corresponds to a non-zero element in the CSR matrix. + * + * The caller must ensure that: The `csr` matrix is pre-allocated with dimensions and non-zero + * count matching the expected output. + */ + template + void to_csr(const raft::resources& res, csr_matrix_t& csr) const; + private: index_t rows_; index_t cols_; diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index feaef1a172..24ef3148b8 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,13 @@ double bitset_view::sparsity(const raft::resources& res) cons return static_cast((1.0 * (size_h - count_h)) / (1.0 * size_h)); } +template +template +void bitset_view::to_csr(const raft::resources& res, csr_matrix_t& csr) const +{ + raft::sparse::convert::bitset_to_csr(res, *this, csr); +} + template bitset::bitset(const raft::resources& res, raft::device_vector_view mask_index, diff --git a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index e4bea2c0c5..94113822fb 100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -205,6 +205,69 @@ struct bitset_view { auto get_original_nbits() const -> index_t { return original_nbits_; } void set_original_nbits(index_t original_nbits) { original_nbits_ = original_nbits; } + /** + * @brief Converts to a Compressed Sparse Row (CSR) format matrix. + * + * This method transforms the bitset view into a CSR matrix representation, where each '1' bit in + * the bitset corresponds to a non-zero entry in the CSR matrix. The bitset format supports + * only a single-row matrix, so if the CSR matrix requires multiple rows, the bitset data is + * repeated for each row in the output. 
+ * + * Example usage: + * + * @code{.cpp} + * #include + * #include + * #include + * + * using bitset_t = uint32_t; + * using index_t = int; + * using value_t = float; + * + * raft::resources handle; + * auto stream = resource::get_cuda_stream(handle); + * index_t n_rows = 3; + * index_t n_cols = 30; + * + * // Compute bitset size and initialize device memory + * index_t bitset_size = (n_cols + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + * rmm::device_uvector bitset_d(bitset_size, stream); + * std::vector bitset_h = { + * bitset_t(0b11001010), + * }; // Example bitset, with 4 non-zero entries. + * + * raft::copy(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + * + * // Create bitset view and CSR matrix + * auto bitset_view = raft::core::bitset_view(bitset_d.data(), n_cols); + * auto csr = raft::make_device_csr_matrix(handle, n_rows, n_cols, 4 * n_rows); + * + * // Convert bitset to CSR + * bitset_view.to_csr(handle, csr); + * resource::sync_stream(handle); + * + * // Results: + * // csr.indptr = [0, 4, 8, 12]; + * // csr.indices = [1, 3, 6, 7, + * // 1, 3, 6, 7, + * // 1, 3, 6, 7]; + * // csr.values = [1, 1, 1, 1, + * // 1, 1, 1, 1, + * // 1, 1, 1, 1]; + * @endcode + * + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to raft::device_csr_matrix. + * + * @param[in] res RAFT resources for managing CUDA streams and execution policies. + * @param[out] csr Output parameter where the resulting CSR matrix is stored. Each '1' bit in + * the bitset corresponds to a non-zero element in the CSR matrix. + * + * The caller must ensure that: The `csr` matrix is pre-allocated with dimensions and non-zero + * count matching the expected output, i.e., `nnz_for_csr = nnz_for_bitset * n_rows`. 
+ */ + template + void to_csr(const raft::resources& res, csr_matrix_t& csr) const; + private: bitset_t* bitset_ptr_; index_t bitset_len_; diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 081192ed44..73d099a719 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -18,10 +18,12 @@ #pragma once -#include +#include +#include #include #include #include +#include #include #include @@ -129,6 +131,80 @@ void bitmap_to_csr(raft::resources const& handle, detail::bitmap_to_csr(handle, bitmap, csr); } +/** + * @brief Converts a bitset matrix to a Compressed Sparse Row (CSR) format matrix. + * + * The bitset format inherently supports only a single-row matrix (rows=1). If the CSR matrix + * requires multiple rows, the data from the bitset will be repeated for each row in the output. + * + * Example usage: + * + * @code{.cpp} + * #include + * #include + * #include + * + * #include + * + * using bitset_t = uint32_t; + * using index_t = int; + * using value_t = float; + * using nnz_t = index_t; + * + * raft::resources handle; + * auto stream = resource::get_cuda_stream(handle); + * index_t n_rows = 3; + * index_t n_cols = 30; + * + * nnz_t nnz_for_bitset = 4; + * nnz_t nnz_for_csr = nnz_for_bitset * n_rows; + * + * index_t bitset_size = (n_cols + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); // = 1 + * + * rmm::device_uvector bitset_d(bitset_size, stream); + * std::vector bitset_h = { + * bitset_t(0b11001010), + * }; // nnz_for_bitset = 4; + * + * raft::copy(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + * + * auto bitset_view = raft::core::bitset_view(bitset_d.data(), n_cols); + * auto csr = raft::make_device_csr_matrix(handle, n_rows, n_cols, nnz_for_csr); + * + * raft::sparse::convert::bitset_to_csr(handle, bitset_view, csr); + * resource::sync_stream(handle); + * + * // Results: + * // csr.indptr = [0, 4, 8, 12]; + * // csr.indices = [1, 3, 6, 7, + * // 1, 
3, 6, 7, + * // 1, 3, 6, 7]; + * // csr.values = [1, 1, 1, 1, + * // 1, 1, 1, 1, + * // 1, 1, 1, 1]; + * @endcode + * + * @tparam bitset_t The data type of the elements in the bitset matrix. + * @tparam index_t The data type used for indexing the elements in the matrices. + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to + * raft::device_csr_matrix. + * + * @param[in] handle The RAFT handle containing the CUDA stream for operations. + * @param[in] bitset The bitset matrix view, to be converted to CSR format. + * @param[out] csr Output parameter where the resulting CSR matrix is stored. In the + * bitset, each '1' bit corresponds to a non-zero element in the CSR matrix. + */ +template >> +void bitset_to_csr(raft::resources const& handle, + raft::core::bitset_view bitset, + csr_matrix_t& csr) +{ + detail::bitset_to_csr(handle, bitset, csr); +} + }; // end NAMESPACE convert }; // end NAMESPACE sparse }; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh index 866923d647..be62f76502 100644 --- a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh @@ -283,10 +283,6 @@ void bitmap_to_csr(raft::resources const& handle, using nnz_t = typename csr_matrix_t::nnz_type; auto csr_view = csr.structure_view(); - if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0 || csr_view.get_nnz() == 0) { - return; - } - RAFT_EXPECTS(bitmap.get_n_rows() == csr_view.get_n_rows(), "Number of rows in bitmap must be equal to " "number of rows in csr"); @@ -295,6 +291,8 @@ void bitmap_to_csr(raft::resources const& handle, "Number of columns in bitmap must be equal to " "number of columns in csr"); + if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0) { return; } + auto thrust_policy = resource::get_thrust_policy(handle); auto stream = resource::get_cuda_stream(handle); @@ -330,12 +328,14 @@ 
void bitmap_to_csr(raft::resources const& handle, thrust_policy, sub_nnz.data(), sub_nnz.data() + sub_nnz_size + 1, sub_nnz.data()); if constexpr (is_device_csr_sparsity_owning_v) { - index_t nnz = 0; + nnz_t nnz = 0; RAFT_CUDA_TRY(cudaMemcpyAsync( - &nnz, sub_nnz.data() + sub_nnz_size, sizeof(index_t), cudaMemcpyDeviceToHost, stream)); + &nnz, sub_nnz.data() + sub_nnz_size, sizeof(nnz_t), cudaMemcpyDeviceToHost, stream)); resource::sync_stream(handle); csr.initialize_sparsity(nnz); + if (nnz == 0) return; } + constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; fill_indices_by_rows(handle, bitmap.data(), diff --git a/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh new file mode 100644 index 0000000000..b3b341d793 --- /dev/null +++ b/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include // detail::popc +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace raft { +namespace sparse { +namespace convert { +namespace detail { + +template +RAFT_KERNEL repeat_csr_kernel(const index_t* indptr, + const index_t* indices, + index_t* repeated_indptr, + index_t* repeated_indices, + nnz_t nnz, + index_t repeat_count) +{ + int global_id = blockIdx.x * blockDim.x + threadIdx.x; + bool guard = global_id < nnz; + index_t* repeated_indices_addr = repeated_indices + global_id; + + for (index_t i = global_id; i < repeat_count; i += gridDim.x * blockDim.x) { + repeated_indptr[i] = (i + 2) * nnz; + } + + __syncthreads(); + + index_t item; + item = (global_id < nnz) ? indices[global_id] : -1; + + __syncthreads(); + + for (index_t row = 0; row < repeat_count; ++row) { + index_t start_offset = row * nnz; + if (guard) { repeated_indices_addr[start_offset] = item; } + } +} + +template +void gpu_repeat_csr(raft::resources const& handle, + const index_t* d_indptr, + const index_t* d_indices, + nnz_t nnz, + index_t repeat_count, + index_t* d_repeated_indptr, + index_t* d_repeated_indices) +{ + if (nnz == 0) return; + + auto stream = resource::get_cuda_stream(handle); + index_t repeat_csr_tpb = 256; + index_t grid = (nnz + repeat_csr_tpb - 1) / (repeat_csr_tpb); + + repeat_csr_kernel<<>>( + d_indptr, d_indices, d_repeated_indptr, d_repeated_indices, nnz, repeat_count); +} + +template >> +void bitset_to_csr(raft::resources const& handle, + raft::core::bitset_view bitset, + csr_matrix_t& csr) +{ + using row_t = typename csr_matrix_t::row_type; + using nnz_t = typename csr_matrix_t::nnz_type; + + auto csr_view = csr.structure_view(); + + RAFT_EXPECTS(bitset.size() == csr_view.get_n_cols(), + "Number of size in bitset must be equal to " + "number of columns in csr"); + if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0) { return; } + + 
auto thrust_policy = resource::get_thrust_policy(handle); + auto stream = resource::get_cuda_stream(handle); + + index_t* indptr = csr_view.get_indptr().data(); + index_t* indices = csr_view.get_indices().data(); + + RAFT_CUDA_TRY(cudaMemsetAsync(indptr, 0, (csr_view.get_n_rows() + 1) * sizeof(index_t), stream)); + + size_t sub_nnz_size = 0; + index_t bits_per_sub_col = 0; + + // Get buffer size and number of bits per each sub-columns + calc_nnz_by_rows(handle, + bitset.data(), + row_t(1), + csr_view.get_n_cols(), + static_cast(nullptr), + sub_nnz_size, + bits_per_sub_col); + + rmm::device_async_resource_ref device_memory = resource::get_workspace_resource(handle); + rmm::device_uvector sub_nnz(sub_nnz_size + 1, stream, device_memory); + + calc_nnz_by_rows(handle, + bitset.data(), + row_t(1), + csr_view.get_n_cols(), + sub_nnz.data(), + sub_nnz_size, + bits_per_sub_col); + + thrust::exclusive_scan( + thrust_policy, sub_nnz.data(), sub_nnz.data() + sub_nnz_size + 1, sub_nnz.data()); + + nnz_t bitset_nnz = 0; + if constexpr (is_device_csr_sparsity_owning_v) { + RAFT_CUDA_TRY(cudaMemcpyAsync( + &bitset_nnz, sub_nnz.data() + sub_nnz_size, sizeof(nnz_t), cudaMemcpyDeviceToHost, stream)); + resource::sync_stream(handle); + csr.initialize_sparsity(bitset_nnz * csr_view.get_n_rows()); + if (bitset_nnz == 0) return; + } else { + bitset_nnz = csr_view.get_nnz() / csr_view.get_n_rows(); + } + + constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; + fill_indices_by_rows(handle, + bitset.data(), + indptr, + 1, + csr_view.get_n_cols(), + csr_view.get_nnz(), + indices, + sub_nnz.data(), + bits_per_sub_col, + sub_nnz_size); + if (csr_view.get_n_rows() > 1) { + gpu_repeat_csr(handle, + indptr, + indices, + bitset_nnz, + csr_view.get_n_rows() - 1, + indptr + 2, + indices + bitset_nnz); + } + + thrust::fill_n(thrust_policy, + csr.get_elements().data(), + csr_view.get_nnz(), + typename csr_matrix_t::element_type(1)); +} + +}; // end NAMESPACE detail +}; // end NAMESPACE 
convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh b/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh index 276960628d..bfffa413b2 100644 --- a/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh +++ b/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -41,7 +42,7 @@ template & A, raft::device_matrix_view& B, - raft::core::bitmap_view& mask, + raft::core::bitmap_view& mask, raft::device_csr_matrix_view& C, std::optional> alpha, std::optional> beta) @@ -100,6 +101,69 @@ void masked_matmul(raft::resources const& handle, } } +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view& A, + raft::device_matrix_view& B, + raft::core::bitset_view& mask, + raft::device_csr_matrix_view& C, + std::optional> alpha, + std::optional> beta) +{ + index_t m = A.extent(0); + index_t n = B.extent(0); + index_t dim = A.extent(1); + + auto compressed_C_view = C.structure_view(); + + RAFT_EXPECTS(A.extent(1) == B.extent(1), "The dim of A must be equal to the dim of B."); + RAFT_EXPECTS(A.extent(0) == compressed_C_view.get_n_rows(), + "Number of rows in C must match the number of rows in A."); + RAFT_EXPECTS(B.extent(0) == compressed_C_view.get_n_cols(), + "Number of columns in C must match the number of columns in B."); + + auto stream = raft::resource::get_cuda_stream(handle); + + auto C_matrix = raft::make_device_csr_matrix(handle, compressed_C_view); + + // fill C + raft::sparse::convert::bitset_to_csr(handle, mask, C_matrix); + + if (m > 10 || alpha.has_value() || beta.has_value()) { + auto C_view = raft::make_device_csr_matrix_view( + C.get_elements().data(), compressed_C_view); + + // create B col_major view + auto B_col_major = raft::make_device_matrix_view( + B.data_handle(), dim, n); + + output_t default_alpha = static_cast(1.0f); + output_t default_beta = static_cast(0.0f); 
+ + if (!alpha.has_value()) { alpha = raft::make_host_scalar_view(&default_alpha); } + if (!beta.has_value()) { beta = raft::make_host_scalar_view(&default_beta); } + + raft::sparse::linalg::sddmm(handle, + A, + B_col_major, + C_view, + raft::linalg::Operation::NON_TRANSPOSE, + raft::linalg::Operation::NON_TRANSPOSE, + *alpha, + *beta); + } else { + raft::sparse::distance::detail::faster_dot_on_csr(handle, + C.get_elements().data(), + compressed_C_view.get_nnz(), + compressed_C_view.get_indptr().data(), + compressed_C_view.get_indices().data(), + A.data_handle(), + B.data_handle(), + compressed_C_view.get_n_rows(), + dim); + } +} + } // namespace detail } // namespace linalg } // namespace sparse diff --git a/cpp/include/raft/sparse/linalg/masked_matmul.cuh b/cpp/include/raft/sparse/linalg/masked_matmul.cuh new file mode 100644 index 0000000000..c33a1afd43 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/masked_matmul.cuh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @defgroup masked_matmul Masked Matrix Multiplication + * @{ + */ + +/** + * @brief Performs a masked multiplication of dense matrices A and B, followed by an element-wise + * multiplication with the sparsity pattern defined by the mask, resulting in the computation + * C = alpha * ((A * B) ∘ spy(mask)) + beta * C.
+ * + * This function multiplies two dense matrices A and B, and then applies an element-wise + * multiplication using the sparsity pattern provided by the mask. The result is scaled by alpha + * and added to beta times the original matrix C. + * + * @tparam value_t Data type of elements in the input matrices (e.g., half, float, double) + * @tparam output_t Data type of elements in the output matrices (e.g., float, double) + * @tparam index_t Type used for matrix indices + * @tparam nnz_t Type used for the number of non-zero entries in CSR format + * @tparam bitmap_t Type of the bitmap used for the mask + * + * @param[in] handle RAFT handle for resource management + * @param[in] A Input dense matrix (device_matrix_view) with shape [m, k] + * @param[in] B Input dense matrix (device_matrix_view) with shape [n, k] + * @param[in] mask Bitmap view representing the sparsity pattern (bitmap_view) with logical shape + * [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is + * included (1) or masked out (0). + * @param[inout] C Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, + * n] + * @param[in] alpha Optional scalar multiplier for the product of A and B (default: 1.0 if + * std::nullopt) + * @param[in] beta Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt) + */ +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view A, + raft::device_matrix_view B, + raft::core::bitmap_view mask, + raft::device_csr_matrix_view C, + std::optional> alpha = std::nullopt, + std::optional> beta = std::nullopt) +{ + detail::masked_matmul(handle, A, B, mask, C, alpha, beta); +} + +/** + * @brief Computes a sparse matrix product with a masked sparsity pattern and scaling. + * + * This function computes the result of: + * C = alpha * ((A * B) ∘ spy(mask)) + beta * C + * where: + * - A and B are dense input matrices.
+ * - "mask" defines the sparsity pattern for element-wise multiplication. + * - The result is scaled by alpha and added to beta times the original C. + * + * **Special behavior of the mask**: + * - The `bitset` mask represents a single row of data, with its bits indicating whether + * each corresponding element in (A * B) is included (1) or masked out (0). + * - If the output CSR matrix `C` has multiple rows, the `bitset` is logically repeated + * across all rows of `C`. For example, if `C` has `n_rows` rows, the same `bitset` + * pattern is applied to all rows. + * + * @tparam value_t Data type of input matrix elements (e.g., half, float, double). + * @tparam output_t Data type of output matrix elements (e.g., float, double). + * @tparam index_t Type for matrix indices. + * @tparam nnz_t Type for non-zero entries in CSR format. + * @tparam bitset_t Type for the bitset mask. + * + * @param[in] handle RAFT handle for managing resources. + * @param[in] A Dense input matrix [m, k] (row-major). + * @param[in] B Dense input matrix [n, k] (row-major). + * @param[in] mask Bitset view representing a single row [1, n], where each bit + * indicates if the corresponding element in (A * B) is included (1) + * or masked out (0). The pattern is repeated for all rows of `C`. + * @param[inout] C Output sparse matrix in CSR format [m, n]. + * @param[in] alpha Scalar multiplier for (A * B) (default: 1.0 if std::nullopt). + * @param[in] beta Scalar multiplier for the initial C (default: 0 if std::nullopt).
+ */ +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view A, + raft::device_matrix_view B, + raft::core::bitset_view mask, + raft::device_csr_matrix_view C, + std::optional> alpha = std::nullopt, + std::optional> beta = std::nullopt) +{ + detail::masked_matmul(handle, A, B, mask, C, alpha, beta); +} + +/** @} */ // end of masked_matmul + +} // end namespace linalg +} // end namespace sparse +} // end namespace raft diff --git a/cpp/include/raft/sparse/linalg/masked_matmul.hpp b/cpp/include/raft/sparse/linalg/masked_matmul.hpp index 6cf6e834b9..32322b90f6 100644 --- a/cpp/include/raft/sparse/linalg/masked_matmul.hpp +++ b/cpp/include/raft/sparse/linalg/masked_matmul.hpp @@ -13,60 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#pragma once - -#include - -namespace raft { -namespace sparse { -namespace linalg { - /** - * @defgroup masked_matmul Masked Matrix Multiplication - * @{ + * This file is deprecated and will be removed in future release. + * Please use the cuh version instead. */ /** - * @brief Performs a masked multiplication of dense matrices A and B, followed by an element-wise - * multiplication with the sparsity pattern defined by the mask, resulting in the computation - * C = alpha * ((A * B) ∘ spy(mask)) + beta * C.
- * - * @tparam value_t Data type of elements in the input matrices (e.g., half, float, double) - * @tparam output_t Data type of elements in the output matrices (e.g., float, double) - * @tparam index_t Type used for matrix indices - * @tparam nnz_t Type used for the number of non-zero entries in CSR format - * @tparam bitmap_t Type of the bitmap used for the mask - * - * @param[in] handle RAFT handle for resource management - * @param[in] A Input dense matrix (device_matrix_view) with shape [m, k] - * @param[in] B Input dense matrix (device_matrix_view) with shape [n, k] - * @param[in] mask Bitmap view representing the sparsity pattern (bitmap_view) with logical shape - * [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is - * included (1) or masked out (0). - * @param[inout] C Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, - * n] - * @param[in] alpha Optional scalar multiplier for the product of A and B (default: 1.0 if - * std::nullopt) - * @param[in] beta Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt) + * DISCLAIMER: this file is deprecated: use masked_matmul.cuh instead */ -template -void masked_matmul(raft::resources const& handle, - raft::device_matrix_view A, - raft::device_matrix_view B, - raft::core::bitmap_view mask, - raft::device_csr_matrix_view C, - std::optional> alpha = std::nullopt, - std::optional> beta = std::nullopt) -{ - detail::masked_matmul(handle, A, B, mask, C, alpha, beta); -} -/** @} */ // end of masked_matmul +#pragma once + +#ifndef RAFT_HIDE_DEPRECATION_WARNINGS +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") +#endif -} // end namespace linalg -} // end namespace sparse -} // end namespace raft +#include diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/test/sparse/convert_csr.cu index c1a495ea3d..d74296a267 100644 --- a/cpp/test/sparse/convert_csr.cu +++ b/cpp/test/sparse/convert_csr.cu @@ -17,6 +17,7 @@ #include "../test_utils.cuh" #include +#include #include #include #include @@ -370,7 +371,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam(handle, params.n_rows, params.n_cols, nnz); auto csr_view = csr.structure_view(); - convert::bitmap_to_csr(handle, bitmap, csr); + bitmap.to_csr(handle, csr); raft::copy(indptr_d.data(), csr_view.get_indptr().data(), indptr_d.size(), stream); raft::copy(indices_d.data(), csr_view.get_indices().data(), indices_d.size(), stream); raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); @@ -379,7 +380,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam(handle, csr_view); - convert::bitmap_to_csr(handle, bitmap, csr); + bitmap.to_csr(handle, csr); raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); } resource::sync_stream(handle); @@ -477,5 +478,289 @@ INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, BitmapToCSRTestLOnLargeSize, ::testing::ValuesIn(bitmaptocsr_large_inputs)); +/******************************** bitset to csr ********************************/ + +template +struct BitsetToCSRInputs { + index_t n_repeat; + index_t n_cols; + float sparsity; + bool owning; +}; + +template +class BitsetToCSRTest : public ::testing::TestWithParam> { + public: + BitsetToCSRTest() + : stream(resource::get_cuda_stream(handle)), + params(::testing::TestWithParam>::GetParam()), + bitset_d(0, stream), + indices_d(0, stream), + indptr_d(0, stream), + values_d(0, stream), + indptr_expected_d(0, stream), + indices_expected_d(0, stream), + values_expected_d(0, stream) + { + } + + protected: + void repeat_cpu_bitset(std::vector& input, + size_t input_bits, + size_t 
repeat, + std::vector& output) + { + const size_t output_bits = input_bits * repeat; + const size_t output_units = (output_bits + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + + std::memset(output.data(), 0, output_units * sizeof(bitset_t)); + + size_t output_bit_index = 0; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bitset_t) * 8); + size_t input_bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (input[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bitset_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bitset_t) * 8); + + output[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } + } + + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitset) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t res = num_ones; + + for (auto& item : bitset) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + while (num_ones > 0) { + index_t index = dis(gen); + + bitset_t& element = bitset[index / (8 * sizeof(bitset_t))]; + index_t bit_position = index % (8 * sizeof(bitset_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void cpu_convert_to_csr(std::vector& bitset, + index_t rows, + index_t cols, + std::vector& indices, + std::vector& indptr) + { + index_t offset_indptr = 0; + index_t offset_values = 0; + indptr[offset_indptr++] = 0; + + index_t index = 0; + bitset_t element = 0; + index_t bit_position = 0; + + for (index_t i = 0; i < rows; ++i) { + for (index_t j = 0; j < cols; ++j) { + index = i * cols + j; + element = bitset[index / (8 * sizeof(bitset_t))]; + bit_position = index % (8 * sizeof(bitset_t)); 
+ + if (((element >> bit_position) & 1)) { + indices[offset_values] = static_cast(j); + offset_values++; + } + } + indptr[offset_indptr++] = static_cast(offset_values); + } + } + + bool csr_compare(const std::vector& row_ptrs1, + const std::vector& col_indices1, + const std::vector& row_ptrs2, + const std::vector& col_indices2) + { + if (row_ptrs1.size() != row_ptrs2.size()) { return false; } + + if (col_indices1.size() != col_indices2.size()) { return false; } + + if (!std::equal(row_ptrs1.begin(), row_ptrs1.end(), row_ptrs2.begin())) { return false; } + + for (size_t i = 0; i < row_ptrs1.size() - 1; ++i) { + size_t start_idx = row_ptrs1[i]; + size_t end_idx = row_ptrs1[i + 1]; + + std::vector cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); + std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); + + std::sort(cols1.begin(), cols1.end()); + std::sort(cols2.begin(), cols2.end()); + + if (cols1 != cols2) { return false; } + } + + return true; + } + + void SetUp() override + { + index_t element = raft::ceildiv(1 * params.n_cols, index_t(sizeof(bitset_t) * 8)); + std::vector bitset_h(element); + std::vector bitset_repeat_h(element * params.n_repeat); + + nnz = create_sparse_matrix(1, params.n_cols, params.sparsity, bitset_h); + + repeat_cpu_bitset(bitset_h, size_t(params.n_cols), size_t(params.n_repeat), bitset_repeat_h); + nnz *= params.n_repeat; + + std::vector indices_h(nnz); + std::vector indptr_h(params.n_repeat + 1); + + cpu_convert_to_csr(bitset_repeat_h, params.n_repeat, params.n_cols, indices_h, indptr_h); + + bitset_d.resize(bitset_h.size(), stream); + indptr_d.resize(params.n_repeat + 1, stream); + indices_d.resize(nnz, stream); + + indptr_expected_d.resize(params.n_repeat + 1, stream); + indices_expected_d.resize(nnz, stream); + values_expected_d.resize(nnz, stream); + + thrust::fill_n(resource::get_thrust_policy(handle), values_expected_d.data(), nnz, value_t{1}); + + values_d.resize(nnz, stream); + + 
update_device(indices_expected_d.data(), indices_h.data(), indices_h.size(), stream); + update_device(indptr_expected_d.data(), indptr_h.data(), indptr_h.size(), stream); + update_device(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + + resource::sync_stream(handle); + } + + void Run() + { + auto bitset = raft::core::bitset_view(bitset_d.data(), params.n_cols); + + if (params.owning) { + auto csr = + raft::make_device_csr_matrix(handle, params.n_repeat, params.n_cols, nnz); + auto csr_view = csr.structure_view(); + + bitset.to_csr(handle, csr); + raft::copy(indptr_d.data(), csr_view.get_indptr().data(), indptr_d.size(), stream); + raft::copy(indices_d.data(), csr_view.get_indices().data(), indices_d.size(), stream); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } else { + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_repeat, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + bitset.to_csr(handle, csr); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } + resource::sync_stream(handle); + + std::vector indices_h(indices_expected_d.size(), 0); + std::vector indices_expected_h(indices_expected_d.size(), 0); + update_host(indices_h.data(), indices_d.data(), indices_h.size(), stream); + update_host(indices_expected_h.data(), indices_expected_d.data(), indices_h.size(), stream); + + std::vector indptr_h(indptr_expected_d.size(), 0); + std::vector indptr_expected_h(indptr_expected_d.size(), 0); + update_host(indptr_h.data(), indptr_d.data(), indptr_h.size(), stream); + update_host(indptr_expected_h.data(), indptr_expected_d.data(), indptr_h.size(), stream); + + resource::sync_stream(handle); + + ASSERT_TRUE(csr_compare(indptr_h, indices_h, indptr_expected_h, indices_expected_h)); + ASSERT_TRUE(raft::devArrMatch( + values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)); + } + + protected: + 
raft::resources handle; + cudaStream_t stream; + + BitsetToCSRInputs params; + + rmm::device_uvector bitset_d; + + index_t nnz; + + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + rmm::device_uvector indptr_expected_d; + rmm::device_uvector indices_expected_d; + rmm::device_uvector values_expected_d; +}; + +using BitsetToCSRTestI = BitsetToCSRTest; +TEST_P(BitsetToCSRTestI, Result) { Run(); } + +using BitsetToCSRTestL = BitsetToCSRTest; +TEST_P(BitsetToCSRTestL, Result) { Run(); } + +using BitsetToCSRTestLOnLargeSize = BitsetToCSRTest; +TEST_P(BitsetToCSRTestLOnLargeSize, Result) { Run(); } + +template +const std::vector> bitsettocsr_inputs = { + {0, 0, 0.2, false}, + {10, 32, 0.4, false}, + {10, 3, 0.2, false}, + {32, 1024, 0.4, false}, + {1024, 1048576, 0.01, false}, + {1024, 1024, 0.4, false}, + {64 * 1024 + 10, 2, 0.3, false}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.3, false}, // No peeling-remainder + {17, 16, 0.3, false}, // Check peeling-remainder + {18, 16, 0.3, false}, // Check peeling-remainder + {32 + 9, 33, 0.2, false}, // Check peeling-remainder + {2, 33, 0.2, false}, // Check peeling-remainder + {0, 0, 0.2, true}, + {10, 32, 0.4, true}, + {10, 3, 0.2, true}, + {32, 1024, 0.4, true}, + {1024, 1048576, 0.01, true}, + {1024, 1024, 0.4, true}, + {64 * 1024 + 10, 2, 0.3, true}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.3, true}, // No peeling-remainder + {17, 16, 0.3, true}, // Check peeling-remainder + {18, 16, 0.3, true}, // Check peeling-remainder + {32 + 9, 33, 0.2, true}, // Check peeling-remainder + {2, 33, 0.2, true}, // Check peeling-remainder +}; + +template +const std::vector> bitsettocsr_large_inputs = { + {100, 100000000, 0.01, true}, {100, 100000000, 0.05, false}, {100, 100000000 + 17, 0.05, false}}; + +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitsetToCSRTestI, + ::testing::ValuesIn(bitsettocsr_inputs)); 
+INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitsetToCSRTestL, + ::testing::ValuesIn(bitsettocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitsetToCSRTestLOnLargeSize, + ::testing::ValuesIn(bitsettocsr_large_inputs)); + } // namespace sparse } // namespace raft diff --git a/cpp/test/sparse/masked_matmul.cu b/cpp/test/sparse/masked_matmul.cu index f883beae32..5ee1677015 100644 --- a/cpp/test/sparse/masked_matmul.cu +++ b/cpp/test/sparse/masked_matmul.cu @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -46,6 +46,8 @@ struct MaskedMatmulInputs { unsigned long long int seed; }; +enum class BitsLayout { Bitset, Bitmap }; + template struct sum_abs_op { __host__ __device__ value_t operator()(const value_t& x, const value_t& y) const @@ -87,7 +89,8 @@ bool isCuSparseVersionGreaterThan_12_0_1() template class MaskedMatmulTest @@ -98,7 +101,7 @@ class MaskedMatmulTest stream(resource::get_cuda_stream(handle)), a_data_d(0, resource::get_cuda_stream(handle)), b_data_d(0, resource::get_cuda_stream(handle)), - bitmap_d(0, resource::get_cuda_stream(handle)), + bits_d(0, resource::get_cuda_stream(handle)), c_indptr_d(0, resource::get_cuda_stream(handle)), c_indices_d(0, resource::get_cuda_stream(handle)), c_data_d(0, resource::get_cuda_stream(handle)), @@ -107,14 +110,14 @@ class MaskedMatmulTest } protected: - index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bits) { index_t total = static_cast(m * n); index_t num_ones = static_cast((total * 1.0f) * sparsity); index_t res = num_ones; - for (auto& item : bitmap) { - item = static_cast(0); + for (auto& item : bits) { + item = static_cast(0); } std::random_device rd; @@ -124,8 +127,8 @@ class MaskedMatmulTest while (num_ones > 0) { index_t index = dis(gen); - bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; - index_t bit_position = index % (8 * 
sizeof(bitmap_t)); + bits_t& element = bits[index / (8 * sizeof(bits_t))]; + index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { element |= (static_cast(1) << bit_position); @@ -135,7 +138,27 @@ class MaskedMatmulTest return res; } - void cpu_convert_to_csr(std::vector& bitmap, + void repeat_cpu_bitset_inplace(std::vector& inout, size_t input_bits, size_t repeat) + { + size_t output_bit_index = input_bits; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bits_t) * 8); + size_t input_bit_offset = i % (sizeof(bits_t) * 8); + bool bit = (inout[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bits_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bits_t) * 8); + + inout[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } + } + + void cpu_convert_to_csr(std::vector& bits, index_t rows, index_t cols, std::vector& indices, @@ -146,14 +169,14 @@ class MaskedMatmulTest indptr[offset_indptr++] = 0; index_t index = 0; - bitmap_t element = 0; + bits_t element = 0; index_t bit_position = 0; for (index_t i = 0; i < rows; ++i) { for (index_t j = 0; j < cols; ++j) { index = i * cols + j; - element = bitmap[index / (8 * sizeof(bitmap_t))]; - bit_position = index % (8 * sizeof(bitmap_t)); + element = bits[index / (8 * sizeof(bits_t))]; + bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1)) { indices[offset_values] = static_cast(j); @@ -201,15 +224,17 @@ class MaskedMatmulTest index_t b_size = params.k * params.n; index_t c_size = params.m * params.n; - index_t element = raft::ceildiv(params.m * params.n, index_t(sizeof(bitmap_t) * 8)); - std::vector bitmap_h(element); + index_t element = raft::ceildiv(params.m * params.n, index_t(sizeof(bits_t) * 8)); + std::vector bits_h(element); + + std::memset(bits_h.data(), 0, 
bits_h.size() * sizeof(bits_t)); std::vector a_data_h(a_size); std::vector b_data_h(b_size); a_data_d.resize(a_size, stream); b_data_d.resize(b_size, stream); - bitmap_d.resize(bitmap_h.size(), stream); + bits_d.resize(bits_h.size(), stream); auto blobs_a_b = raft::make_device_matrix(handle, 1, a_size + b_size); auto labels = raft::make_device_vector(handle, 1); @@ -262,18 +287,27 @@ class MaskedMatmulTest resource::sync_stream(handle); - index_t c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bitmap_h); + index_t c_true_nnz = 0; + if constexpr (bits_layout == BitsLayout::Bitmap) { + c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bits_h); + } else if constexpr (bits_layout == BitsLayout::Bitset) { + c_true_nnz = create_sparse_matrix(1, params.n, params.sparsity, bits_h); + repeat_cpu_bitset_inplace(bits_h, params.n, params.m - 1); + c_true_nnz *= params.m; + } else { + GTEST_SKIP() << "Unsupported BitsLayout!"; + } std::vector c_indptr_h(params.m + 1); std::vector c_indices_h(c_true_nnz); std::vector c_data_h(c_true_nnz); - cpu_convert_to_csr(bitmap_h, params.m, params.n, c_indices_h, c_indptr_h); + cpu_convert_to_csr(bits_h, params.m, params.n, c_indices_h, c_indptr_h); c_data_d.resize(c_data_h.size(), stream); update_device(c_data_d.data(), c_data_h.data(), c_data_h.size(), stream); - update_device(bitmap_d.data(), bitmap_h.data(), bitmap_h.size(), stream); + update_device(bits_d.data(), bits_h.data(), bits_h.size(), stream); resource::sync_stream(handle); cpu_sddmm(a_data_h, b_data_h, c_data_h, c_indices_h, c_indptr_h, true, true); @@ -304,9 +338,6 @@ class MaskedMatmulTest auto B = raft::make_device_matrix_view(b_data_d.data(), params.n, params.k); - auto mask = - raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); - auto c_structure = raft::make_device_compressed_structure_view( c_indptr_d.data(), c_indices_d.data(), @@ -316,7 +347,15 @@ class MaskedMatmulTest auto C = 
raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + if constexpr (bits_layout == BitsLayout::Bitmap) { + auto mask = raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + } else if constexpr (bits_layout == BitsLayout::Bitset) { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + } else { + GTEST_SKIP() << "Unsupported BitsLayout!"; + } resource::sync_stream(handle); @@ -344,7 +383,7 @@ class MaskedMatmulTest rmm::device_uvector a_data_d; rmm::device_uvector b_data_d; - rmm::device_uvector bitmap_d; + rmm::device_uvector bits_d; rmm::device_uvector c_indptr_d; rmm::device_uvector c_indices_d; @@ -353,14 +392,23 @@ class MaskedMatmulTest rmm::device_uvector c_expected_data_d; }; -using MaskedMatmulTestF = MaskedMatmulTest; -TEST_P(MaskedMatmulTestF, Result) { Run(); } +using MaskedMatmulOnBitmapTestF = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestF, Result) { Run(); } + +using MaskedMatmulOnBitmapTestD = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestD, Result) { Run(); } -using MaskedMatmulTestD = MaskedMatmulTest; -TEST_P(MaskedMatmulTestD, Result) { Run(); } +using MaskedMatmulOnBitmapTestH = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestH, Result) { Run(); } -using MaskedMatmulTestH = MaskedMatmulTest; -TEST_P(MaskedMatmulTestH, Result) { Run(); } +using MaskedMatmulOnBitsetTestF = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestF, Result) { Run(); } + +using MaskedMatmulOnBitsetTestD = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestD, Result) { Run(); } + +using MaskedMatmulOnBitsetTestH = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestH, Result) { Run(); } const std::vector> sddmm_inputs_f = { {0.001f, 2, 255, 1023, 0.19, 1234ULL}, @@ -419,11 +467,29 @@ const std::vector> sddmm_inputs_h = { {0.0003f, 31, 1025, 1025, 0.19, 
1234ULL}, {0.001f, 1024, 1024, 1024, 0.1, 1234ULL}}; -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestF, ::testing::ValuesIn(sddmm_inputs_f)); +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestF, + ::testing::ValuesIn(sddmm_inputs_f)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestD, + ::testing::ValuesIn(sddmm_inputs_d)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestH, + ::testing::ValuesIn(sddmm_inputs_h)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestF, + ::testing::ValuesIn(sddmm_inputs_f)); -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestD, ::testing::ValuesIn(sddmm_inputs_d)); +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestD, + ::testing::ValuesIn(sddmm_inputs_d)); -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestH, ::testing::ValuesIn(sddmm_inputs_h)); +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestH, + ::testing::ValuesIn(sddmm_inputs_h)); } // namespace sparse } // namespace raft From 8ea0e7e71ad4360b99d3a45aa0b2f124cf01abd1 Mon Sep 17 00:00:00 2001 From: Micka Date: Thu, 16 Jan 2025 17:22:38 +0100 Subject: [PATCH 21/37] Fix broken link to python doc (#2537) Apply the same change as https://github.com/rapidsai/cuml/pull/6202 to fix Python links to source code Authors: - Micka (https://github.com/lowener) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2537 --- docs/source/conf.py | 2 +- docs/source/sphinxext/github_link.py | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 7a287b689f..e5e6e0871a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -208,7 +208,7 @@ def setup(app): linkcode_resolve = make_linkcode_resolve( "pylibraft", "https://github.com/rapidsai/raft" - "raft/blob/{revision}/python/pylibraft" + "/blob/{revision}/python/pylibraft/" "{package}/{path}#L{lineno}", ) diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py index a7a46fdd9d..5712bbe5cb 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/source/sphinxext/github_link.py @@ -1,5 +1,20 @@ # This contains code with copyright by the scikit-learn project, subject to the # license in /thirdparty/LICENSES/LICENSE.scikit_learn +# +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import inspect import os @@ -96,15 +111,14 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): # fn is expected to be the absolute path. 
fn = os.path.relpath(source_file, start=package) print("{}:{}".format( - os.path.abspath(os.path.join("..", "python", "cuml", fn)), + os.path.abspath(os.path.join("..", "python", "pylibraft", fn)), lineno)) else: return else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) + if fn.endswith(".pyx"): + sp_path = next(x for x in sys.path if re.match(".*site-packages$", x)) + fn = fn.replace("/opt/conda/conda-bld/work/python/pylibraft", sp_path) # Convert to relative from module root fn = os.path.relpath(fn, From fb6bfe6ee956a5e40295300d453f1261ece3cedf Mon Sep 17 00:00:00 2001 From: Victor Lafargue Date: Thu, 16 Jan 2025 19:19:59 +0100 Subject: [PATCH 22/37] Introduction of the `raft::device_resources_snmg` type (#2487) Introduces the `raft::device_resources_snmg` type to hold all resources required for the NCCL clique. ~Answers https://github.com/rapidsai/raft/issues/2459~ Removed call to `raft::comms::build_comms_nccl_only` (https://github.com/rapidsai/raft/issues/2465) Authors: - Victor Lafargue (https://github.com/viclafargue) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2487 --- cpp/include/raft/comms/nccl_clique.hpp | 156 ------------- .../raft/core/device_resources_snmg.hpp | 217 ++++++++++++++++++ .../raft/core/resource/nccl_clique.hpp | 66 ------ cpp/include/raft/core/resources.hpp | 3 +- docs/source/cpp_api/core_resources.rst | 17 ++ 5 files changed, 236 insertions(+), 223 deletions(-) delete mode 100644 cpp/include/raft/comms/nccl_clique.hpp create mode 100644 cpp/include/raft/core/device_resources_snmg.hpp delete mode 100644 cpp/include/raft/core/resource/nccl_clique.hpp diff --git a/cpp/include/raft/comms/nccl_clique.hpp b/cpp/include/raft/comms/nccl_clique.hpp deleted file mode 100644 index c6520af753..0000000000 --- a/cpp/include/raft/comms/nccl_clique.hpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -/** - * @brief Error checking macro for NCCL runtime API functions. 
- * - * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an - * exception detailing the NCCL error that occurred - */ -#define RAFT_NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "NCCL error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - ncclGetErrorString(status)); \ - throw raft::logic_error(msg); \ - } \ - } while (0); - -namespace raft::comms { -void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank); -} - -namespace raft::comms { - -struct nccl_clique { - using pool_mr = rmm::mr::pool_memory_resource; - - /** - * Instantiates a NCCL clique with all available GPUs - * - * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool - * - */ - nccl_clique(int percent_of_free_memory = 80) - : root_rank_(0), - percent_of_free_memory_(percent_of_free_memory), - per_device_pools_(0), - device_resources_(0) - { - cudaGetDeviceCount(&num_ranks_); - device_ids_.resize(num_ranks_); - std::iota(device_ids_.begin(), device_ids_.end(), 0); - nccl_comms_.resize(num_ranks_); - nccl_clique_init(); - } - - /** - * Instantiates a NCCL clique - * - * Usage example: - * @code{.cpp} - * int n_devices; - * cudaGetDeviceCount(&n_devices); - * std::vector device_ids(n_devices); - * std::iota(device_ids.begin(), device_ids.end(), 0); - * cuvs::neighbors::mg::nccl_clique& clique(device_ids); // first device is the root rank - * @endcode - * - * @param[in] device_ids list of device IDs to be used to initiate the clique - * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool - * - */ - nccl_clique(const std::vector& device_ids, int percent_of_free_memory = 80) - : root_rank_(0), - num_ranks_(device_ids.size()), - percent_of_free_memory_(percent_of_free_memory), - device_ids_(device_ids), - 
nccl_comms_(device_ids.size()), - per_device_pools_(0), - device_resources_(0) - { - nccl_clique_init(); - } - - void nccl_clique_init() - { - RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), num_ranks_, device_ids_.data())); - - for (int rank = 0; rank < num_ranks_; rank++) { - RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); - - // create a pool memory resource for each device - auto old_mr = rmm::mr::get_current_device_resource(); - per_device_pools_.push_back(std::make_unique( - old_mr, rmm::percent_of_free_device_memory(percent_of_free_memory_))); - rmm::cuda_device_id id(device_ids_[rank]); - rmm::mr::set_per_device_resource(id, per_device_pools_.back().get()); - - // create a device resource handle for each device - device_resources_.emplace_back(); - - // add NCCL communications to the device resource handle - raft::comms::build_comms_nccl_only( - &device_resources_[rank], nccl_comms_[rank], num_ranks_, rank); - } - - for (int rank = 0; rank < num_ranks_; rank++) { - RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); - raft::resource::sync_stream(device_resources_[rank]); - } - } - - const raft::device_resources& set_current_device_to_root_rank() const - { - int root_device_id = device_ids_[root_rank_]; - RAFT_CUDA_TRY(cudaSetDevice(root_device_id)); - return device_resources_[root_rank_]; - } - - ~nccl_clique() - { -#pragma omp parallel for // necessary to avoid hangs - for (int rank = 0; rank < num_ranks_; rank++) { - cudaSetDevice(device_ids_[rank]); - ncclCommDestroy(nccl_comms_[rank]); - rmm::cuda_device_id id(device_ids_[rank]); - rmm::mr::set_per_device_resource(id, nullptr); - } - } - - int root_rank_; - int num_ranks_; - int percent_of_free_memory_; - std::vector device_ids_; - std::vector nccl_comms_; - std::vector> per_device_pools_; - std::vector device_resources_; -}; - -} // namespace raft::comms diff --git a/cpp/include/raft/core/device_resources_snmg.hpp b/cpp/include/raft/core/device_resources_snmg.hpp new file mode 100644 index 
0000000000..f20a81a1c6 --- /dev/null +++ b/cpp/include/raft/core/device_resources_snmg.hpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +#include +#include + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define RAFT_NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + ncclGetErrorString(status)); \ + throw raft::logic_error(msg); \ + } \ + } while (0); + +namespace raft { + +/** + * @brief SNMG (single-node multi-GPU) resource container object that stores a NCCL clique and all + * necessary resources used for calling device functions, cuda kernels, libraries and/or NCCL + * communications on each GPU. Note the `device_resources_snmg` object can also be used as a classic + * `device_resources` object. The associated resources will be the ones of the GPU used during + * object instantiation and a GPU switch operation will be ordered during the retrieval of said + * resources. 
+ * + * The `device_resources_snmg` class is intended to be used in a single process to manage several + * GPUs. Please note that NCCL communications are the responsibility of the user. Blocking NCCL + * calls will sometimes require the use of several threads to avoid hangs. + */ +class device_resources_snmg : public device_resources { + public: + /** + * @brief Construct a SNMG resources instance with all available GPUs + */ + device_resources_snmg() : device_resources(), root_rank_(0) + { + cudaGetDevice(&main_gpu_id_); + + int num_ranks; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_ranks)); + device_ids_.resize(num_ranks); + std::iota(device_ids_.begin(), device_ids_.end(), 0); + nccl_comms_.resize(num_ranks); + initialize(); + } + + /** + * @brief Construct a SNMG resources instance with a subset of available GPUs + * + * @param[in] device_ids List of device IDs to be used by the NCCL clique + */ + device_resources_snmg(const std::vector& device_ids) + : device_resources(), root_rank_(0), device_ids_(device_ids), nccl_comms_(device_ids.size()) + { + cudaGetDevice(&main_gpu_id_); + + initialize(); + } + + /** + * @brief SNMG resources instance copy constructor + * + * @param[in] clique A SNMG resources instance + */ + device_resources_snmg(const device_resources_snmg& clique) + : device_resources(clique), + root_rank_(clique.root_rank_), + main_gpu_id_(clique.main_gpu_id_), + device_ids_(clique.device_ids_), + nccl_comms_(clique.nccl_comms_), + device_resources_(clique.device_resources_) + { + } + + device_resources_snmg(device_resources_snmg&&) = delete; + device_resources_snmg& operator=(device_resources_snmg&&) = delete; + + /** + * @brief Set root rank of NCCL clique + */ + inline int set_root_rank(int rank) { this->root_rank_ = rank; } + + /** + * @brief Get root rank of NCCL clique + */ + inline int get_root_rank() const { return this->root_rank_; } + + /** + * @brief Get number of ranks in NCCL clique + */ + inline int get_num_ranks() const { return 
this->device_ids_.size(); } + + /** + * @brief Get device ID of rank in NCCL clique + */ + inline int get_device_id(int rank) const { return this->device_ids_[rank]; } + + /** + * @brief Get NCCL comm object of rank in NCCL clique + */ + inline ncclComm_t get_nccl_comm(int rank) const { return this->nccl_comms_[rank]; } + + /** + * @brief Get raft::device_resources object of rank in NCCL clique + */ + inline const raft::device_resources& get_device_resources(int rank) const + { + return this->device_resources_[rank]; + } + + /** + * @brief Set current device ID to root rank and return its raft::device_resources object + */ + inline const raft::device_resources& set_current_device_to_root_rank() const + { + return set_current_device_to_rank(get_root_rank()); + } + + /** + * @brief Set current device ID to rank and return its raft::device_resources object + */ + inline const raft::device_resources& set_current_device_to_rank(int rank) const + { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + return get_device_resources(rank); + } + + /** + * @brief Set a memory pool on all GPUs of the clique + */ + void set_memory_pool(int percent_of_free_memory) const + { + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + size_t limit = + rmm::percent_of_free_device_memory(percent_of_free_memory); // check limit for each device + raft::resource::set_workspace_to_pool_resource(get_device_resources(rank), limit); + } + cudaSetDevice(this->main_gpu_id_); + } + + bool has_resource_factory(resource::resource_type resource_type) const override + { + cudaSetDevice(this->main_gpu_id_); + return raft::resources::has_resource_factory(resource_type); + } + + /** Destroys all held-up resources */ + ~device_resources_snmg() + { +#pragma omp parallel for // necessary to avoid hangs + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + 
RAFT_NCCL_TRY(ncclCommDestroy(get_nccl_comm(rank))); + } + cudaSetDevice(this->main_gpu_id_); + } + + private: + /** + * @brief Initializes the NCCL clique and raft::device_resources objects + */ + void initialize() + { + RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), get_num_ranks(), device_ids_.data())); + + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + device_resources_.emplace_back(); + + // ideally add the ncclComm_t to the device_resources object with + // raft::comms::build_comms_nccl_only + } + cudaSetDevice(this->main_gpu_id_); + } + + int root_rank_; + int main_gpu_id_; + std::vector device_ids_; + std::vector nccl_comms_; + std::vector device_resources_; + +}; // class device_resources_snmg + +} // namespace raft diff --git a/cpp/include/raft/core/resource/nccl_clique.hpp b/cpp/include/raft/core/resource/nccl_clique.hpp deleted file mode 100644 index edda5043ae..0000000000 --- a/cpp/include/raft/core/resource/nccl_clique.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include - -#include - -namespace raft::resource { - -class nccl_clique_resource : public resource { - public: - nccl_clique_resource() : clique_(std::make_unique()) {} - ~nccl_clique_resource() override {} - void* get_resource() override { return clique_.get(); } - - private: - std::unique_ptr clique_; -}; - -/** Factory that knows how to construct a specific raft::resource to populate the res_t. */ -class nccl_clique_resource_factory : public resource_factory { - public: - resource_type get_resource_type() override { return resource_type::NCCL_CLIQUE; } - resource* make_resource() override { return new nccl_clique_resource(); } -}; - -/** - * @defgroup nccl_clique_resource resource functions - * @{ - */ - -/** - * Retrieves a NCCL clique from raft res if it exists, otherwise initializes it and return it. - * - * @param[in] res the raft resources object - * @return NCCL clique - */ -inline const raft::comms::nccl_clique& get_nccl_clique(resources const& res) -{ - if (!res.has_resource_factory(resource_type::NCCL_CLIQUE)) { - res.add_resource_factory(std::make_shared()); - } - return *res.get_resource(resource_type::NCCL_CLIQUE); -}; - -/** - * @} - */ - -} // namespace raft::resource diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index b0827d8e11..44525edb23 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -72,6 +72,7 @@ class resources { resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {} resources(resources&&) = delete; resources& operator=(resources&&) = delete; + virtual ~resources() {} /** * @brief Returns true if a resource_factory has been registered for the @@ -79,7 +80,7 @@ class resources { * @param resource_type resource type to check * @return true if resource_factory is registered for the given resource_type */ - bool has_resource_factory(resource::resource_type resource_type) const + 
virtual bool has_resource_factory(resource::resource_type resource_type) const { std::lock_guard _(mutex_); return factories_.at(resource_type).first != resource::resource_type::LAST_KEY; diff --git a/docs/source/cpp_api/core_resources.rst b/docs/source/cpp_api/core_resources.rst index 0da11acae6..3c242af848 100644 --- a/docs/source/cpp_api/core_resources.rst +++ b/docs/source/cpp_api/core_resources.rst @@ -55,6 +55,23 @@ namespace *raft::core* :project: RAFT :members: +SNMG Device Resources +--------------------- + +The `raft::device_resources_snmg` provides a convenient way to design SNMG +(single-node multi-GPU) algorithms. It initiates device-related resources +for a set of devices forming clique. This includes NCCL communications. +GPUs can be addressed and exchanges be made over multiple threads +for performance or convenience. + +``#include `` + +namespace *raft::core* + +.. doxygenclass:: raft::device_resources_snmg + :project: RAFT + :members: + Resource Functions ------------------ From 8299f17621a26802714f5cb219f020c2783c8b6d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 16 Jan 2025 17:15:41 -0600 Subject: [PATCH 23/37] introduce libraft wheels (#2531) Replaces #2306, contributes to https://github.com/rapidsai/build-planning/issues/33. Proposes packaging `libraft` as a wheel, which is then re-used by: * `pylibraft-cu{11,12}` and `raft-cu{11,12}` (this PR) * `libcugraph-cu{11,12}`, `pylibcugraph-cu{11,12}`, and `cugraph-cu{11,12}` in https://github.com/rapidsai/cugraph/pull/4804 * `libcuml-cu{11,12}` and `cuml-cu{11,12}` in https://github.com/rapidsai/cuml/pull/6199 As part of this, also proposes: * introducing a new CMake option, `RAFT_COMPILE_DYNAMIC_ONLY`, to allow building/installing only the dynamic shared library (i.e. 
skipping the static library) * enforcing `rapids-cmake`'s preferred CMake style (https://github.com/rapidsai/raft/pull/2531#discussion_r1917039870) * making wheel-building CI jobs always depend on other wheel-building CI jobs, not tests or `*-publish` (to reduce end-to-end CI time) ## Notes for Reviewers ### Benefits of these changes * smaller wheels (see "Size Changes" below) * faster compile times (no more re-compiling RAFT in cuGraph and cuML CI) * other benefits mentioned in https://github.com/rapidsai/build-planning/issues/33 ### Wheel contents `libraft`: * `libraft.so` (shared library) * RAFT headers * vendored dependencies (`fmt`, CCCL, `cuco`, `cute`, `cutlass`) `pylibraft`: * `pylibraft` Python / Cython code and compiled Cython extensions `raft-dask`: * `raft-dask` Python / Cython code and compiled Cython extension ### Dependency Flows In short.... `libraft` contains a `libraft.so` dynamic library and the headers to link against it. * Anything that needs to link against RAFT at build time pulls in `libraft` wheels as a build dependency. * Anything that needs RAFT's symbols at runtime pulls it in as a runtime dependency, and calls `libraft.load_library()`. For more details and some flowcharts, see https://github.com/rapidsai/build-planning/issues/33#issuecomment-2590129852 ### Size changes (CUDA 12, Python 3.12, x86_64) | wheel | num files (before) | num files (these PRs) | size (before) | size (these PRs) | |:---------------:|------------------:|-----------------:|--------------:|-------------:| | `libraft`. | --- | 3169 | --- | 19M | | `pylibraft` | 64 | 63 | 11M | 1M | | `raft-dask` | 29 | 28 | 188M | 188M | | `libcugraph` | --- | 1762 | --- | 903M | | `pylibcugraph` | 190 | 187 | 901M | 2M | | `cugraph` | 315 | 313 | 899M | 3.0M | | `libcuml` | --- | 1766 | --- | 289M | | `cuml` | 442 | --- | 517M | --- | |**TOTAL** | **1,040** | **7,268** | **2,516M** | **1,405M** | *NOTES: size = compressed, "before" = 2025-01-13 nightlies*
how I calculated those (click me) * `cugraph`: nightly commit = https://github.com/rapidsai/cugraph/commit/8507cbf63db2f349136b266d3e6e787b189f45a0, PR = https://github.com/rapidsai/cugraph/pull/4804 * `cuml`: nightly commit = https://github.com/rapidsai/cuml/commit/7c715c494dff71274d0fdec774bdee12a7e78827, PR = https://github.com/rapidsai/cuml/pull/6199 * `raft`: nightly commit = https://github.com/rapidsai/raft/commit/1b62c4117a35b11ce3c830daae248e32ebf75e3f, PR = this PR ```shell docker run \ --rm \ --network host \ --env RAPIDS_NIGHTLY_DATE=2025-01-13 \ --env CUGRAPH_NIGHTLY_SHA=8507cbf63db2f349136b266d3e6e787b189f45a0 \ --env CUGRAPH_PR="pull-request/4804" \ --env CUGRAPH_PR_SHA="2ef32eaa006a84c0bd16220bb8e8af34198fbee8" \ --env CUML_NIGHTLY_SHA=7c715c494dff71274d0fdec774bdee12a7e78827 \ --env CUML_PR="pull-request/6199" \ --env CUML_PR_SHA="2ef32eaa006a84c0bd16220bb8e8af34198fbee8" \ --env RAFT_NIGHTLY_SHA=1b62c4117a35b11ce3c830daae248e32ebf75e3f \ --env RAFT_PR="pull-request/2531" \ --env RAFT_PR_SHA="0d6597b08919f2aae8ac268f1a68d6a8fe5beb4e" \ --env RAPIDS_PY_CUDA_SUFFIX=cu12 \ --env WHEEL_DIR_BEFORE=/tmp/wheels-before \ --env WHEEL_DIR_AFTER=/tmp/wheels-after \ -it rapidsai/ci-wheel:cuda12.5.1-rockylinux8-py3.12 \ bash # --- nightly wheels --- # mkdir -p ./wheels-before export RAPIDS_BUILD_TYPE=branch export RAPIDS_REF_NAME="branch-25.02" # pylibraft RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/raft \ RAPIDS_SHA=${RAFT_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # raft-dask RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/raft \ RAPIDS_SHA=${RAFT_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # cugraph RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cugraph \ RAPIDS_SHA=${CUGRAPH_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # pylibcugraph 
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cugraph \ RAPIDS_SHA=${CUGRAPH_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # cuml RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuml \ RAPIDS_SHA=${CUML_NIGHTLY_SHA} \ rapids-download-wheels-from-s3 python ./wheels-before # --- wheels from CI --- # mkdir -p ./wheels-after export RAPIDS_BUILD_TYPE="pull-request" # libraft RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/raft \ RAPIDS_REF_NAME="${RAFT_PR}" \ RAPIDS_SHA="${RAFT_PR_SHA}" \ rapids-download-wheels-from-s3 cpp ./wheels-after # pylibraft RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/raft \ RAPIDS_REF_NAME="${RAFT_PR}" \ RAPIDS_SHA="${RAFT_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after # raft-dask RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/raft \ RAPIDS_REF_NAME="${RAFT_PR}" \ RAPIDS_SHA="${RAFT_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after # libcugraph RAPIDS_PY_WHEEL_NAME="libcugraph_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cugraph \ RAPIDS_REF_NAME="${CUGRAPH_PR}" \ RAPIDS_SHA="${CUGRAPH_PR_SHA}" \ rapids-download-wheels-from-s3 cpp ./wheels-after # pylibcugraph RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cugraph \ RAPIDS_REF_NAME="${CUGRAPH_PR}" \ RAPIDS_SHA="${CUGRAPH_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after # cugraph RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cugraph \ RAPIDS_REF_NAME="${CUGRAPH_PR}" \ RAPIDS_SHA="${CUGRAPH_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after # libcuml RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuml \ RAPIDS_REF_NAME="${CUML_PR}" \ RAPIDS_SHA="${CUML_PR_SHA}" \ rapids-download-wheels-from-s3 
cpp ./wheels-after # cuml RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" \ RAPIDS_REPOSITORY=rapidsai/cuml \ RAPIDS_REF_NAME="${CUML_PR}" \ RAPIDS_SHA="${CUML_PR_SHA}" \ rapids-download-wheels-from-s3 python ./wheels-after pip install pydistcheck pydistcheck \ --inspect \ --select 'distro-too-large-compressed' \ ./wheels-before/*.whl \ | grep -E '^checking|files: | compressed' \ > ./before.txt # get more exact sizes du -sh ./wheels-before/* pydistcheck \ --inspect \ --select 'distro-too-large-compressed' \ ./wheels-after/*.whl \ | grep -E '^checking|files: | compressed' \ > ./after.txt # get more exact sizes du -sh ./wheels-after/* ```
### How I tested this These other PRs: * https://github.com/rapidsai/devcontainers/pull/435 * https://github.com/rapidsai/cugraph-gnn/pull/110 * https://github.com/rapidsai/cuml/pull/6199 * https://github.com/rapidsai/cugraph/pull/4804 --- .github/workflows/build.yaml | 26 ++++ .github/workflows/pr.yaml | 17 ++- build.sh | 7 +- ci/build_wheel.sh | 9 +- ci/build_wheel_libraft.sh | 43 +++++++ ci/build_wheel_pylibraft.sh | 21 ++-- ci/build_wheel_raft_dask.sh | 14 ++- ci/check_style.sh | 7 ++ ci/release/update-version.sh | 2 + ci/test_wheel_pylibraft.sh | 8 +- ci/test_wheel_raft_dask.sh | 8 +- ci/validate_wheel.sh | 16 +-- cpp/CMakeLists.txt | 73 +++++++---- cpp/cmake/modules/ConfigureCUDA.cmake | 4 +- dependencies.yaml | 97 +++++++++++++-- python/libraft/CMakeLists.txt | 65 ++++++++++ python/libraft/LICENSE | 1 + python/libraft/README.md | 1 + python/libraft/libraft/VERSION | 1 + python/libraft/libraft/__init__.py | 16 +++ python/libraft/libraft/_version.py | 33 +++++ python/libraft/libraft/load.py | 80 ++++++++++++ python/libraft/pyproject.toml | 115 ++++++++++++++++++ python/pylibraft/CMakeLists.txt | 59 +-------- python/pylibraft/pylibraft/__init__.py | 11 ++ .../pylibraft/pylibraft/common/CMakeLists.txt | 2 +- .../pylibraft/pylibraft/random/CMakeLists.txt | 2 +- .../pylibraft/sparse/linalg/CMakeLists.txt | 2 +- python/pylibraft/pyproject.toml | 13 +- python/raft-dask/CMakeLists.txt | 32 +---- .../raft-dask/cmake/thirdparty/get_ucxx.cmake | 4 +- python/raft-dask/pyproject.toml | 3 + python/raft-dask/raft_dask/__init__.py | 15 ++- .../raft-dask/raft_dask/common/CMakeLists.txt | 3 +- .../raft_dask/include_test/CMakeLists.txt | 3 +- rapids_config.cmake | 6 +- 36 files changed, 643 insertions(+), 176 deletions(-) create mode 100755 ci/build_wheel_libraft.sh create mode 100644 python/libraft/CMakeLists.txt create mode 120000 python/libraft/LICENSE create mode 120000 python/libraft/README.md create mode 120000 python/libraft/libraft/VERSION create mode 100644 
python/libraft/libraft/__init__.py create mode 100644 python/libraft/libraft/_version.py create mode 100644 python/libraft/libraft/load.py create mode 100644 python/libraft/pyproject.toml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5f80d8cfda..d484bcae22 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -66,7 +66,30 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ inputs.sha }} + wheel-build-libraft: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libraft.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-publish-libraft: + needs: wheel-build-libraft + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libraft + package-type: cpp wheel-build-pylibraft: + needs: wheel-build-libraft secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: @@ -85,7 +108,9 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: pylibraft + package-type: python wheel-build-raft-dask: + needs: wheel-build-libraft secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: @@ -104,3 +129,4 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: raft_dask + package-type: python diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a270df1dfa..9a51c783e9 100644 --- 
a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -21,6 +21,7 @@ jobs: - conda-python-build - conda-python-tests - docs-build + - wheel-build-libraft - wheel-build-pylibraft - wheel-tests-pylibraft - wheel-build-raft-dask @@ -116,10 +117,22 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - wheel-build-pylibraft: + wheel-build-libraft: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: pull-request + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libraft.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-build-pylibraft: + needs: [checks, wheel-build-libraft] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh @@ -132,7 +145,7 @@ jobs: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: - needs: wheel-tests-pylibraft + needs: [checks, wheel-build-libraft] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: diff --git a/build.sh b/build.sh index a95cb8ee23..de3ebfa3c5 100755 --- a/build.sh +++ b/build.sh @@ -347,13 +347,8 @@ if [[ ${CMAKE_TARGET} == "" ]]; then CMAKE_TARGET="all" fi -# Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. 
-SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" -if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then - SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON" -fi # Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS -SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g') +SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${EXTRA_CMAKE_ARGS} | sed 's/ /;/g') # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 326ee9a4c7..4c295c416e 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -5,6 +5,7 @@ set -euo pipefail package_name=$1 package_dir=$2 +package_type=$3 underscore_package_name=$(echo "${package_name}" | tr "-" "_") # Clear out system ucx files to ensure that we're getting ucx from the wheel. @@ -39,6 +40,12 @@ case "${RAPIDS_CUDA_VERSION}" in ;; esac +if [[ ${package_name} != "libraft" ]]; then + EXCLUDE_ARGS+=( + --exclude "libraft.so" + ) +fi + sccache --zero-stats rapids-logger "Building '${package_name}' wheel" @@ -55,4 +62,4 @@ sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* -RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_type} final_dist diff --git a/ci/build_wheel_libraft.sh b/ci/build_wheel_libraft.sh new file mode 100755 index 0000000000..825a5124a8 --- /dev/null +++ b/ci/build_wheel_libraft.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_name="libraft" +package_dir="python/libraft" + +rapids-logger "Generating build requirements" +matrix_selectors="cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "${matrix_selectors}" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=ON" + ;; + 11.*) + EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=OFF" + ;; +esac + +export SKBUILD_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" + +ci/build_wheel.sh libraft ${package_dir} cpp +ci/validate_wheel.sh ${package_dir} final_dist libraft diff --git a/ci/build_wheel_pylibraft.sh b/ci/build_wheel_pylibraft.sh index dd62ab5399..6f74e0e8c5 100755 --- a/ci/build_wheel_pylibraft.sh +++ b/ci/build_wheel_pylibraft.sh @@ -5,17 +5,16 @@ set -euo pipefail package_dir="python/pylibraft" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" - ;; - 11.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" - ;; -esac +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF${EXTRA_CMAKE_ARGS}" +# Downloads libraft wheels from this current build, +# then ensures 'pylibraft' wheel builds always use the 'libraft' just built in the same CI run. 
+# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libraft_dist +echo "libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libraft_dist/libraft_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -ci/build_wheel.sh pylibraft ${package_dir} +ci/build_wheel.sh pylibraft ${package_dir} python ci/validate_wheel.sh ${package_dir} final_dist pylibraft diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh index d49d131abf..0cacb6fe30 100755 --- a/ci/build_wheel_raft_dask.sh +++ b/ci/build_wheel_raft_dask.sh @@ -5,8 +5,16 @@ set -euo pipefail package_dir="python/raft-dask" -# Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -ci/build_wheel.sh raft-dask ${package_dir} +# Downloads libraft wheels from this current build, +# then ensures 'raft-dask' wheel builds always use the 'libraft' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. 
+RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libraft_dist +echo "libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libraft_dist/libraft_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" + +ci/build_wheel.sh raft-dask ${package_dir} python ci/validate_wheel.sh ${package_dir} final_dist raft-dask diff --git a/ci/check_style.sh b/ci/check_style.sh index d7ba4cae25..e0c30a2d41 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,5 +14,12 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks +# get config for cmake-format checks +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json" +export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json +mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) +wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} + # Run pre-commit checks pre-commit run --all-files --show-diff-on-failure diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index a70fed9ec8..1ab9157b89 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -43,6 +43,8 @@ echo "${NEXT_FULL_TAG}" > VERSION DEPENDENCIES=( dask-cuda + libraft + librmm pylibraft rmm rapids-dask-dependency diff --git a/ci/test_wheel_pylibraft.sh b/ci/test_wheel_pylibraft.sh index b38f5a690b..1e0b34d609 100755 --- a/ci/test_wheel_pylibraft.sh +++ b/ci/test_wheel_pylibraft.sh @@ -5,9 +5,13 @@ set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libraft-dep 
+RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/pylibraft*.whl)[test] +python -m pip install \ + ./local-libraft-dep/libraft*.whl \ + "$(echo ./dist/pylibraft*.whl)[test]" python -m pytest ./python/pylibraft/pylibraft/test diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index a778a3ec51..011de4d409 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -5,13 +5,13 @@ set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist - -# Download the pylibraft built in the previous step -RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibraft-dep +RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libraft-dep +RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-pylibraft-dep +RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install -v \ + ./local-libraft-dep/libraft*.whl \ ./local-pylibraft-dep/pylibraft*.whl \ "$(echo ./dist/raft_dask_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 5ef72ad895..ca506af004 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -10,23 +10,17 @@ package_name=$3 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" # some packages are much larger on CUDA 11 than on CUDA 12 -if [[ "${package_name}" == "raft-dask" ]]; then - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '200M' - ) -elif [[ "${package_name}" == "pylibraft" ]]; then +PYDISTCHECK_ARGS=() +if [[ 
"${package_name}" == "libraft" ]]; then if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then - PYDISTCHECK_ARGS=( - --max-allowed-size-compressed '600M' + PYDISTCHECK_ARGS+=( + --max-allowed-size-compressed '750M' ) else - PYDISTCHECK_ARGS=( + PYDISTCHECK_ARGS+=( --max-allowed-size-compressed '100M' ) fi -else - echo "Unsupported package name: ${package_name}" - exit 1 fi cd "${package_dir}" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 621f9fcef2..eb7e8540f0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -65,9 +65,12 @@ set(RAFT_COMPILE_LIBRARY_DEFAULT OFF) if(BUILD_TESTS OR BUILD_PRIMS_BENCH) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) endif() -option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" +option(RAFT_COMPILE_LIBRARY "Enable building raft library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) +option(RAFT_COMPILE_DYNAMIC_ONLY "Only build the shared library and skip the +static library. Has no effect if RAFT_COMPILE_LIBRARY is OFF" OFF +) # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. Setting this flag ensures @@ -311,17 +314,23 @@ if(RAFT_COMPILE_LIBRARY) # Make sure not to add the rmm logger twice since it will be brought in as an interface source by # the rmm::rmm_logger_impl target. 
add_library(raft_lib SHARED $<FILTER:$<TARGET_OBJECTS:raft_objs>,EXCLUDE,rmm.*logger>) - add_library(raft_lib_static STATIC $<FILTER:$<TARGET_OBJECTS:raft_objs>,EXCLUDE,rmm.*logger>) + + set(_raft_lib_targets raft_lib) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + add_library(raft_lib_static STATIC $<FILTER:$<TARGET_OBJECTS:raft_objs>,EXCLUDE,rmm.*logger>) + list(APPEND _raft_lib_targets raft_lib_static) + endif() set_target_properties( - raft_lib raft_lib_static + ${_raft_lib_targets} PROPERTIES OUTPUT_NAME raft BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" INTERFACE_POSITION_INDEPENDENT_CODE ON ) - foreach(target raft_lib raft_lib_static raft_objs) + list(APPEND _raft_lib_targets raft_objs) + foreach(target IN LISTS _raft_lib_targets) target_link_libraries( ${target} PUBLIC raft::raft @@ -336,7 +345,9 @@ if(RAFT_COMPILE_LIBRARY) target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") endforeach() target_link_libraries(raft_lib PRIVATE rmm::rmm_logger_impl raft_logger_impl) - target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl raft_logger_impl) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl raft_logger_impl) + endif() endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) @@ -348,20 +359,22 @@ target_link_libraries(raft_compiled INTERFACE raft::raft $ -) + target_link_libraries( + raft_compiled_static INTERFACE raft::raft $ + ) +endif() # ################################################################################################## # * raft_distributed ------------------------------------------------------------------------------- @@ -410,8 +423,12 @@ install( EXPORT raft-exports ) +set(_raft_compiled_install_targets raft_compiled) +if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + list(APPEND _raft_compiled_install_targets raft_compiled_static) +endif() install( - TARGETS raft_compiled raft_compiled_static + TARGETS ${_raft_compiled_install_targets} DESTINATION ${lib_dir} COMPONENT raft EXPORT raft-compiled-exports @@ -424,12 +441,14 @@ if(TARGET raft_lib) COMPONENT compiled EXPORT
raft-compiled-lib-exports ) - install( - TARGETS raft_lib_static - DESTINATION ${lib_dir} - COMPONENT compiled-static - EXPORT raft-compiled-static-lib-exports - ) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + install( + TARGETS raft_lib_static + DESTINATION ${lib_dir} + COMPONENT compiled-static + EXPORT raft-compiled-static-lib-exports + ) + endif() install( DIRECTORY include/raft_runtime DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} @@ -500,8 +519,12 @@ endif() set(raft_components compiled distributed) set(raft_export_sets raft-compiled-exports raft-distributed-exports) if(TARGET raft_lib) - list(APPEND raft_components compiled compiled-static) - list(APPEND raft_export_sets raft-compiled-lib-exports raft-compiled-static-lib-exports) + list(APPEND raft_components compiled) + list(APPEND raft_export_sets raft-compiled-lib-exports) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + list(APPEND raft_components compiled-static) + list(APPEND raft_export_sets raft-compiled-static-lib-exports) + endif() endif() string( diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index b364d8418d..25b9b0ddf8 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -14,7 +14,9 @@ if(DISABLE_DEPRECATION_WARNINGS) list(APPEND RAFT_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) + list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations + -DRAFT_HIDE_DEPRECATION_WARNINGS + ) endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with diff --git a/dependencies.yaml b/dependencies.yaml index 689cf8414c..44c240b6ce 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,6 +6,8 @@ files: cuda: ["11.8", "12.5"] arch: [x86_64, aarch64] includes: + - build_common + - build_cython - checks - cuda - cuda_version @@ -15,7 +17,6 @@ files: - 
depends_on_rmm - develop - docs - - rapids_build - rapids_build_skbuild - run_pylibraft - run_raft_dask @@ -48,6 +49,29 @@ files: - docs - py_version - test_pylibraft + py_build_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_common + - depends_on_librmm + py_run_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: project + includes: + - cuda_wheels py_build_pylibraft: output: pyproject pyproject_dir: python/pylibraft @@ -62,16 +86,19 @@ files: table: tool.rapids-build-backend key: requires includes: + - build_common + - build_cython + - depends_on_libraft + - depends_on_librmm - depends_on_cuda_python - depends_on_rmm - - rapids_build py_run_pylibraft: output: pyproject pyproject_dir: python/pylibraft extras: table: project includes: - - cuda_wheels + - depends_on_libraft - depends_on_cuda_python - depends_on_rmm - run_pylibraft @@ -99,8 +126,11 @@ files: table: tool.rapids-build-backend key: requires includes: + - build_common + - build_cython + - depends_on_libraft + - depends_on_librmm - depends_on_ucx_build - - rapids_build py_run_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -108,6 +138,7 @@ files: table: project includes: - depends_on_distributed_ucxx + - depends_on_libraft - run_raft_dask py_test_raft_dask: output: pyproject @@ -135,12 +166,11 @@ dependencies: - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.10.0 - rapids_build: + build_common: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - - cython>=3.0.0,<3.1.0a0 - ninja - output_types: [conda] packages: @@ -182,7 +212,11 @@ dependencies: packages: [nvcc_linux-64=11.2] - matrix: {cuda: "11.2", arch: aarch64} packages: 
[nvcc_linux-aarch64=11.2] - + build_cython: + common: + - output_types: [conda, requirements, pyproject] + packages: + - cython>=3.0.0,<3.1.0a0 checks: common: - output_types: [conda, requirements] @@ -471,6 +505,55 @@ dependencies: packages: - distributed-ucxx-cu11==0.42.*,>=0.0.0a0 - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} + depends_on_libraft: + common: + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libraft-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libraft-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - libraft==25.2.*,>=0.0.0a0 + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed depends_on_rmm: common: - output_types: conda diff --git a/python/libraft/CMakeLists.txt b/python/libraft/CMakeLists.txt new file mode 100644 index 0000000000..57efcd61ab --- /dev/null +++ b/python/libraft/CMakeLists.txt @@ -0,0 +1,65 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libraft-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) + +# Check if raft is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python raft package. 
+find_package(raft "${RAPIDS_VERSION}") + +if(raft_FOUND) + return() +endif() + +unset(raft_FOUND) + +# --- CUDA --- # +find_package(CUDAToolkit REQUIRED) +set(CUDA_STATIC_RUNTIME ON) +set(CUDA_STATIC_MATH_LIBRARIES ON) +if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) + set(CUDA_STATIC_MATH_LIBRARIES OFF) +elseif(USE_CUDA_MATH_WHEELS) + message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") +endif() + +# --- RAFT ---# +set(BUILD_TESTS OFF) +set(BUILD_PRIMS_BENCH OFF) +set(RAFT_COMPILE_DYNAMIC_ONLY ON) +set(RAFT_COMPILE_LIBRARY ON) + +add_subdirectory(../../cpp raft-cpp) + +if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) + set_property( + TARGET raft_lib + PROPERTY INSTALL_RPATH + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) +endif() diff --git a/python/libraft/LICENSE b/python/libraft/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/libraft/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libraft/README.md b/python/libraft/README.md new file mode 120000 index 0000000000..fe84005413 --- /dev/null +++ b/python/libraft/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/libraft/libraft/VERSION b/python/libraft/libraft/VERSION new file mode 120000 index 0000000000..d62dc733ef --- /dev/null +++ b/python/libraft/libraft/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libraft/libraft/__init__.py b/python/libraft/libraft/__init__.py new file mode 100644 index 0000000000..9260f4e67c --- /dev/null +++ b/python/libraft/libraft/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libraft._version import __git_commit__, __version__ +from libraft.load import load_library diff --git a/python/libraft/libraft/_version.py b/python/libraft/libraft/_version.py new file mode 100644 index 0000000000..530bf8bea6 --- /dev/null +++ b/python/libraft/libraft/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libraft/libraft/load.py b/python/libraft/libraft/load.py new file mode 100644 index 0000000000..ad3db9e09c --- /dev/null +++ b/python/libraft/libraft/load.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + +# Loading with RTLD_LOCAL adds the library itself to the loader's +# loaded library cache without loading any symbols into the global +# namespace. This allows libraries that express a dependency on +# this library to be loaded later and successfully satisfy this dependency +# without polluting the global symbol table with symbols from +# libraft that could conflict with symbols from other DSOs. +PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL + + +def _load_system_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Raises ``OSError`` if library cannot be loaded. + """ + return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG) + + +def _load_wheel_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Returns ``None`` if the library cannot be loaded. 
+ """ + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), "lib64", soname) + ): + return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG) + return None + + +def load_library(): + """Dynamically load libraft.so and its dependencies""" + prefer_system_installation = ( + os.getenv("RAPIDS_LIBRAFT_PREFER_SYSTEM_LIBRARY", "false").lower() + != "false" + ) + + soname = "libraft.so" + libraft_lib = None + if prefer_system_installation: + # Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + try: + libraft_lib = _load_system_installation(soname) + except OSError: + libraft_lib = _load_wheel_installation(soname) + else: + # Prefer the libraries bundled in this package. If they aren't found + # (which might be the case in builds where the library was prebuilt + # before packaging the wheel), look for a system installation. + try: + libraft_lib = _load_wheel_installation(soname) + if libraft_lib is None: + libraft_lib = _load_system_installation(soname) + except OSError: + # If none of the searches above succeed, just silently return None + # and rely on other mechanisms (like RPATHs on other DSOs) to + # help the loader find the library. + pass + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libraft was loaded from. + return libraft_lib diff --git a/python/libraft/pyproject.toml b/python/libraft/pyproject.toml new file mode 100644 index 0000000000..549a1bf651 --- /dev/null +++ b/python/libraft/pyproject.toml @@ -0,0 +1,115 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] + +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +build-backend = "rapids_build_backend.build" + +[project] +name = "libraft" +dynamic = ["version"] +description = "RAFT: Reusable Algorithms Functions and other Tools (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +dependencies = [ + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+classifiers = [ + "Intended Audience :: Developers", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" +Documentation = "https://docs.rapids.ai/api/raft/stable/" + +[project.entry-points."cmake.prefix"] +libraft = "libraft" + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_first_party = [ + "libraft", +] +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.install-dir = "libraft" +wheel.packages = ["libraft"] +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libraft/VERSION" +regex = "(?P<value>.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "librmm==25.2.*,>=0.0.0a0", + "ninja", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" + +[tool.pydistcheck] +select = [ + # NOTE: size threshold is managed via CLI args in CI scripts + "distro-too-large-compressed", +] diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index 758c1e4711..83c262dc10 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -27,68 +27,13 @@ project( LANGUAGES CXX CUDA ) -option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" - ON -) -option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) - -# If the user requested it we attempt to find RAFT. -if(FIND_RAFT_CPP) - find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS compiled) - if(NOT TARGET raft::raft_lib) - message( - FATAL_ERROR - "Building against a preexisting libraft library requires the compiled libraft to have been built!" - ) - - endif() -else() - set(raft_FOUND OFF) -endif() +# an installed version of raft contains the other necessary targets (like CCCL and cuco) +find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS raft compiled) include(rapids-cython-core) -if(NOT raft_FOUND) - find_package(CUDAToolkit REQUIRED) - - set(BUILD_TESTS OFF) - set(BUILD_PRIMS_BENCH OFF) - set(RAFT_COMPILE_LIBRARY ON) - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) - set(CUDA_STATIC_MATH_LIBRARIES OFF) - elseif(USE_CUDA_MATH_WHEELS) - message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") - endif() - - add_subdirectory(../../cpp raft-cpp EXCLUDE_FROM_ALL) - - if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set_property( - TARGET raft_lib - PROPERTY INSTALL_RPATH - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" - ) - 
endif() - - # When building the C++ libraries from source we must copy libraft.so alongside the - # pairwise_distance and random Cython libraries TODO: when we have a single 'compiled' raft - # library, we shouldn't need this - set(cython_lib_dir pylibraft) - install(TARGETS raft_lib DESTINATION ${cython_lib_dir}) -endif() - rapids_cython_init() add_subdirectory(pylibraft/common) add_subdirectory(pylibraft/random) add_subdirectory(pylibraft/sparse) - -if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET raft PATHS "${cython_lib_dir}") -endif() diff --git a/python/pylibraft/pylibraft/__init__.py b/python/pylibraft/pylibraft/__init__.py index b0869501f3..a01e02ec33 100644 --- a/python/pylibraft/pylibraft/__init__.py +++ b/python/pylibraft/pylibraft/__init__.py @@ -13,4 +13,15 @@ # limitations under the License. # +# If libraft was installed as a wheel, we must request it to load the library +# symbols. Otherwise, we assume that the library was installed in a system path that ld +# can find. 
+try: + import libraft +except ModuleNotFoundError: + pass +else: + libraft.load_library() + del libraft + from pylibraft._version import __git_commit__, __version__ diff --git a/python/pylibraft/pylibraft/common/CMakeLists.txt b/python/pylibraft/pylibraft/common/CMakeLists.txt index 53279bfaf7..d1c1acb3aa 100644 --- a/python/pylibraft/pylibraft/common/CMakeLists.txt +++ b/python/pylibraft/pylibraft/common/CMakeLists.txt @@ -20,5 +20,5 @@ set(linked_libraries raft::raft) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX common_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX common_ ) diff --git a/python/pylibraft/pylibraft/random/CMakeLists.txt b/python/pylibraft/pylibraft/random/CMakeLists.txt index 10ff776471..7d61855111 100644 --- a/python/pylibraft/pylibraft/random/CMakeLists.txt +++ b/python/pylibraft/pylibraft/random/CMakeLists.txt @@ -23,5 +23,5 @@ set(linked_libraries raft::raft raft::compiled) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX random_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX random_ ) diff --git a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt index ef16981644..7b2c9f6162 100644 --- a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt +++ b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt @@ -23,5 +23,5 @@ set(linked_libraries raft::raft raft::compiled) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX sparse_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX sparse_ ) diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index ba454af591..912f1ad947 100644 --- a/python/pylibraft/pyproject.toml +++ 
b/python/pylibraft/pyproject.toml @@ -32,11 +32,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python", + "libraft==25.2.*,>=0.0.0a0", "numpy>=1.23,<3.0a0", - "nvidia-cublas", - "nvidia-curand", - "nvidia-cusolver", - "nvidia-cusparse", "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -124,18 +121,22 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cuda-python", "cython>=3.0.0,<3.1.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +matrix-entry = "cuda_suffixed=true" [tool.pydistcheck] select = [ - # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] filterwarnings = [ "error", diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 9ebbaa5298..1fcb40a58d 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -25,38 +25,16 @@ project( LANGUAGES CXX CUDA ) -option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" - OFF -) - rapids_cpm_init() # Once https://github.com/rapidsai/ucxx/issues/173 is resolved we can remove this. find_package(ucx REQUIRED) include(cmake/thirdparty/get_ucxx.cmake) -# If the user requested it we attempt to find RAFT. 
-if(FIND_RAFT_CPP) - find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS distributed) -else() - set(raft_FOUND OFF) -endif() - -if(NOT raft_FOUND) - # raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all - # library compilation and we don't need to install anything here. - set(BUILD_TESTS OFF) - set(BUILD_PRIMS_BENCH OFF) - set(RAFT_COMPILE_LIBRARIES OFF) - set(RAFT_COMPILE_DIST_LIBRARY OFF) - set(RAFT_COMPILE_NN_LIBRARY OFF) - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - set(RAFT_DASK_UCXX_STATIC ON) - - add_subdirectory(../../cpp raft-cpp EXCLUDE_FROM_ALL) - list(APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/cmake/find_modules) - find_package(NCCL REQUIRED) -endif() +# why these components: +# +# * 'raft' = the headers, needed to link against libraft +# * 'distributed' = needed for NCCL +find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS raft distributed) include(rapids-cython-core) rapids_cython_init() diff --git a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake index f5daf70f92..e6b9c4aa0e 100644 --- a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake +++ b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake @@ -45,8 +45,8 @@ function(find_and_configure_ucxx) endfunction() # Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_raft_SOURCE=/path/to/local/raft +# To use a different ucxx locally, set the CMake variable +# CPM_ucxx_SOURCE=/path/to/local/ucxx find_and_configure_ucxx(VERSION 0.42 FORK rapidsai PINNED_TAG branch-0.42 diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index cabe8e72a6..d3a26db282 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -33,6 +33,7 @@ requires-python = ">=3.10" dependencies = [ "dask-cuda==25.2.*,>=0.0.0a0", "distributed-ucxx==0.42.*,>=0.0.0a0", + "libraft==25.2.*,>=0.0.0a0", 
"pylibraft==25.2.*,>=0.0.0a0", "rapids-dask-dependency==25.2.*,>=0.0.0a0", "ucx-py==0.42.*,>=0.0.0a0", @@ -119,6 +120,8 @@ build-backend = "scikit_build_core.build" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.0,<3.1.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "libucx==1.15.0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/raft-dask/raft_dask/__init__.py b/python/raft-dask/raft_dask/__init__.py index 19a037ae75..78248fad7a 100644 --- a/python/raft-dask/raft_dask/__init__.py +++ b/python/raft-dask/raft_dask/__init__.py @@ -13,8 +13,6 @@ # limitations under the License. # -from raft_dask._version import __git_commit__, __version__ - # If libucx was installed as a wheel, we must request it to load the library symbols. # Otherwise, we assume that the library was installed in a system path that ld can find. try: @@ -24,3 +22,16 @@ else: libucx.load_library() del libucx + +# If libraft was installed as a wheel, we must request it to load the library +# symbols. Otherwise, we assume that the library was installed in a system path that ld +# can find. 
+try: + import libraft +except ModuleNotFoundError: + pass +else: + libraft.load_library() + del libraft + +from raft_dask._version import __git_commit__, __version__ diff --git a/python/raft-dask/raft_dask/common/CMakeLists.txt b/python/raft-dask/raft_dask/common/CMakeLists.txt index 65d5f06577..1279d5d501 100644 --- a/python/raft-dask/raft_dask/common/CMakeLists.txt +++ b/python/raft-dask/raft_dask/common/CMakeLists.txt @@ -15,6 +15,5 @@ set(cython_sources comms_utils.pyx nccl.pyx) set(linked_libraries raft::raft raft::distributed) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" ASSOCIATED_TARGETS raft LINKED_LIBRARIES "${linked_libraries}" - CXX + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX ) diff --git a/python/raft-dask/raft_dask/include_test/CMakeLists.txt b/python/raft-dask/raft_dask/include_test/CMakeLists.txt index 2ff1cd9150..8839c57b91 100644 --- a/python/raft-dask/raft_dask/include_test/CMakeLists.txt +++ b/python/raft-dask/raft_dask/include_test/CMakeLists.txt @@ -15,6 +15,5 @@ set(cython_sources raft_include_test.pyx) set(linked_libraries raft::raft) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" ASSOCIATED_TARGETS raft LINKED_LIBRARIES "${linked_libraries}" - CXX + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX ) diff --git a/rapids_config.cmake b/rapids_config.cmake index c8077f7f4b..a40d7130c0 100644 --- a/rapids_config.cmake +++ b/rapids_config.cmake @@ -22,13 +22,15 @@ else() string(REPLACE "\n" "\n " _rapids_version_formatted " ${_rapids_version}") message( FATAL_ERROR - "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") + "Could not determine RAPIDS version. 
Contents of VERSION file:\n${_rapids_version_formatted}" + ) endif() if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") file( DOWNLOAD "https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/RAPIDS.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") + "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake" + ) endif() include("${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") From 097ac45fd98f61109a943b2f33757d77532edb17 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 16 Jan 2025 23:25:14 -0500 Subject: [PATCH 24/37] Revert "Introduction of the `raft::device_resources_snmg` type (#2487)" (#2543) This reverts commit fb6bfe6ee956a5e40295300d453f1261ece3cedf. Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/2543 --- cpp/include/raft/comms/nccl_clique.hpp | 156 +++++++++++++ .../raft/core/device_resources_snmg.hpp | 217 ------------------ .../raft/core/resource/nccl_clique.hpp | 66 ++++++ cpp/include/raft/core/resources.hpp | 3 +- docs/source/cpp_api/core_resources.rst | 17 -- 5 files changed, 223 insertions(+), 236 deletions(-) create mode 100644 cpp/include/raft/comms/nccl_clique.hpp delete mode 100644 cpp/include/raft/core/device_resources_snmg.hpp create mode 100644 cpp/include/raft/core/resource/nccl_clique.hpp diff --git a/cpp/include/raft/comms/nccl_clique.hpp b/cpp/include/raft/comms/nccl_clique.hpp new file mode 100644 index 0000000000..c6520af753 --- /dev/null +++ b/cpp/include/raft/comms/nccl_clique.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define RAFT_NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + ncclGetErrorString(status)); \ + throw raft::logic_error(msg); \ + } \ + } while (0); + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank); +} + +namespace raft::comms { + +struct nccl_clique { + using pool_mr = rmm::mr::pool_memory_resource; + + /** + * Instantiates a NCCL clique with all available GPUs + * + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(int percent_of_free_memory = 80) + : root_rank_(0), + percent_of_free_memory_(percent_of_free_memory), + per_device_pools_(0), + device_resources_(0) + { + cudaGetDeviceCount(&num_ranks_); + device_ids_.resize(num_ranks_); + std::iota(device_ids_.begin(), device_ids_.end(), 0); + nccl_comms_.resize(num_ranks_); + nccl_clique_init(); + } + + /** + * Instantiates a NCCL clique + * + * Usage example: + * @code{.cpp} + * int n_devices; + * cudaGetDeviceCount(&n_devices); + * std::vector device_ids(n_devices); + * 
std::iota(device_ids.begin(), device_ids.end(), 0); + * cuvs::neighbors::mg::nccl_clique& clique(device_ids); // first device is the root rank + * @endcode + * + * @param[in] device_ids list of device IDs to be used to initiate the clique + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(const std::vector& device_ids, int percent_of_free_memory = 80) + : root_rank_(0), + num_ranks_(device_ids.size()), + percent_of_free_memory_(percent_of_free_memory), + device_ids_(device_ids), + nccl_comms_(device_ids.size()), + per_device_pools_(0), + device_resources_(0) + { + nccl_clique_init(); + } + + void nccl_clique_init() + { + RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), num_ranks_, device_ids_.data())); + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + + // create a pool memory resource for each device + auto old_mr = rmm::mr::get_current_device_resource(); + per_device_pools_.push_back(std::make_unique( + old_mr, rmm::percent_of_free_device_memory(percent_of_free_memory_))); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, per_device_pools_.back().get()); + + // create a device resource handle for each device + device_resources_.emplace_back(); + + // add NCCL communications to the device resource handle + raft::comms::build_comms_nccl_only( + &device_resources_[rank], nccl_comms_[rank], num_ranks_, rank); + } + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + raft::resource::sync_stream(device_resources_[rank]); + } + } + + const raft::device_resources& set_current_device_to_root_rank() const + { + int root_device_id = device_ids_[root_rank_]; + RAFT_CUDA_TRY(cudaSetDevice(root_device_id)); + return device_resources_[root_rank_]; + } + + ~nccl_clique() + { +#pragma omp parallel for // necessary to avoid hangs + for (int rank = 0; rank < num_ranks_; 
rank++) { + cudaSetDevice(device_ids_[rank]); + ncclCommDestroy(nccl_comms_[rank]); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, nullptr); + } + } + + int root_rank_; + int num_ranks_; + int percent_of_free_memory_; + std::vector device_ids_; + std::vector nccl_comms_; + std::vector> per_device_pools_; + std::vector device_resources_; +}; + +} // namespace raft::comms diff --git a/cpp/include/raft/core/device_resources_snmg.hpp b/cpp/include/raft/core/device_resources_snmg.hpp deleted file mode 100644 index f20a81a1c6..0000000000 --- a/cpp/include/raft/core/device_resources_snmg.hpp +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include - -#include -#include - -/** - * @brief Error checking macro for NCCL runtime API functions. 
- * - * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an - * exception detailing the NCCL error that occurred - */ -#define RAFT_NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "NCCL error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - ncclGetErrorString(status)); \ - throw raft::logic_error(msg); \ - } \ - } while (0); - -namespace raft { - -/** - * @brief SNMG (single-node multi-GPU) resource container object that stores a NCCL clique and all - * necessary resources used for calling device functions, cuda kernels, libraries and/or NCCL - * communications on each GPU. Note the `device_resources_snmg` object can also be used as a classic - * `device_resources` object. The associated resources will be the ones of the GPU used during - * object instantiation and a GPU switch operation will be ordered during the retrieval of said - * resources. - * - * The `device_resources_snmg` class is intended to be used in a single process to manage several - * GPUs. Please note that NCCL communications are the responsibility of the user. Blocking NCCL - * calls will sometimes require the use of several threads to avoid hangs. 
- */ -class device_resources_snmg : public device_resources { - public: - /** - * @brief Construct a SNMG resources instance with all available GPUs - */ - device_resources_snmg() : device_resources(), root_rank_(0) - { - cudaGetDevice(&main_gpu_id_); - - int num_ranks; - RAFT_CUDA_TRY(cudaGetDeviceCount(&num_ranks)); - device_ids_.resize(num_ranks); - std::iota(device_ids_.begin(), device_ids_.end(), 0); - nccl_comms_.resize(num_ranks); - initialize(); - } - - /** - * @brief Construct a SNMG resources instance with a subset of available GPUs - * - * @param[in] device_ids List of device IDs to be used by the NCCL clique - */ - device_resources_snmg(const std::vector& device_ids) - : device_resources(), root_rank_(0), device_ids_(device_ids), nccl_comms_(device_ids.size()) - { - cudaGetDevice(&main_gpu_id_); - - initialize(); - } - - /** - * @brief SNMG resources instance copy constructor - * - * @param[in] clique A SNMG resources instance - */ - device_resources_snmg(const device_resources_snmg& clique) - : device_resources(clique), - root_rank_(clique.root_rank_), - main_gpu_id_(clique.main_gpu_id_), - device_ids_(clique.device_ids_), - nccl_comms_(clique.nccl_comms_), - device_resources_(clique.device_resources_) - { - } - - device_resources_snmg(device_resources_snmg&&) = delete; - device_resources_snmg& operator=(device_resources_snmg&&) = delete; - - /** - * @brief Set root rank of NCCL clique - */ - inline int set_root_rank(int rank) { this->root_rank_ = rank; } - - /** - * @brief Get root rank of NCCL clique - */ - inline int get_root_rank() const { return this->root_rank_; } - - /** - * @brief Get number of ranks in NCCL clique - */ - inline int get_num_ranks() const { return this->device_ids_.size(); } - - /** - * @brief Get device ID of rank in NCCL clique - */ - inline int get_device_id(int rank) const { return this->device_ids_[rank]; } - - /** - * @brief Get NCCL comm object of rank in NCCL clique - */ - inline ncclComm_t get_nccl_comm(int rank) const 
{ return this->nccl_comms_[rank]; } - - /** - * @brief Get raft::device_resources object of rank in NCCL clique - */ - inline const raft::device_resources& get_device_resources(int rank) const - { - return this->device_resources_[rank]; - } - - /** - * @brief Set current device ID to root rank and return its raft::device_resources object - */ - inline const raft::device_resources& set_current_device_to_root_rank() const - { - return set_current_device_to_rank(get_root_rank()); - } - - /** - * @brief Set current device ID to rank and return its raft::device_resources object - */ - inline const raft::device_resources& set_current_device_to_rank(int rank) const - { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - return get_device_resources(rank); - } - - /** - * @brief Set a memory pool on all GPUs of the clique - */ - void set_memory_pool(int percent_of_free_memory) const - { - for (int rank = 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - size_t limit = - rmm::percent_of_free_device_memory(percent_of_free_memory); // check limit for each device - raft::resource::set_workspace_to_pool_resource(get_device_resources(rank), limit); - } - cudaSetDevice(this->main_gpu_id_); - } - - bool has_resource_factory(resource::resource_type resource_type) const override - { - cudaSetDevice(this->main_gpu_id_); - return raft::resources::has_resource_factory(resource_type); - } - - /** Destroys all held-up resources */ - ~device_resources_snmg() - { -#pragma omp parallel for // necessary to avoid hangs - for (int rank = 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - RAFT_NCCL_TRY(ncclCommDestroy(get_nccl_comm(rank))); - } - cudaSetDevice(this->main_gpu_id_); - } - - private: - /** - * @brief Initializes the NCCL clique and raft::device_resources objects - */ - void initialize() - { - RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), get_num_ranks(), device_ids_.data())); - - for (int rank 
= 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - device_resources_.emplace_back(); - - // ideally add the ncclComm_t to the device_resources object with - // raft::comms::build_comms_nccl_only - } - cudaSetDevice(this->main_gpu_id_); - } - - int root_rank_; - int main_gpu_id_; - std::vector device_ids_; - std::vector nccl_comms_; - std::vector device_resources_; - -}; // class device_resources_snmg - -} // namespace raft diff --git a/cpp/include/raft/core/resource/nccl_clique.hpp b/cpp/include/raft/core/resource/nccl_clique.hpp new file mode 100644 index 0000000000..edda5043ae --- /dev/null +++ b/cpp/include/raft/core/resource/nccl_clique.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace raft::resource { + +class nccl_clique_resource : public resource { + public: + nccl_clique_resource() : clique_(std::make_unique()) {} + ~nccl_clique_resource() override {} + void* get_resource() override { return clique_.get(); } + + private: + std::unique_ptr clique_; +}; + +/** Factory that knows how to construct a specific raft::resource to populate the res_t. 
*/ +class nccl_clique_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::NCCL_CLIQUE; } + resource* make_resource() override { return new nccl_clique_resource(); } +}; + +/** + * @defgroup nccl_clique_resource resource functions + * @{ + */ + +/** + * Retrieves a NCCL clique from raft res if it exists, otherwise initializes it and return it. + * + * @param[in] res the raft resources object + * @return NCCL clique + */ +inline const raft::comms::nccl_clique& get_nccl_clique(resources const& res) +{ + if (!res.has_resource_factory(resource_type::NCCL_CLIQUE)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::NCCL_CLIQUE); +}; + +/** + * @} + */ + +} // namespace raft::resource diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index 44525edb23..b0827d8e11 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -72,7 +72,6 @@ class resources { resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {} resources(resources&&) = delete; resources& operator=(resources&&) = delete; - virtual ~resources() {} /** * @brief Returns true if a resource_factory has been registered for the @@ -80,7 +79,7 @@ class resources { * @param resource_type resource type to check * @return true if resource_factory is registered for the given resource_type */ - virtual bool has_resource_factory(resource::resource_type resource_type) const + bool has_resource_factory(resource::resource_type resource_type) const { std::lock_guard _(mutex_); return factories_.at(resource_type).first != resource::resource_type::LAST_KEY; diff --git a/docs/source/cpp_api/core_resources.rst b/docs/source/cpp_api/core_resources.rst index 3c242af848..0da11acae6 100644 --- a/docs/source/cpp_api/core_resources.rst +++ b/docs/source/cpp_api/core_resources.rst @@ -55,23 +55,6 @@ namespace 
*raft::core* :project: RAFT :members: -SNMG Device Resources ---------------------- - -The `raft::device_resources_snmg` provides a convenient way to design SNMG -(single-node multi-GPU) algorithms. It initiates device-related resources -for a set of devices forming clique. This includes NCCL communications. -GPUs can be addressed and exchanges be made over multiple threads -for performance or convenience. - -``#include `` - -namespace *raft::core* - -.. doxygenclass:: raft::device_resources_snmg - :project: RAFT - :members: - Resource Functions ------------------ From 501c8ce3b7b0ff56792921ced23ab140c6cea677 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 17 Jan 2025 14:11:52 -0600 Subject: [PATCH 25/37] Use GCC 13 in CUDA 12 conda builds. (#2539) conda-forge is using GCC 13 for CUDA 12 builds. This PR updates CUDA 12 conda builds to use GCC 13, for alignment. These PRs should be merged in a specific order, see https://github.com/rapidsai/build-planning/issues/129 for details. Authors: - Bradley Dice (https://github.com/bdice) - James Lamb (https://github.com/jameslamb) - https://github.com/jakirkham Approvers: - James Lamb (https://github.com/jameslamb) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/2539 --- .../all_cuda-118_arch-aarch64.yaml | 2 +- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-aarch64.yaml | 4 +-- .../all_cuda-125_arch-x86_64.yaml | 4 +-- conda/recipes/libraft/conda_build_config.yaml | 14 +++++----- conda/recipes/libraft/meta.yaml | 28 ++++++------------- .../recipes/pylibraft/conda_build_config.yaml | 14 +++++----- conda/recipes/pylibraft/meta.yaml | 6 ++-- .../recipes/raft-dask/conda_build_config.yaml | 14 +++++----- conda/recipes/raft-dask/meta.yaml | 6 ++-- cpp/test/label/label.cu | 4 +-- dependencies.yaml | 18 ++++++++++-- 12 files changed, 57 insertions(+), 59 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml 
b/conda/environments/all_cuda-118_arch-aarch64.yaml index 793ca8dc67..ecd9aa1ece 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -55,6 +55,6 @@ dependencies: - spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a9f839bd03..2f655ae077 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -55,6 +55,6 @@ dependencies: - spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 9d7286bb8e..d790e985fa 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -24,7 +24,7 @@ dependencies: - dask-cuda==25.2.*,>=0.0.0a0 - distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 -- gcc_linux-aarch64=11.* +- gcc_linux-aarch64=13.* - graphviz - ipython - libcublas-dev @@ -51,6 +51,6 @@ dependencies: - spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 +- sysroot_linux-aarch64==2.28 - ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index e4ec074ae5..63808d99c0 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - dask-cuda==25.2.*,>=0.0.0a0 - distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - 
graphviz - ipython - libcublas-dev @@ -51,6 +51,6 @@ dependencies: - spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 4857f12cd1..11b16bc2a8 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index 503c4cb6fb..dbde4e3971 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -39,10 +39,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - librmm @@ -51,7 +49,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -85,11 +83,7 @@ outputs: number: {{ 
GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} - {% endif %} - librmm requirements: host: @@ -130,10 +124,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -145,7 +137,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -196,10 +188,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} requirements: @@ -207,7 +197,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -258,10 +248,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -273,7 +261,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ 
cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/pylibraft/conda_build_config.yaml b/conda/recipes/pylibraft/conda_build_config.yaml index 001878ff25..83f5ebcb15 100644 --- a/conda/recipes/pylibraft/conda_build_config.yaml +++ b/conda/recipes/pylibraft/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml index 0b57432402..8f498c7e50 100644 --- a/conda/recipes/pylibraft/meta.yaml +++ b/conda/recipes/pylibraft/meta.yaml @@ -18,10 +18,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -31,7 +29,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index 68140e6bc0..d567266027 
100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" ucx_py_version: - "0.42.*" diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index 19155166af..29c7f568f1 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -18,10 +18,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -31,7 +29,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/cpp/test/label/label.cu b/cpp/test/label/label.cu index 4c3479182f..34a336de59 100644 --- a/cpp/test/label/label.cu +++ b/cpp/test/label/label.cu @@ -59,8 +59,8 @@ TEST_F(MakeMonotonicTest, Result) ASSERT_TRUE(devArrMatch(actual.data(), expected.data(), m, raft::Compare(), stream)); - delete data_h; - delete expected_h; + delete[] data_h; + delete[] expected_h; } TEST(labelTest, 
Classlabels) diff --git a/dependencies.yaml b/dependencies.yaml index 44c240b6ce..a2d75fd3d6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -184,14 +184,28 @@ dependencies: matrices: - matrix: arch: x86_64 + cuda: "11.8" packages: - gcc_linux-64=11.* - - sysroot_linux-64==2.17 + - sysroot_linux-64==2.28 - matrix: arch: aarch64 + cuda: "11.8" packages: - gcc_linux-aarch64=11.* - - sysroot_linux-aarch64==2.17 + - sysroot_linux-aarch64==2.28 + - matrix: + arch: x86_64 + cuda: "12.*" + packages: + - gcc_linux-64=13.* + - sysroot_linux-64==2.28 + - matrix: + arch: aarch64 + cuda: "12.*" + packages: + - gcc_linux-aarch64=13.* + - sysroot_linux-aarch64==2.28 - output_types: conda matrices: - matrix: {cuda: "12.*"} From 596d4b7338e62a92652503cd76feaeaa187ad740 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 21 Jan 2025 23:55:05 -0600 Subject: [PATCH 26/37] use dynamic CUDA wheels on CUDA 11 (#2548) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contributes to https://github.com/rapidsai/build-planning/issues/137 Follow-up to #2531 . See the linked issue for many more details, but in short... using a dynamically-loaded libraft which has statically-linked cuBLAS causes issues for other libraries. There are now aarch64 CUDA 11 wheels for cuBLAS and other CUDA libraries, so it's possible to have RAFT wheels dynamically link against them. This PR does that. ## Notes for Reviewers This has other side benefits in addition to fixing runtime issues... 
it also simplifies the wheel-building scripts and CMake, and makes CUDA 11 wheels noticeably smaller 😊 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2548 --- ci/build_wheel.sh | 27 +++++++++------------------ ci/build_wheel_libraft.sh | 11 ----------- ci/validate_wheel.sh | 15 --------------- dependencies.yaml | 5 ++++- python/libraft/CMakeLists.txt | 31 +++++++++++-------------------- python/libraft/pyproject.toml | 4 +++- 6 files changed, 27 insertions(+), 66 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 4c295c416e..976da98998 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -21,24 +21,15 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXCLUDE_ARGS=( - --exclude "libcublas.so.12" - --exclude "libcublasLt.so.12" - --exclude "libcurand.so.10" - --exclude "libcusolver.so.11" - --exclude "libcusparse.so.12" - --exclude "libnvJitLink.so.12" - --exclude "libucp.so.0" - ) - ;; - 11.*) - EXCLUDE_ARGS=( - --exclude "libucp.so.0" - ) - ;; -esac +EXCLUDE_ARGS=( + --exclude "libcublas.so.*" + --exclude "libcublasLt.so.*" + --exclude "libcurand.so.*" + --exclude "libcusolver.so.*" + --exclude "libcusparse.so.*" + --exclude "libnvJitLink.so.*" + --exclude "libucp.so.*" +) if [[ ${package_name} != "libraft" ]]; then EXCLUDE_ARGS+=( diff --git a/ci/build_wheel_libraft.sh b/ci/build_wheel_libraft.sh index 825a5124a8..8ff0da1e9a 100755 --- a/ci/build_wheel_libraft.sh +++ b/ci/build_wheel_libraft.sh @@ -28,16 +28,5 @@ export PIP_NO_BUILD_ISOLATION=0 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=ON" - ;; - 11.*) - EXTRA_CMAKE_ARGS="-DUSE_CUDA_MATH_WHEELS=OFF" - ;; -esac - -export SKBUILD_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" - ci/build_wheel.sh libraft ${package_dir} cpp
ci/validate_wheel.sh ${package_dir} final_dist libraft diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index ca506af004..ec3867aa30 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -9,27 +9,12 @@ package_name=$3 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -# some packages are much larger on CUDA 11 than on CUDA 12 -PYDISTCHECK_ARGS=() -if [[ "${package_name}" == "libraft" ]]; then - if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then - PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '750M' - ) - else - PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '100M' - ) - fi -fi - cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ - "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/dependencies.yaml b/dependencies.yaml index a2d75fd3d6..b7a0344b1a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -358,11 +358,14 @@ dependencies: - nvidia-curand-cu12 - nvidia-cusolver-cu12 - nvidia-cusparse-cu12 - # CUDA 11 does not provide wheels, so use the system libraries instead - matrix: cuda: "11.*" use_cuda_wheels: "true" packages: + - nvidia-cublas-cu11 + - nvidia-curand-cu11 + - nvidia-cusolver-cu11 + - nvidia-cusparse-cu11 # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. for DLFW and pip devcontainers) - matrix: diff --git a/python/libraft/CMakeLists.txt b/python/libraft/CMakeLists.txt index 57efcd61ab..db81aa9507 100644 --- a/python/libraft/CMakeLists.txt +++ b/python/libraft/CMakeLists.txt @@ -22,8 +22,6 @@ project( LANGUAGES CXX ) -option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) - # Check if raft is already available. If so, it is the user's responsibility to ensure that the # CMake package is also available at build time of the Python raft package. 
find_package(raft "${RAPIDS_VERSION}") @@ -35,14 +33,8 @@ endif() unset(raft_FOUND) # --- CUDA --- # -find_package(CUDAToolkit REQUIRED) set(CUDA_STATIC_RUNTIME ON) -set(CUDA_STATIC_MATH_LIBRARIES ON) -if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) - set(CUDA_STATIC_MATH_LIBRARIES OFF) -elseif(USE_CUDA_MATH_WHEELS) - message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") -endif() +set(CUDA_STATIC_MATH_LIBRARIES OFF) # --- RAFT ---# set(BUILD_TESTS OFF) @@ -52,14 +44,13 @@ set(RAFT_COMPILE_LIBRARY ON) add_subdirectory(../../cpp raft-cpp) -if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set_property( - TARGET raft_lib - PROPERTY INSTALL_RPATH - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" - ) -endif() +# assumes libraft.so is installed 2 levels deep, e.g. site-packages/libraft/lib64/libraft.so +set_property( + TARGET raft_lib + PROPERTY INSTALL_RPATH + "$ORIGIN/../../nvidia/cublas/lib" + "$ORIGIN/../../nvidia/curand/lib" + "$ORIGIN/../../nvidia/cusolver/lib" + "$ORIGIN/../../nvidia/cusparse/lib" + "$ORIGIN/../../nvidia/nvjitlink/lib" +) diff --git a/python/libraft/pyproject.toml b/python/libraft/pyproject.toml index 549a1bf651..89b2834614 100644 --- a/python/libraft/pyproject.toml +++ b/python/libraft/pyproject.toml @@ -110,6 +110,8 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pydistcheck] select = [ - # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' From e5b657d96c692bedbf12e895fccb2ca3732c9897 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 22 Jan 2025 11:05:10 -0600 Subject: [PATCH 27/37] Use cuda.bindings layout. (#2545) This PR updates RAFT to use the new cuda-python `cuda.bindings` layout. 
See https://github.com/rapidsai/build-planning/issues/117. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) - https://github.com/jakirkham URL: https://github.com/rapidsai/raft/pull/2545 --- python/pylibraft/pylibraft/common/cuda.pxd | 2 +- python/pylibraft/pylibraft/common/cuda.pyx | 2 +- python/pylibraft/pylibraft/common/handle.pyx | 2 +- python/pylibraft/pylibraft/common/interruptible.pyx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pylibraft/pylibraft/common/cuda.pxd b/python/pylibraft/pylibraft/common/cuda.pxd index a44d9aeb63..934573b51f 100644 --- a/python/pylibraft/pylibraft/common/cuda.pxd +++ b/python/pylibraft/pylibraft/common/cuda.pxd @@ -14,7 +14,7 @@ # limitations under the License. # -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t cdef class Stream: diff --git a/python/pylibraft/pylibraft/common/cuda.pyx b/python/pylibraft/pylibraft/common/cuda.pyx index c164a463ae..cda0fc7168 100644 --- a/python/pylibraft/pylibraft/common/cuda.pyx +++ b/python/pylibraft/pylibraft/common/cuda.pyx @@ -19,7 +19,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cuda.ccudart cimport ( +from cuda.bindings.cyruntime cimport ( cudaError_t, cudaGetErrorName, cudaGetErrorString, diff --git a/python/pylibraft/pylibraft/common/handle.pyx b/python/pylibraft/pylibraft/common/handle.pyx index d256e671bf..400b667789 100644 --- a/python/pylibraft/pylibraft/common/handle.pyx +++ b/python/pylibraft/pylibraft/common/handle.pyx @@ -21,7 +21,7 @@ import functools -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from libc.stdint cimport uintptr_t from rmm.librmm.cuda_stream_view cimport ( diff --git a/python/pylibraft/pylibraft/common/interruptible.pyx b/python/pylibraft/pylibraft/common/interruptible.pyx index c489f2ee20..ceac387f58 100644 --- 
a/python/pylibraft/pylibraft/common/interruptible.pyx +++ b/python/pylibraft/pylibraft/common/interruptible.pyx @@ -22,7 +22,7 @@ import contextlib import signal -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from cython.operator cimport dereference from rmm.librmm.cuda_stream_view cimport cuda_stream_view From 0eff2358ce97717779854d660ed10bdc921ded03 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 23 Jan 2025 16:40:08 -0600 Subject: [PATCH 28/37] Rename test to tests. (#2546) Renames `test` directories to `tests` for alignment with the rest of RAPIDS. See also: https://github.com/rapidsai/cuvs/issues/587 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/raft/pull/2546 --- .pre-commit-config.yaml | 2 +- ci/run_pylibraft_pytests.sh | 2 +- ci/run_raft_dask_pytests.sh | 2 +- ci/test_wheel_pylibraft.sh | 2 +- ci/test_wheel_raft_dask.sh | 2 +- cpp/CMakeLists.txt | 2 +- cpp/{test => tests}/CMakeLists.txt | 2 +- cpp/{test => tests}/core/bitmap.cu | 0 cpp/{test => tests}/core/bitset.cu | 0 .../core/device_resources_manager.cpp | 0 cpp/{test => tests}/core/device_setter.cpp | 0 cpp/{test => tests}/core/handle.cpp | 0 cpp/{test => tests}/core/interruptible.cu | 0 cpp/{test => tests}/core/logger.cpp | 0 cpp/{test => tests}/core/math_device.cu | 0 cpp/{test => tests}/core/math_host.cpp | 0 cpp/{test => tests}/core/mdarray.cu | 0 cpp/{test => tests}/core/mdbuffer.cu | 0 cpp/{test => tests}/core/mdspan_copy.cpp | 0 cpp/{test => tests}/core/mdspan_copy.cu | 0 cpp/{test => tests}/core/mdspan_utils.cu | 0 cpp/{test => tests}/core/memory_type.cpp | 0 cpp/{test => tests}/core/numpy_serializer.cu | 0 cpp/{test => tests}/core/nvtx.cpp | 0 cpp/{test => tests}/core/operators_device.cu | 0 cpp/{test => tests}/core/operators_host.cpp | 0 cpp/{test => tests}/core/seive.cu | 0 cpp/{test => 
tests}/core/span.cpp | 0 cpp/{test => tests}/core/span.cu | 0 cpp/{test => tests}/core/sparse_matrix.cpp | 0 cpp/{test => tests}/core/sparse_matrix.cu | 0 cpp/{test => tests}/core/stream_view.cpp | 0 cpp/{test => tests}/core/temporary_device_buffer.cu | 0 cpp/{test => tests}/core/test_span.hpp | 0 cpp/{test => tests}/ext_headers/00_generate.py | 0 cpp/{test => tests}/ext_headers/raft_core_logger.cpp | 0 .../raft_distance_detail_pairwise_matrix_dispatch.cu | 0 .../ext_headers/raft_distance_distance.cu | 0 .../ext_headers/raft_distance_fused_l2_nn.cu | 0 .../raft_linalg_detail_coalesced_reduction.cu | 0 .../ext_headers/raft_matrix_detail_select_k.cu | 0 .../ext_headers/raft_neighbors_ball_cover.cu | 0 .../ext_headers/raft_neighbors_brute_force.cu | 0 ...aft_neighbors_detail_ivf_flat_interleaved_scan.cu | 0 .../raft_neighbors_detail_ivf_flat_search.cu | 0 ...aft_neighbors_detail_ivf_pq_compute_similarity.cu | 0 .../ext_headers/raft_neighbors_ivf_flat.cu | 0 .../ext_headers/raft_neighbors_ivf_pq.cu | 0 .../ext_headers/raft_neighbors_refine.cu | 0 .../raft_sparse_matrix_detail_select_k.cu | 0 .../raft_spatial_knn_detail_ball_cover_registers.cu | 0 .../raft_spatial_knn_detail_fused_l2_knn.cu | 0 cpp/{test => tests}/label/label.cu | 0 cpp/{test => tests}/label/merge_labels.cu | 0 cpp/{test => tests}/lap/lap.cu | 0 cpp/{test => tests}/linalg/add.cu | 0 cpp/{test => tests}/linalg/add.cuh | 0 cpp/{test => tests}/linalg/axpy.cu | 0 cpp/{test => tests}/linalg/binary_op.cu | 0 cpp/{test => tests}/linalg/binary_op.cuh | 0 cpp/{test => tests}/linalg/cholesky_r1.cu | 0 cpp/{test => tests}/linalg/coalesced_reduction.cu | 0 cpp/{test => tests}/linalg/divide.cu | 0 cpp/{test => tests}/linalg/dot.cu | 0 cpp/{test => tests}/linalg/eig.cu | 0 cpp/{test => tests}/linalg/eig_sel.cu | 0 cpp/{test => tests}/linalg/eigen_solvers.cu | 0 cpp/{test => tests}/linalg/eltwise.cu | 0 cpp/{test => tests}/linalg/gemm_layout.cu | 0 cpp/{test => tests}/linalg/gemv.cu | 0 cpp/{test => 
tests}/linalg/map.cu | 0 cpp/{test => tests}/linalg/map_then_reduce.cu | 0 cpp/{test => tests}/linalg/matrix_vector.cu | 0 cpp/{test => tests}/linalg/matrix_vector_op.cu | 0 cpp/{test => tests}/linalg/matrix_vector_op.cuh | 0 cpp/{test => tests}/linalg/mean_squared_error.cu | 0 cpp/{test => tests}/linalg/multiply.cu | 0 cpp/{test => tests}/linalg/norm.cu | 0 cpp/{test => tests}/linalg/normalize.cu | 0 cpp/{test => tests}/linalg/power.cu | 0 cpp/{test => tests}/linalg/randomized_svd.cu | 0 cpp/{test => tests}/linalg/reduce.cu | 0 cpp/{test => tests}/linalg/reduce.cuh | 0 cpp/{test => tests}/linalg/reduce_cols_by_key.cu | 0 cpp/{test => tests}/linalg/reduce_rows_by_key.cu | 0 cpp/{test => tests}/linalg/rsvd.cu | 0 cpp/{test => tests}/linalg/sqrt.cu | 0 cpp/{test => tests}/linalg/strided_reduction.cu | 0 cpp/{test => tests}/linalg/subtract.cu | 0 cpp/{test => tests}/linalg/svd.cu | 0 cpp/{test => tests}/linalg/ternary_op.cu | 0 cpp/{test => tests}/linalg/transpose.cu | 0 cpp/{test => tests}/linalg/unary_op.cu | 0 cpp/{test => tests}/linalg/unary_op.cuh | 0 cpp/{test => tests}/matrix/argmax.cu | 0 cpp/{test => tests}/matrix/argmin.cu | 0 cpp/{test => tests}/matrix/columnSort.cu | 0 cpp/{test => tests}/matrix/diagonal.cu | 0 cpp/{test => tests}/matrix/eye.cu | 0 cpp/{test => tests}/matrix/gather.cu | 0 cpp/{test => tests}/matrix/linewise_op.cu | 0 cpp/{test => tests}/matrix/math.cu | 0 cpp/{test => tests}/matrix/matrix.cu | 0 cpp/{test => tests}/matrix/norm.cu | 0 cpp/{test => tests}/matrix/reverse.cu | 0 cpp/{test => tests}/matrix/sample_rows.cu | 0 cpp/{test => tests}/matrix/scatter.cu | 0 cpp/{test => tests}/matrix/select_k.cu | 0 cpp/{test => tests}/matrix/select_k.cuh | 0 cpp/{test => tests}/matrix/select_large_k.cu | 0 cpp/{test => tests}/matrix/slice.cu | 0 cpp/{test => tests}/matrix/triangular.cu | 0 cpp/{test => tests}/mr/device/buffer.cpp | 0 cpp/{test => tests}/mr/host/buffer.cpp | 0 cpp/{test => tests}/neighbors/ball_cover.cu | 0 
.../neighbors/epsilon_neighborhood.cu | 0 cpp/{test => tests}/neighbors/haversine.cu | 0 cpp/{test => tests}/neighbors/knn_utils.cuh | 0 cpp/{test => tests}/neighbors/spatial_data.h | 0 cpp/{test => tests}/random/excess_sampling.cu | 0 cpp/{test => tests}/random/make_blobs.cu | 0 cpp/{test => tests}/random/make_regression.cu | 0 .../random/multi_variable_gaussian.cu | 0 cpp/{test => tests}/random/permute.cu | 0 .../random/rmat_rectangular_generator.cu | 0 cpp/{test => tests}/random/rng.cu | 0 cpp/{test => tests}/random/rng_discrete.cu | 0 cpp/{test => tests}/random/rng_int.cu | 0 cpp/{test => tests}/random/rng_pcg_host_api.cu | 0 .../random/sample_without_replacement.cu | 0 cpp/{test => tests}/sparse/add.cu | 0 cpp/{test => tests}/sparse/convert_coo.cu | 0 cpp/{test => tests}/sparse/convert_csr.cu | 0 cpp/{test => tests}/sparse/csr_row_slice.cu | 0 cpp/{test => tests}/sparse/csr_to_dense.cu | 0 cpp/{test => tests}/sparse/csr_transpose.cu | 0 cpp/{test => tests}/sparse/degree.cu | 0 cpp/{test => tests}/sparse/dist_coo_spmv.cu | 0 cpp/{test => tests}/sparse/distance.cu | 0 cpp/{test => tests}/sparse/filter.cu | 0 cpp/{test => tests}/sparse/masked_matmul.cu | 0 cpp/{test => tests}/sparse/mst.cu | 0 cpp/{test => tests}/sparse/norm.cu | 0 cpp/{test => tests}/sparse/normalize.cu | 0 cpp/{test => tests}/sparse/reduce.cu | 0 cpp/{test => tests}/sparse/row_op.cu | 0 cpp/{test => tests}/sparse/sddmm.cu | 0 cpp/{test => tests}/sparse/select_k_csr.cu | 0 cpp/{test => tests}/sparse/solver/lanczos.cu | 0 cpp/{test => tests}/sparse/sort.cu | 0 cpp/{test => tests}/sparse/spectral_matrix.cu | 0 cpp/{test => tests}/sparse/spgemmi.cu | 0 cpp/{test => tests}/sparse/spmm.cu | 0 cpp/{test => tests}/sparse/symmetrize.cu | 0 cpp/{test => tests}/stats/accuracy.cu | 0 cpp/{test => tests}/stats/adjusted_rand_index.cu | 0 cpp/{test => tests}/stats/completeness_score.cu | 0 cpp/{test => tests}/stats/contingencyMatrix.cu | 0 cpp/{test => tests}/stats/cov.cu | 0 cpp/{test => 
tests}/stats/dispersion.cu | 0 cpp/{test => tests}/stats/entropy.cu | 0 cpp/{test => tests}/stats/histogram.cu | 0 cpp/{test => tests}/stats/homogeneity_score.cu | 0 cpp/{test => tests}/stats/information_criterion.cu | 0 cpp/{test => tests}/stats/kl_divergence.cu | 0 cpp/{test => tests}/stats/mean.cu | 0 cpp/{test => tests}/stats/mean_center.cu | 0 cpp/{test => tests}/stats/meanvar.cu | 0 cpp/{test => tests}/stats/minmax.cu | 0 cpp/{test => tests}/stats/mutual_info_score.cu | 0 cpp/{test => tests}/stats/r2_score.cu | 0 cpp/{test => tests}/stats/rand_index.cu | 0 cpp/{test => tests}/stats/regression_metrics.cu | 0 cpp/{test => tests}/stats/stddev.cu | 0 cpp/{test => tests}/stats/sum.cu | 0 cpp/{test => tests}/stats/v_measure.cu | 0 cpp/{test => tests}/stats/weighted_mean.cu | 0 cpp/{test => tests}/test.cpp | 0 cpp/{test => tests}/test_utils.cuh | 0 cpp/{test => tests}/test_utils.h | 0 cpp/{test => tests}/util/bitonic_sort.cu | 0 cpp/{test => tests}/util/cudart_utils.cpp | 0 cpp/{test => tests}/util/device_atomics.cu | 0 cpp/{test => tests}/util/integer_utils.cpp | 0 cpp/{test => tests}/util/integer_utils.cu | 0 cpp/{test => tests}/util/memory_type_dispatcher.cu | 0 cpp/{test => tests}/util/popc.cu | 0 cpp/{test => tests}/util/pow2_utils.cu | 0 cpp/{test => tests}/util/reduction.cu | 0 docs/source/developer_guide.md | 12 ++++++------ pyproject.toml | 2 +- .../pylibraft/pylibraft/{test => tests}/__init__py | 0 .../pylibraft/pylibraft/{test => tests}/pytest.ini | 0 .../pylibraft/{test => tests}/test_cai_wrapper.py | 0 .../pylibraft/{test => tests}/test_config.py | 0 .../pylibraft/{test => tests}/test_device_ndarray.py | 0 .../pylibraft/{test => tests}/test_doctests.py | 0 .../pylibraft/{test => tests}/test_handle.py | 0 .../{test => tests}/test_mdspan_serializer.py | 0 .../pylibraft/{test => tests}/test_random.py | 0 .../pylibraft/{test => tests}/test_sparse.py | 0 .../pylibraft/{test => tests}/test_version.py | 0 .../{test => tests}/test_z_interruptible.py | 0 
.../raft-dask/raft_dask/{test => tests}/conftest.py | 0 .../raft-dask/raft_dask/{test => tests}/pytest.ini | 0 .../raft_dask/{test => tests}/test_comms.py | 0 .../raft-dask/raft_dask/{test => tests}/test_raft.py | 0 .../raft_dask/{test => tests}/test_version.py | 0 208 files changed, 14 insertions(+), 14 deletions(-) rename cpp/{test => tests}/CMakeLists.txt (99%) rename cpp/{test => tests}/core/bitmap.cu (100%) rename cpp/{test => tests}/core/bitset.cu (100%) rename cpp/{test => tests}/core/device_resources_manager.cpp (100%) rename cpp/{test => tests}/core/device_setter.cpp (100%) rename cpp/{test => tests}/core/handle.cpp (100%) rename cpp/{test => tests}/core/interruptible.cu (100%) rename cpp/{test => tests}/core/logger.cpp (100%) rename cpp/{test => tests}/core/math_device.cu (100%) rename cpp/{test => tests}/core/math_host.cpp (100%) rename cpp/{test => tests}/core/mdarray.cu (100%) rename cpp/{test => tests}/core/mdbuffer.cu (100%) rename cpp/{test => tests}/core/mdspan_copy.cpp (100%) rename cpp/{test => tests}/core/mdspan_copy.cu (100%) rename cpp/{test => tests}/core/mdspan_utils.cu (100%) rename cpp/{test => tests}/core/memory_type.cpp (100%) rename cpp/{test => tests}/core/numpy_serializer.cu (100%) rename cpp/{test => tests}/core/nvtx.cpp (100%) rename cpp/{test => tests}/core/operators_device.cu (100%) rename cpp/{test => tests}/core/operators_host.cpp (100%) rename cpp/{test => tests}/core/seive.cu (100%) rename cpp/{test => tests}/core/span.cpp (100%) rename cpp/{test => tests}/core/span.cu (100%) rename cpp/{test => tests}/core/sparse_matrix.cpp (100%) rename cpp/{test => tests}/core/sparse_matrix.cu (100%) rename cpp/{test => tests}/core/stream_view.cpp (100%) rename cpp/{test => tests}/core/temporary_device_buffer.cu (100%) rename cpp/{test => tests}/core/test_span.hpp (100%) rename cpp/{test => tests}/ext_headers/00_generate.py (100%) rename cpp/{test => tests}/ext_headers/raft_core_logger.cpp (100%) rename cpp/{test => 
tests}/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu (100%) rename cpp/{test => tests}/ext_headers/raft_distance_distance.cu (100%) rename cpp/{test => tests}/ext_headers/raft_distance_fused_l2_nn.cu (100%) rename cpp/{test => tests}/ext_headers/raft_linalg_detail_coalesced_reduction.cu (100%) rename cpp/{test => tests}/ext_headers/raft_matrix_detail_select_k.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_ball_cover.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_brute_force.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_detail_ivf_flat_search.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_ivf_flat.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_ivf_pq.cu (100%) rename cpp/{test => tests}/ext_headers/raft_neighbors_refine.cu (100%) rename cpp/{test => tests}/ext_headers/raft_sparse_matrix_detail_select_k.cu (100%) rename cpp/{test => tests}/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu (100%) rename cpp/{test => tests}/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu (100%) rename cpp/{test => tests}/label/label.cu (100%) rename cpp/{test => tests}/label/merge_labels.cu (100%) rename cpp/{test => tests}/lap/lap.cu (100%) rename cpp/{test => tests}/linalg/add.cu (100%) rename cpp/{test => tests}/linalg/add.cuh (100%) rename cpp/{test => tests}/linalg/axpy.cu (100%) rename cpp/{test => tests}/linalg/binary_op.cu (100%) rename cpp/{test => tests}/linalg/binary_op.cuh (100%) rename cpp/{test => tests}/linalg/cholesky_r1.cu (100%) rename cpp/{test => tests}/linalg/coalesced_reduction.cu (100%) rename cpp/{test => tests}/linalg/divide.cu (100%) rename cpp/{test => tests}/linalg/dot.cu (100%) rename cpp/{test => tests}/linalg/eig.cu (100%) rename cpp/{test => 
tests}/linalg/eig_sel.cu (100%) rename cpp/{test => tests}/linalg/eigen_solvers.cu (100%) rename cpp/{test => tests}/linalg/eltwise.cu (100%) rename cpp/{test => tests}/linalg/gemm_layout.cu (100%) rename cpp/{test => tests}/linalg/gemv.cu (100%) rename cpp/{test => tests}/linalg/map.cu (100%) rename cpp/{test => tests}/linalg/map_then_reduce.cu (100%) rename cpp/{test => tests}/linalg/matrix_vector.cu (100%) rename cpp/{test => tests}/linalg/matrix_vector_op.cu (100%) rename cpp/{test => tests}/linalg/matrix_vector_op.cuh (100%) rename cpp/{test => tests}/linalg/mean_squared_error.cu (100%) rename cpp/{test => tests}/linalg/multiply.cu (100%) rename cpp/{test => tests}/linalg/norm.cu (100%) rename cpp/{test => tests}/linalg/normalize.cu (100%) rename cpp/{test => tests}/linalg/power.cu (100%) rename cpp/{test => tests}/linalg/randomized_svd.cu (100%) rename cpp/{test => tests}/linalg/reduce.cu (100%) rename cpp/{test => tests}/linalg/reduce.cuh (100%) rename cpp/{test => tests}/linalg/reduce_cols_by_key.cu (100%) rename cpp/{test => tests}/linalg/reduce_rows_by_key.cu (100%) rename cpp/{test => tests}/linalg/rsvd.cu (100%) rename cpp/{test => tests}/linalg/sqrt.cu (100%) rename cpp/{test => tests}/linalg/strided_reduction.cu (100%) rename cpp/{test => tests}/linalg/subtract.cu (100%) rename cpp/{test => tests}/linalg/svd.cu (100%) rename cpp/{test => tests}/linalg/ternary_op.cu (100%) rename cpp/{test => tests}/linalg/transpose.cu (100%) rename cpp/{test => tests}/linalg/unary_op.cu (100%) rename cpp/{test => tests}/linalg/unary_op.cuh (100%) rename cpp/{test => tests}/matrix/argmax.cu (100%) rename cpp/{test => tests}/matrix/argmin.cu (100%) rename cpp/{test => tests}/matrix/columnSort.cu (100%) rename cpp/{test => tests}/matrix/diagonal.cu (100%) rename cpp/{test => tests}/matrix/eye.cu (100%) rename cpp/{test => tests}/matrix/gather.cu (100%) rename cpp/{test => tests}/matrix/linewise_op.cu (100%) rename cpp/{test => tests}/matrix/math.cu (100%) rename 
cpp/{test => tests}/matrix/matrix.cu (100%) rename cpp/{test => tests}/matrix/norm.cu (100%) rename cpp/{test => tests}/matrix/reverse.cu (100%) rename cpp/{test => tests}/matrix/sample_rows.cu (100%) rename cpp/{test => tests}/matrix/scatter.cu (100%) rename cpp/{test => tests}/matrix/select_k.cu (100%) rename cpp/{test => tests}/matrix/select_k.cuh (100%) rename cpp/{test => tests}/matrix/select_large_k.cu (100%) rename cpp/{test => tests}/matrix/slice.cu (100%) rename cpp/{test => tests}/matrix/triangular.cu (100%) rename cpp/{test => tests}/mr/device/buffer.cpp (100%) rename cpp/{test => tests}/mr/host/buffer.cpp (100%) rename cpp/{test => tests}/neighbors/ball_cover.cu (100%) rename cpp/{test => tests}/neighbors/epsilon_neighborhood.cu (100%) rename cpp/{test => tests}/neighbors/haversine.cu (100%) rename cpp/{test => tests}/neighbors/knn_utils.cuh (100%) rename cpp/{test => tests}/neighbors/spatial_data.h (100%) rename cpp/{test => tests}/random/excess_sampling.cu (100%) rename cpp/{test => tests}/random/make_blobs.cu (100%) rename cpp/{test => tests}/random/make_regression.cu (100%) rename cpp/{test => tests}/random/multi_variable_gaussian.cu (100%) rename cpp/{test => tests}/random/permute.cu (100%) rename cpp/{test => tests}/random/rmat_rectangular_generator.cu (100%) rename cpp/{test => tests}/random/rng.cu (100%) rename cpp/{test => tests}/random/rng_discrete.cu (100%) rename cpp/{test => tests}/random/rng_int.cu (100%) rename cpp/{test => tests}/random/rng_pcg_host_api.cu (100%) rename cpp/{test => tests}/random/sample_without_replacement.cu (100%) rename cpp/{test => tests}/sparse/add.cu (100%) rename cpp/{test => tests}/sparse/convert_coo.cu (100%) rename cpp/{test => tests}/sparse/convert_csr.cu (100%) rename cpp/{test => tests}/sparse/csr_row_slice.cu (100%) rename cpp/{test => tests}/sparse/csr_to_dense.cu (100%) rename cpp/{test => tests}/sparse/csr_transpose.cu (100%) rename cpp/{test => tests}/sparse/degree.cu (100%) rename cpp/{test => 
tests}/sparse/dist_coo_spmv.cu (100%) rename cpp/{test => tests}/sparse/distance.cu (100%) rename cpp/{test => tests}/sparse/filter.cu (100%) rename cpp/{test => tests}/sparse/masked_matmul.cu (100%) rename cpp/{test => tests}/sparse/mst.cu (100%) rename cpp/{test => tests}/sparse/norm.cu (100%) rename cpp/{test => tests}/sparse/normalize.cu (100%) rename cpp/{test => tests}/sparse/reduce.cu (100%) rename cpp/{test => tests}/sparse/row_op.cu (100%) rename cpp/{test => tests}/sparse/sddmm.cu (100%) rename cpp/{test => tests}/sparse/select_k_csr.cu (100%) rename cpp/{test => tests}/sparse/solver/lanczos.cu (100%) rename cpp/{test => tests}/sparse/sort.cu (100%) rename cpp/{test => tests}/sparse/spectral_matrix.cu (100%) rename cpp/{test => tests}/sparse/spgemmi.cu (100%) rename cpp/{test => tests}/sparse/spmm.cu (100%) rename cpp/{test => tests}/sparse/symmetrize.cu (100%) rename cpp/{test => tests}/stats/accuracy.cu (100%) rename cpp/{test => tests}/stats/adjusted_rand_index.cu (100%) rename cpp/{test => tests}/stats/completeness_score.cu (100%) rename cpp/{test => tests}/stats/contingencyMatrix.cu (100%) rename cpp/{test => tests}/stats/cov.cu (100%) rename cpp/{test => tests}/stats/dispersion.cu (100%) rename cpp/{test => tests}/stats/entropy.cu (100%) rename cpp/{test => tests}/stats/histogram.cu (100%) rename cpp/{test => tests}/stats/homogeneity_score.cu (100%) rename cpp/{test => tests}/stats/information_criterion.cu (100%) rename cpp/{test => tests}/stats/kl_divergence.cu (100%) rename cpp/{test => tests}/stats/mean.cu (100%) rename cpp/{test => tests}/stats/mean_center.cu (100%) rename cpp/{test => tests}/stats/meanvar.cu (100%) rename cpp/{test => tests}/stats/minmax.cu (100%) rename cpp/{test => tests}/stats/mutual_info_score.cu (100%) rename cpp/{test => tests}/stats/r2_score.cu (100%) rename cpp/{test => tests}/stats/rand_index.cu (100%) rename cpp/{test => tests}/stats/regression_metrics.cu (100%) rename cpp/{test => tests}/stats/stddev.cu (100%) rename 
cpp/{test => tests}/stats/sum.cu (100%) rename cpp/{test => tests}/stats/v_measure.cu (100%) rename cpp/{test => tests}/stats/weighted_mean.cu (100%) rename cpp/{test => tests}/test.cpp (100%) rename cpp/{test => tests}/test_utils.cuh (100%) rename cpp/{test => tests}/test_utils.h (100%) rename cpp/{test => tests}/util/bitonic_sort.cu (100%) rename cpp/{test => tests}/util/cudart_utils.cpp (100%) rename cpp/{test => tests}/util/device_atomics.cu (100%) rename cpp/{test => tests}/util/integer_utils.cpp (100%) rename cpp/{test => tests}/util/integer_utils.cu (100%) rename cpp/{test => tests}/util/memory_type_dispatcher.cu (100%) rename cpp/{test => tests}/util/popc.cu (100%) rename cpp/{test => tests}/util/pow2_utils.cu (100%) rename cpp/{test => tests}/util/reduction.cu (100%) rename python/pylibraft/pylibraft/{test => tests}/__init__py (100%) rename python/pylibraft/pylibraft/{test => tests}/pytest.ini (100%) rename python/pylibraft/pylibraft/{test => tests}/test_cai_wrapper.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_config.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_device_ndarray.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_doctests.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_handle.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_mdspan_serializer.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_random.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_sparse.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_version.py (100%) rename python/pylibraft/pylibraft/{test => tests}/test_z_interruptible.py (100%) rename python/raft-dask/raft_dask/{test => tests}/conftest.py (100%) rename python/raft-dask/raft_dask/{test => tests}/pytest.ini (100%) rename python/raft-dask/raft_dask/{test => tests}/test_comms.py (100%) rename python/raft-dask/raft_dask/{test => tests}/test_raft.py (100%) rename python/raft-dask/raft_dask/{test => 
tests}/test_version.py (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d5456ba30b..ca1efc3abd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -83,7 +83,7 @@ repos: exclude: .*/thirdparty/.* - id: include-check name: include-check - entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/test + entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/tests pass_filenames: false language: python additional_dependencies: [gitpython] diff --git a/ci/run_pylibraft_pytests.sh b/ci/run_pylibraft_pytests.sh index 1167b89c5f..7f3d1f9cfb 100755 --- a/ci/run_pylibraft_pytests.sh +++ b/ci/run_pylibraft_pytests.sh @@ -6,4 +6,4 @@ set -euo pipefail # Support invoking run_pylibraft_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibraft/pylibraft -pytest --cache-clear "$@" test +pytest --cache-clear "$@" tests diff --git a/ci/run_raft_dask_pytests.sh b/ci/run_raft_dask_pytests.sh index 07d0b5baa0..a9e6a130cd 100755 --- a/ci/run_raft_dask_pytests.sh +++ b/ci/run_raft_dask_pytests.sh @@ -6,4 +6,4 @@ set -euo pipefail # Support invoking run_raft_dask_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/raft-dask/raft_dask -pytest --cache-clear --import-mode=append "$@" test +pytest --cache-clear --import-mode=append "$@" tests diff --git a/ci/test_wheel_pylibraft.sh b/ci/test_wheel_pylibraft.sh index 1e0b34d609..26f4da267f 100755 --- a/ci/test_wheel_pylibraft.sh +++ b/ci/test_wheel_pylibraft.sh @@ -14,4 +14,4 @@ python -m pip install \ ./local-libraft-dep/libraft*.whl \ "$(echo ./dist/pylibraft*.whl)[test]" -python -m pytest ./python/pylibraft/pylibraft/test +python -m pytest ./python/pylibraft/pylibraft/tests diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index 011de4d409..c394314aac 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -15,7 +15,7 @@ python 
-m pip install -v \ ./local-pylibraft-dep/pylibraft*.whl \ "$(echo ./dist/raft_dask_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" -test_dir="python/raft-dask/raft_dask/test" +test_dir="python/raft-dask/raft_dask/tests" rapids-logger "pytest raft-dask" python -m pytest --import-mode=append ${test_dir} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb7e8540f0..c38471bebd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -586,7 +586,7 @@ endif() # * build test executable ---------------------------------------------------- if(BUILD_TESTS) - add_subdirectory(test) + add_subdirectory(tests) endif() # ################################################################################################## diff --git a/cpp/test/CMakeLists.txt b/cpp/tests/CMakeLists.txt similarity index 99% rename from cpp/test/CMakeLists.txt rename to cpp/tests/CMakeLists.txt index 4cd0a32f51..9f96b93e7a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -77,7 +77,7 @@ function(ConfigureTest) target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_DISABLE_CUDA") endif() - target_include_directories(${TEST_NAME} PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/test>") + target_include_directories(${TEST_NAME} PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/tests>") rapids_test_add( NAME ${TEST_NAME} diff --git a/cpp/test/core/bitmap.cu b/cpp/tests/core/bitmap.cu similarity index 100% rename from cpp/test/core/bitmap.cu rename to cpp/tests/core/bitmap.cu diff --git a/cpp/test/core/bitset.cu b/cpp/tests/core/bitset.cu similarity index 100% rename from cpp/test/core/bitset.cu rename to cpp/tests/core/bitset.cu diff --git a/cpp/test/core/device_resources_manager.cpp b/cpp/tests/core/device_resources_manager.cpp similarity index 100% rename from cpp/test/core/device_resources_manager.cpp rename to cpp/tests/core/device_resources_manager.cpp diff --git a/cpp/test/core/device_setter.cpp b/cpp/tests/core/device_setter.cpp similarity index 100% rename from cpp/test/core/device_setter.cpp rename to cpp/tests/core/device_setter.cpp diff --git 
a/cpp/test/core/handle.cpp b/cpp/tests/core/handle.cpp similarity index 100% rename from cpp/test/core/handle.cpp rename to cpp/tests/core/handle.cpp diff --git a/cpp/test/core/interruptible.cu b/cpp/tests/core/interruptible.cu similarity index 100% rename from cpp/test/core/interruptible.cu rename to cpp/tests/core/interruptible.cu diff --git a/cpp/test/core/logger.cpp b/cpp/tests/core/logger.cpp similarity index 100% rename from cpp/test/core/logger.cpp rename to cpp/tests/core/logger.cpp diff --git a/cpp/test/core/math_device.cu b/cpp/tests/core/math_device.cu similarity index 100% rename from cpp/test/core/math_device.cu rename to cpp/tests/core/math_device.cu diff --git a/cpp/test/core/math_host.cpp b/cpp/tests/core/math_host.cpp similarity index 100% rename from cpp/test/core/math_host.cpp rename to cpp/tests/core/math_host.cpp diff --git a/cpp/test/core/mdarray.cu b/cpp/tests/core/mdarray.cu similarity index 100% rename from cpp/test/core/mdarray.cu rename to cpp/tests/core/mdarray.cu diff --git a/cpp/test/core/mdbuffer.cu b/cpp/tests/core/mdbuffer.cu similarity index 100% rename from cpp/test/core/mdbuffer.cu rename to cpp/tests/core/mdbuffer.cu diff --git a/cpp/test/core/mdspan_copy.cpp b/cpp/tests/core/mdspan_copy.cpp similarity index 100% rename from cpp/test/core/mdspan_copy.cpp rename to cpp/tests/core/mdspan_copy.cpp diff --git a/cpp/test/core/mdspan_copy.cu b/cpp/tests/core/mdspan_copy.cu similarity index 100% rename from cpp/test/core/mdspan_copy.cu rename to cpp/tests/core/mdspan_copy.cu diff --git a/cpp/test/core/mdspan_utils.cu b/cpp/tests/core/mdspan_utils.cu similarity index 100% rename from cpp/test/core/mdspan_utils.cu rename to cpp/tests/core/mdspan_utils.cu diff --git a/cpp/test/core/memory_type.cpp b/cpp/tests/core/memory_type.cpp similarity index 100% rename from cpp/test/core/memory_type.cpp rename to cpp/tests/core/memory_type.cpp diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/tests/core/numpy_serializer.cu similarity index 100% 
rename from cpp/test/core/numpy_serializer.cu rename to cpp/tests/core/numpy_serializer.cu diff --git a/cpp/test/core/nvtx.cpp b/cpp/tests/core/nvtx.cpp similarity index 100% rename from cpp/test/core/nvtx.cpp rename to cpp/tests/core/nvtx.cpp diff --git a/cpp/test/core/operators_device.cu b/cpp/tests/core/operators_device.cu similarity index 100% rename from cpp/test/core/operators_device.cu rename to cpp/tests/core/operators_device.cu diff --git a/cpp/test/core/operators_host.cpp b/cpp/tests/core/operators_host.cpp similarity index 100% rename from cpp/test/core/operators_host.cpp rename to cpp/tests/core/operators_host.cpp diff --git a/cpp/test/core/seive.cu b/cpp/tests/core/seive.cu similarity index 100% rename from cpp/test/core/seive.cu rename to cpp/tests/core/seive.cu diff --git a/cpp/test/core/span.cpp b/cpp/tests/core/span.cpp similarity index 100% rename from cpp/test/core/span.cpp rename to cpp/tests/core/span.cpp diff --git a/cpp/test/core/span.cu b/cpp/tests/core/span.cu similarity index 100% rename from cpp/test/core/span.cu rename to cpp/tests/core/span.cu diff --git a/cpp/test/core/sparse_matrix.cpp b/cpp/tests/core/sparse_matrix.cpp similarity index 100% rename from cpp/test/core/sparse_matrix.cpp rename to cpp/tests/core/sparse_matrix.cpp diff --git a/cpp/test/core/sparse_matrix.cu b/cpp/tests/core/sparse_matrix.cu similarity index 100% rename from cpp/test/core/sparse_matrix.cu rename to cpp/tests/core/sparse_matrix.cu diff --git a/cpp/test/core/stream_view.cpp b/cpp/tests/core/stream_view.cpp similarity index 100% rename from cpp/test/core/stream_view.cpp rename to cpp/tests/core/stream_view.cpp diff --git a/cpp/test/core/temporary_device_buffer.cu b/cpp/tests/core/temporary_device_buffer.cu similarity index 100% rename from cpp/test/core/temporary_device_buffer.cu rename to cpp/tests/core/temporary_device_buffer.cu diff --git a/cpp/test/core/test_span.hpp b/cpp/tests/core/test_span.hpp similarity index 100% rename from 
cpp/test/core/test_span.hpp rename to cpp/tests/core/test_span.hpp diff --git a/cpp/test/ext_headers/00_generate.py b/cpp/tests/ext_headers/00_generate.py similarity index 100% rename from cpp/test/ext_headers/00_generate.py rename to cpp/tests/ext_headers/00_generate.py diff --git a/cpp/test/ext_headers/raft_core_logger.cpp b/cpp/tests/ext_headers/raft_core_logger.cpp similarity index 100% rename from cpp/test/ext_headers/raft_core_logger.cpp rename to cpp/tests/ext_headers/raft_core_logger.cpp diff --git a/cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu b/cpp/tests/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu rename to cpp/tests/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu diff --git a/cpp/test/ext_headers/raft_distance_distance.cu b/cpp/tests/ext_headers/raft_distance_distance.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_distance.cu rename to cpp/tests/ext_headers/raft_distance_distance.cu diff --git a/cpp/test/ext_headers/raft_distance_fused_l2_nn.cu b/cpp/tests/ext_headers/raft_distance_fused_l2_nn.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_fused_l2_nn.cu rename to cpp/tests/ext_headers/raft_distance_fused_l2_nn.cu diff --git a/cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu b/cpp/tests/ext_headers/raft_linalg_detail_coalesced_reduction.cu similarity index 100% rename from cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu rename to cpp/tests/ext_headers/raft_linalg_detail_coalesced_reduction.cu diff --git a/cpp/test/ext_headers/raft_matrix_detail_select_k.cu b/cpp/tests/ext_headers/raft_matrix_detail_select_k.cu similarity index 100% rename from cpp/test/ext_headers/raft_matrix_detail_select_k.cu rename to cpp/tests/ext_headers/raft_matrix_detail_select_k.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ball_cover.cu 
b/cpp/tests/ext_headers/raft_neighbors_ball_cover.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ball_cover.cu rename to cpp/tests/ext_headers/raft_neighbors_ball_cover.cu diff --git a/cpp/test/ext_headers/raft_neighbors_brute_force.cu b/cpp/tests/ext_headers/raft_neighbors_brute_force.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_brute_force.cu rename to cpp/tests/ext_headers/raft_neighbors_brute_force.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_search.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_search.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_flat.cu b/cpp/tests/ext_headers/raft_neighbors_ivf_flat.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ivf_flat.cu rename to cpp/tests/ext_headers/raft_neighbors_ivf_flat.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_pq.cu b/cpp/tests/ext_headers/raft_neighbors_ivf_pq.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ivf_pq.cu rename to cpp/tests/ext_headers/raft_neighbors_ivf_pq.cu diff --git 
a/cpp/test/ext_headers/raft_neighbors_refine.cu b/cpp/tests/ext_headers/raft_neighbors_refine.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_refine.cu rename to cpp/tests/ext_headers/raft_neighbors_refine.cu diff --git a/cpp/test/ext_headers/raft_sparse_matrix_detail_select_k.cu b/cpp/tests/ext_headers/raft_sparse_matrix_detail_select_k.cu similarity index 100% rename from cpp/test/ext_headers/raft_sparse_matrix_detail_select_k.cu rename to cpp/tests/ext_headers/raft_sparse_matrix_detail_select_k.cu diff --git a/cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu b/cpp/tests/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu similarity index 100% rename from cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu rename to cpp/tests/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu diff --git a/cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu b/cpp/tests/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu similarity index 100% rename from cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu rename to cpp/tests/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu diff --git a/cpp/test/label/label.cu b/cpp/tests/label/label.cu similarity index 100% rename from cpp/test/label/label.cu rename to cpp/tests/label/label.cu diff --git a/cpp/test/label/merge_labels.cu b/cpp/tests/label/merge_labels.cu similarity index 100% rename from cpp/test/label/merge_labels.cu rename to cpp/tests/label/merge_labels.cu diff --git a/cpp/test/lap/lap.cu b/cpp/tests/lap/lap.cu similarity index 100% rename from cpp/test/lap/lap.cu rename to cpp/tests/lap/lap.cu diff --git a/cpp/test/linalg/add.cu b/cpp/tests/linalg/add.cu similarity index 100% rename from cpp/test/linalg/add.cu rename to cpp/tests/linalg/add.cu diff --git a/cpp/test/linalg/add.cuh b/cpp/tests/linalg/add.cuh similarity index 100% rename from cpp/test/linalg/add.cuh rename to cpp/tests/linalg/add.cuh diff --git 
a/cpp/test/linalg/axpy.cu b/cpp/tests/linalg/axpy.cu similarity index 100% rename from cpp/test/linalg/axpy.cu rename to cpp/tests/linalg/axpy.cu diff --git a/cpp/test/linalg/binary_op.cu b/cpp/tests/linalg/binary_op.cu similarity index 100% rename from cpp/test/linalg/binary_op.cu rename to cpp/tests/linalg/binary_op.cu diff --git a/cpp/test/linalg/binary_op.cuh b/cpp/tests/linalg/binary_op.cuh similarity index 100% rename from cpp/test/linalg/binary_op.cuh rename to cpp/tests/linalg/binary_op.cuh diff --git a/cpp/test/linalg/cholesky_r1.cu b/cpp/tests/linalg/cholesky_r1.cu similarity index 100% rename from cpp/test/linalg/cholesky_r1.cu rename to cpp/tests/linalg/cholesky_r1.cu diff --git a/cpp/test/linalg/coalesced_reduction.cu b/cpp/tests/linalg/coalesced_reduction.cu similarity index 100% rename from cpp/test/linalg/coalesced_reduction.cu rename to cpp/tests/linalg/coalesced_reduction.cu diff --git a/cpp/test/linalg/divide.cu b/cpp/tests/linalg/divide.cu similarity index 100% rename from cpp/test/linalg/divide.cu rename to cpp/tests/linalg/divide.cu diff --git a/cpp/test/linalg/dot.cu b/cpp/tests/linalg/dot.cu similarity index 100% rename from cpp/test/linalg/dot.cu rename to cpp/tests/linalg/dot.cu diff --git a/cpp/test/linalg/eig.cu b/cpp/tests/linalg/eig.cu similarity index 100% rename from cpp/test/linalg/eig.cu rename to cpp/tests/linalg/eig.cu diff --git a/cpp/test/linalg/eig_sel.cu b/cpp/tests/linalg/eig_sel.cu similarity index 100% rename from cpp/test/linalg/eig_sel.cu rename to cpp/tests/linalg/eig_sel.cu diff --git a/cpp/test/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu similarity index 100% rename from cpp/test/linalg/eigen_solvers.cu rename to cpp/tests/linalg/eigen_solvers.cu diff --git a/cpp/test/linalg/eltwise.cu b/cpp/tests/linalg/eltwise.cu similarity index 100% rename from cpp/test/linalg/eltwise.cu rename to cpp/tests/linalg/eltwise.cu diff --git a/cpp/test/linalg/gemm_layout.cu b/cpp/tests/linalg/gemm_layout.cu similarity 
index 100% rename from cpp/test/linalg/gemm_layout.cu rename to cpp/tests/linalg/gemm_layout.cu diff --git a/cpp/test/linalg/gemv.cu b/cpp/tests/linalg/gemv.cu similarity index 100% rename from cpp/test/linalg/gemv.cu rename to cpp/tests/linalg/gemv.cu diff --git a/cpp/test/linalg/map.cu b/cpp/tests/linalg/map.cu similarity index 100% rename from cpp/test/linalg/map.cu rename to cpp/tests/linalg/map.cu diff --git a/cpp/test/linalg/map_then_reduce.cu b/cpp/tests/linalg/map_then_reduce.cu similarity index 100% rename from cpp/test/linalg/map_then_reduce.cu rename to cpp/tests/linalg/map_then_reduce.cu diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/tests/linalg/matrix_vector.cu similarity index 100% rename from cpp/test/linalg/matrix_vector.cu rename to cpp/tests/linalg/matrix_vector.cu diff --git a/cpp/test/linalg/matrix_vector_op.cu b/cpp/tests/linalg/matrix_vector_op.cu similarity index 100% rename from cpp/test/linalg/matrix_vector_op.cu rename to cpp/tests/linalg/matrix_vector_op.cu diff --git a/cpp/test/linalg/matrix_vector_op.cuh b/cpp/tests/linalg/matrix_vector_op.cuh similarity index 100% rename from cpp/test/linalg/matrix_vector_op.cuh rename to cpp/tests/linalg/matrix_vector_op.cuh diff --git a/cpp/test/linalg/mean_squared_error.cu b/cpp/tests/linalg/mean_squared_error.cu similarity index 100% rename from cpp/test/linalg/mean_squared_error.cu rename to cpp/tests/linalg/mean_squared_error.cu diff --git a/cpp/test/linalg/multiply.cu b/cpp/tests/linalg/multiply.cu similarity index 100% rename from cpp/test/linalg/multiply.cu rename to cpp/tests/linalg/multiply.cu diff --git a/cpp/test/linalg/norm.cu b/cpp/tests/linalg/norm.cu similarity index 100% rename from cpp/test/linalg/norm.cu rename to cpp/tests/linalg/norm.cu diff --git a/cpp/test/linalg/normalize.cu b/cpp/tests/linalg/normalize.cu similarity index 100% rename from cpp/test/linalg/normalize.cu rename to cpp/tests/linalg/normalize.cu diff --git a/cpp/test/linalg/power.cu b/cpp/tests/linalg/power.cu 
similarity index 100% rename from cpp/test/linalg/power.cu rename to cpp/tests/linalg/power.cu diff --git a/cpp/test/linalg/randomized_svd.cu b/cpp/tests/linalg/randomized_svd.cu similarity index 100% rename from cpp/test/linalg/randomized_svd.cu rename to cpp/tests/linalg/randomized_svd.cu diff --git a/cpp/test/linalg/reduce.cu b/cpp/tests/linalg/reduce.cu similarity index 100% rename from cpp/test/linalg/reduce.cu rename to cpp/tests/linalg/reduce.cu diff --git a/cpp/test/linalg/reduce.cuh b/cpp/tests/linalg/reduce.cuh similarity index 100% rename from cpp/test/linalg/reduce.cuh rename to cpp/tests/linalg/reduce.cuh diff --git a/cpp/test/linalg/reduce_cols_by_key.cu b/cpp/tests/linalg/reduce_cols_by_key.cu similarity index 100% rename from cpp/test/linalg/reduce_cols_by_key.cu rename to cpp/tests/linalg/reduce_cols_by_key.cu diff --git a/cpp/test/linalg/reduce_rows_by_key.cu b/cpp/tests/linalg/reduce_rows_by_key.cu similarity index 100% rename from cpp/test/linalg/reduce_rows_by_key.cu rename to cpp/tests/linalg/reduce_rows_by_key.cu diff --git a/cpp/test/linalg/rsvd.cu b/cpp/tests/linalg/rsvd.cu similarity index 100% rename from cpp/test/linalg/rsvd.cu rename to cpp/tests/linalg/rsvd.cu diff --git a/cpp/test/linalg/sqrt.cu b/cpp/tests/linalg/sqrt.cu similarity index 100% rename from cpp/test/linalg/sqrt.cu rename to cpp/tests/linalg/sqrt.cu diff --git a/cpp/test/linalg/strided_reduction.cu b/cpp/tests/linalg/strided_reduction.cu similarity index 100% rename from cpp/test/linalg/strided_reduction.cu rename to cpp/tests/linalg/strided_reduction.cu diff --git a/cpp/test/linalg/subtract.cu b/cpp/tests/linalg/subtract.cu similarity index 100% rename from cpp/test/linalg/subtract.cu rename to cpp/tests/linalg/subtract.cu diff --git a/cpp/test/linalg/svd.cu b/cpp/tests/linalg/svd.cu similarity index 100% rename from cpp/test/linalg/svd.cu rename to cpp/tests/linalg/svd.cu diff --git a/cpp/test/linalg/ternary_op.cu b/cpp/tests/linalg/ternary_op.cu similarity index 100% 
rename from cpp/test/linalg/ternary_op.cu rename to cpp/tests/linalg/ternary_op.cu diff --git a/cpp/test/linalg/transpose.cu b/cpp/tests/linalg/transpose.cu similarity index 100% rename from cpp/test/linalg/transpose.cu rename to cpp/tests/linalg/transpose.cu diff --git a/cpp/test/linalg/unary_op.cu b/cpp/tests/linalg/unary_op.cu similarity index 100% rename from cpp/test/linalg/unary_op.cu rename to cpp/tests/linalg/unary_op.cu diff --git a/cpp/test/linalg/unary_op.cuh b/cpp/tests/linalg/unary_op.cuh similarity index 100% rename from cpp/test/linalg/unary_op.cuh rename to cpp/tests/linalg/unary_op.cuh diff --git a/cpp/test/matrix/argmax.cu b/cpp/tests/matrix/argmax.cu similarity index 100% rename from cpp/test/matrix/argmax.cu rename to cpp/tests/matrix/argmax.cu diff --git a/cpp/test/matrix/argmin.cu b/cpp/tests/matrix/argmin.cu similarity index 100% rename from cpp/test/matrix/argmin.cu rename to cpp/tests/matrix/argmin.cu diff --git a/cpp/test/matrix/columnSort.cu b/cpp/tests/matrix/columnSort.cu similarity index 100% rename from cpp/test/matrix/columnSort.cu rename to cpp/tests/matrix/columnSort.cu diff --git a/cpp/test/matrix/diagonal.cu b/cpp/tests/matrix/diagonal.cu similarity index 100% rename from cpp/test/matrix/diagonal.cu rename to cpp/tests/matrix/diagonal.cu diff --git a/cpp/test/matrix/eye.cu b/cpp/tests/matrix/eye.cu similarity index 100% rename from cpp/test/matrix/eye.cu rename to cpp/tests/matrix/eye.cu diff --git a/cpp/test/matrix/gather.cu b/cpp/tests/matrix/gather.cu similarity index 100% rename from cpp/test/matrix/gather.cu rename to cpp/tests/matrix/gather.cu diff --git a/cpp/test/matrix/linewise_op.cu b/cpp/tests/matrix/linewise_op.cu similarity index 100% rename from cpp/test/matrix/linewise_op.cu rename to cpp/tests/matrix/linewise_op.cu diff --git a/cpp/test/matrix/math.cu b/cpp/tests/matrix/math.cu similarity index 100% rename from cpp/test/matrix/math.cu rename to cpp/tests/matrix/math.cu diff --git a/cpp/test/matrix/matrix.cu 
b/cpp/tests/matrix/matrix.cu similarity index 100% rename from cpp/test/matrix/matrix.cu rename to cpp/tests/matrix/matrix.cu diff --git a/cpp/test/matrix/norm.cu b/cpp/tests/matrix/norm.cu similarity index 100% rename from cpp/test/matrix/norm.cu rename to cpp/tests/matrix/norm.cu diff --git a/cpp/test/matrix/reverse.cu b/cpp/tests/matrix/reverse.cu similarity index 100% rename from cpp/test/matrix/reverse.cu rename to cpp/tests/matrix/reverse.cu diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/tests/matrix/sample_rows.cu similarity index 100% rename from cpp/test/matrix/sample_rows.cu rename to cpp/tests/matrix/sample_rows.cu diff --git a/cpp/test/matrix/scatter.cu b/cpp/tests/matrix/scatter.cu similarity index 100% rename from cpp/test/matrix/scatter.cu rename to cpp/tests/matrix/scatter.cu diff --git a/cpp/test/matrix/select_k.cu b/cpp/tests/matrix/select_k.cu similarity index 100% rename from cpp/test/matrix/select_k.cu rename to cpp/tests/matrix/select_k.cu diff --git a/cpp/test/matrix/select_k.cuh b/cpp/tests/matrix/select_k.cuh similarity index 100% rename from cpp/test/matrix/select_k.cuh rename to cpp/tests/matrix/select_k.cuh diff --git a/cpp/test/matrix/select_large_k.cu b/cpp/tests/matrix/select_large_k.cu similarity index 100% rename from cpp/test/matrix/select_large_k.cu rename to cpp/tests/matrix/select_large_k.cu diff --git a/cpp/test/matrix/slice.cu b/cpp/tests/matrix/slice.cu similarity index 100% rename from cpp/test/matrix/slice.cu rename to cpp/tests/matrix/slice.cu diff --git a/cpp/test/matrix/triangular.cu b/cpp/tests/matrix/triangular.cu similarity index 100% rename from cpp/test/matrix/triangular.cu rename to cpp/tests/matrix/triangular.cu diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/tests/mr/device/buffer.cpp similarity index 100% rename from cpp/test/mr/device/buffer.cpp rename to cpp/tests/mr/device/buffer.cpp diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/tests/mr/host/buffer.cpp similarity index 100% rename from 
cpp/test/mr/host/buffer.cpp rename to cpp/tests/mr/host/buffer.cpp diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/tests/neighbors/ball_cover.cu similarity index 100% rename from cpp/test/neighbors/ball_cover.cu rename to cpp/tests/neighbors/ball_cover.cu diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/tests/neighbors/epsilon_neighborhood.cu similarity index 100% rename from cpp/test/neighbors/epsilon_neighborhood.cu rename to cpp/tests/neighbors/epsilon_neighborhood.cu diff --git a/cpp/test/neighbors/haversine.cu b/cpp/tests/neighbors/haversine.cu similarity index 100% rename from cpp/test/neighbors/haversine.cu rename to cpp/tests/neighbors/haversine.cu diff --git a/cpp/test/neighbors/knn_utils.cuh b/cpp/tests/neighbors/knn_utils.cuh similarity index 100% rename from cpp/test/neighbors/knn_utils.cuh rename to cpp/tests/neighbors/knn_utils.cuh diff --git a/cpp/test/neighbors/spatial_data.h b/cpp/tests/neighbors/spatial_data.h similarity index 100% rename from cpp/test/neighbors/spatial_data.h rename to cpp/tests/neighbors/spatial_data.h diff --git a/cpp/test/random/excess_sampling.cu b/cpp/tests/random/excess_sampling.cu similarity index 100% rename from cpp/test/random/excess_sampling.cu rename to cpp/tests/random/excess_sampling.cu diff --git a/cpp/test/random/make_blobs.cu b/cpp/tests/random/make_blobs.cu similarity index 100% rename from cpp/test/random/make_blobs.cu rename to cpp/tests/random/make_blobs.cu diff --git a/cpp/test/random/make_regression.cu b/cpp/tests/random/make_regression.cu similarity index 100% rename from cpp/test/random/make_regression.cu rename to cpp/tests/random/make_regression.cu diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/tests/random/multi_variable_gaussian.cu similarity index 100% rename from cpp/test/random/multi_variable_gaussian.cu rename to cpp/tests/random/multi_variable_gaussian.cu diff --git a/cpp/test/random/permute.cu b/cpp/tests/random/permute.cu similarity index 100% rename from 
cpp/test/random/permute.cu rename to cpp/tests/random/permute.cu diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/tests/random/rmat_rectangular_generator.cu similarity index 100% rename from cpp/test/random/rmat_rectangular_generator.cu rename to cpp/tests/random/rmat_rectangular_generator.cu diff --git a/cpp/test/random/rng.cu b/cpp/tests/random/rng.cu similarity index 100% rename from cpp/test/random/rng.cu rename to cpp/tests/random/rng.cu diff --git a/cpp/test/random/rng_discrete.cu b/cpp/tests/random/rng_discrete.cu similarity index 100% rename from cpp/test/random/rng_discrete.cu rename to cpp/tests/random/rng_discrete.cu diff --git a/cpp/test/random/rng_int.cu b/cpp/tests/random/rng_int.cu similarity index 100% rename from cpp/test/random/rng_int.cu rename to cpp/tests/random/rng_int.cu diff --git a/cpp/test/random/rng_pcg_host_api.cu b/cpp/tests/random/rng_pcg_host_api.cu similarity index 100% rename from cpp/test/random/rng_pcg_host_api.cu rename to cpp/tests/random/rng_pcg_host_api.cu diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/tests/random/sample_without_replacement.cu similarity index 100% rename from cpp/test/random/sample_without_replacement.cu rename to cpp/tests/random/sample_without_replacement.cu diff --git a/cpp/test/sparse/add.cu b/cpp/tests/sparse/add.cu similarity index 100% rename from cpp/test/sparse/add.cu rename to cpp/tests/sparse/add.cu diff --git a/cpp/test/sparse/convert_coo.cu b/cpp/tests/sparse/convert_coo.cu similarity index 100% rename from cpp/test/sparse/convert_coo.cu rename to cpp/tests/sparse/convert_coo.cu diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/tests/sparse/convert_csr.cu similarity index 100% rename from cpp/test/sparse/convert_csr.cu rename to cpp/tests/sparse/convert_csr.cu diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/tests/sparse/csr_row_slice.cu similarity index 100% rename from cpp/test/sparse/csr_row_slice.cu rename to cpp/tests/sparse/csr_row_slice.cu diff --git 
a/cpp/test/sparse/csr_to_dense.cu b/cpp/tests/sparse/csr_to_dense.cu similarity index 100% rename from cpp/test/sparse/csr_to_dense.cu rename to cpp/tests/sparse/csr_to_dense.cu diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/tests/sparse/csr_transpose.cu similarity index 100% rename from cpp/test/sparse/csr_transpose.cu rename to cpp/tests/sparse/csr_transpose.cu diff --git a/cpp/test/sparse/degree.cu b/cpp/tests/sparse/degree.cu similarity index 100% rename from cpp/test/sparse/degree.cu rename to cpp/tests/sparse/degree.cu diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/tests/sparse/dist_coo_spmv.cu similarity index 100% rename from cpp/test/sparse/dist_coo_spmv.cu rename to cpp/tests/sparse/dist_coo_spmv.cu diff --git a/cpp/test/sparse/distance.cu b/cpp/tests/sparse/distance.cu similarity index 100% rename from cpp/test/sparse/distance.cu rename to cpp/tests/sparse/distance.cu diff --git a/cpp/test/sparse/filter.cu b/cpp/tests/sparse/filter.cu similarity index 100% rename from cpp/test/sparse/filter.cu rename to cpp/tests/sparse/filter.cu diff --git a/cpp/test/sparse/masked_matmul.cu b/cpp/tests/sparse/masked_matmul.cu similarity index 100% rename from cpp/test/sparse/masked_matmul.cu rename to cpp/tests/sparse/masked_matmul.cu diff --git a/cpp/test/sparse/mst.cu b/cpp/tests/sparse/mst.cu similarity index 100% rename from cpp/test/sparse/mst.cu rename to cpp/tests/sparse/mst.cu diff --git a/cpp/test/sparse/norm.cu b/cpp/tests/sparse/norm.cu similarity index 100% rename from cpp/test/sparse/norm.cu rename to cpp/tests/sparse/norm.cu diff --git a/cpp/test/sparse/normalize.cu b/cpp/tests/sparse/normalize.cu similarity index 100% rename from cpp/test/sparse/normalize.cu rename to cpp/tests/sparse/normalize.cu diff --git a/cpp/test/sparse/reduce.cu b/cpp/tests/sparse/reduce.cu similarity index 100% rename from cpp/test/sparse/reduce.cu rename to cpp/tests/sparse/reduce.cu diff --git a/cpp/test/sparse/row_op.cu b/cpp/tests/sparse/row_op.cu similarity index 
100% rename from cpp/test/sparse/row_op.cu rename to cpp/tests/sparse/row_op.cu diff --git a/cpp/test/sparse/sddmm.cu b/cpp/tests/sparse/sddmm.cu similarity index 100% rename from cpp/test/sparse/sddmm.cu rename to cpp/tests/sparse/sddmm.cu diff --git a/cpp/test/sparse/select_k_csr.cu b/cpp/tests/sparse/select_k_csr.cu similarity index 100% rename from cpp/test/sparse/select_k_csr.cu rename to cpp/tests/sparse/select_k_csr.cu diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu similarity index 100% rename from cpp/test/sparse/solver/lanczos.cu rename to cpp/tests/sparse/solver/lanczos.cu diff --git a/cpp/test/sparse/sort.cu b/cpp/tests/sparse/sort.cu similarity index 100% rename from cpp/test/sparse/sort.cu rename to cpp/tests/sparse/sort.cu diff --git a/cpp/test/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu similarity index 100% rename from cpp/test/sparse/spectral_matrix.cu rename to cpp/tests/sparse/spectral_matrix.cu diff --git a/cpp/test/sparse/spgemmi.cu b/cpp/tests/sparse/spgemmi.cu similarity index 100% rename from cpp/test/sparse/spgemmi.cu rename to cpp/tests/sparse/spgemmi.cu diff --git a/cpp/test/sparse/spmm.cu b/cpp/tests/sparse/spmm.cu similarity index 100% rename from cpp/test/sparse/spmm.cu rename to cpp/tests/sparse/spmm.cu diff --git a/cpp/test/sparse/symmetrize.cu b/cpp/tests/sparse/symmetrize.cu similarity index 100% rename from cpp/test/sparse/symmetrize.cu rename to cpp/tests/sparse/symmetrize.cu diff --git a/cpp/test/stats/accuracy.cu b/cpp/tests/stats/accuracy.cu similarity index 100% rename from cpp/test/stats/accuracy.cu rename to cpp/tests/stats/accuracy.cu diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/tests/stats/adjusted_rand_index.cu similarity index 100% rename from cpp/test/stats/adjusted_rand_index.cu rename to cpp/tests/stats/adjusted_rand_index.cu diff --git a/cpp/test/stats/completeness_score.cu b/cpp/tests/stats/completeness_score.cu similarity index 100% rename from 
cpp/test/stats/completeness_score.cu rename to cpp/tests/stats/completeness_score.cu diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/tests/stats/contingencyMatrix.cu similarity index 100% rename from cpp/test/stats/contingencyMatrix.cu rename to cpp/tests/stats/contingencyMatrix.cu diff --git a/cpp/test/stats/cov.cu b/cpp/tests/stats/cov.cu similarity index 100% rename from cpp/test/stats/cov.cu rename to cpp/tests/stats/cov.cu diff --git a/cpp/test/stats/dispersion.cu b/cpp/tests/stats/dispersion.cu similarity index 100% rename from cpp/test/stats/dispersion.cu rename to cpp/tests/stats/dispersion.cu diff --git a/cpp/test/stats/entropy.cu b/cpp/tests/stats/entropy.cu similarity index 100% rename from cpp/test/stats/entropy.cu rename to cpp/tests/stats/entropy.cu diff --git a/cpp/test/stats/histogram.cu b/cpp/tests/stats/histogram.cu similarity index 100% rename from cpp/test/stats/histogram.cu rename to cpp/tests/stats/histogram.cu diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/tests/stats/homogeneity_score.cu similarity index 100% rename from cpp/test/stats/homogeneity_score.cu rename to cpp/tests/stats/homogeneity_score.cu diff --git a/cpp/test/stats/information_criterion.cu b/cpp/tests/stats/information_criterion.cu similarity index 100% rename from cpp/test/stats/information_criterion.cu rename to cpp/tests/stats/information_criterion.cu diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/tests/stats/kl_divergence.cu similarity index 100% rename from cpp/test/stats/kl_divergence.cu rename to cpp/tests/stats/kl_divergence.cu diff --git a/cpp/test/stats/mean.cu b/cpp/tests/stats/mean.cu similarity index 100% rename from cpp/test/stats/mean.cu rename to cpp/tests/stats/mean.cu diff --git a/cpp/test/stats/mean_center.cu b/cpp/tests/stats/mean_center.cu similarity index 100% rename from cpp/test/stats/mean_center.cu rename to cpp/tests/stats/mean_center.cu diff --git a/cpp/test/stats/meanvar.cu b/cpp/tests/stats/meanvar.cu similarity index 100% rename 
from cpp/test/stats/meanvar.cu rename to cpp/tests/stats/meanvar.cu diff --git a/cpp/test/stats/minmax.cu b/cpp/tests/stats/minmax.cu similarity index 100% rename from cpp/test/stats/minmax.cu rename to cpp/tests/stats/minmax.cu diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/tests/stats/mutual_info_score.cu similarity index 100% rename from cpp/test/stats/mutual_info_score.cu rename to cpp/tests/stats/mutual_info_score.cu diff --git a/cpp/test/stats/r2_score.cu b/cpp/tests/stats/r2_score.cu similarity index 100% rename from cpp/test/stats/r2_score.cu rename to cpp/tests/stats/r2_score.cu diff --git a/cpp/test/stats/rand_index.cu b/cpp/tests/stats/rand_index.cu similarity index 100% rename from cpp/test/stats/rand_index.cu rename to cpp/tests/stats/rand_index.cu diff --git a/cpp/test/stats/regression_metrics.cu b/cpp/tests/stats/regression_metrics.cu similarity index 100% rename from cpp/test/stats/regression_metrics.cu rename to cpp/tests/stats/regression_metrics.cu diff --git a/cpp/test/stats/stddev.cu b/cpp/tests/stats/stddev.cu similarity index 100% rename from cpp/test/stats/stddev.cu rename to cpp/tests/stats/stddev.cu diff --git a/cpp/test/stats/sum.cu b/cpp/tests/stats/sum.cu similarity index 100% rename from cpp/test/stats/sum.cu rename to cpp/tests/stats/sum.cu diff --git a/cpp/test/stats/v_measure.cu b/cpp/tests/stats/v_measure.cu similarity index 100% rename from cpp/test/stats/v_measure.cu rename to cpp/tests/stats/v_measure.cu diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/tests/stats/weighted_mean.cu similarity index 100% rename from cpp/test/stats/weighted_mean.cu rename to cpp/tests/stats/weighted_mean.cu diff --git a/cpp/test/test.cpp b/cpp/tests/test.cpp similarity index 100% rename from cpp/test/test.cpp rename to cpp/tests/test.cpp diff --git a/cpp/test/test_utils.cuh b/cpp/tests/test_utils.cuh similarity index 100% rename from cpp/test/test_utils.cuh rename to cpp/tests/test_utils.cuh diff --git a/cpp/test/test_utils.h 
b/cpp/tests/test_utils.h similarity index 100% rename from cpp/test/test_utils.h rename to cpp/tests/test_utils.h diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/tests/util/bitonic_sort.cu similarity index 100% rename from cpp/test/util/bitonic_sort.cu rename to cpp/tests/util/bitonic_sort.cu diff --git a/cpp/test/util/cudart_utils.cpp b/cpp/tests/util/cudart_utils.cpp similarity index 100% rename from cpp/test/util/cudart_utils.cpp rename to cpp/tests/util/cudart_utils.cpp diff --git a/cpp/test/util/device_atomics.cu b/cpp/tests/util/device_atomics.cu similarity index 100% rename from cpp/test/util/device_atomics.cu rename to cpp/tests/util/device_atomics.cu diff --git a/cpp/test/util/integer_utils.cpp b/cpp/tests/util/integer_utils.cpp similarity index 100% rename from cpp/test/util/integer_utils.cpp rename to cpp/tests/util/integer_utils.cpp diff --git a/cpp/test/util/integer_utils.cu b/cpp/tests/util/integer_utils.cu similarity index 100% rename from cpp/test/util/integer_utils.cu rename to cpp/tests/util/integer_utils.cu diff --git a/cpp/test/util/memory_type_dispatcher.cu b/cpp/tests/util/memory_type_dispatcher.cu similarity index 100% rename from cpp/test/util/memory_type_dispatcher.cu rename to cpp/tests/util/memory_type_dispatcher.cu diff --git a/cpp/test/util/popc.cu b/cpp/tests/util/popc.cu similarity index 100% rename from cpp/test/util/popc.cu rename to cpp/tests/util/popc.cu diff --git a/cpp/test/util/pow2_utils.cu b/cpp/tests/util/pow2_utils.cu similarity index 100% rename from cpp/test/util/pow2_utils.cu rename to cpp/tests/util/pow2_utils.cu diff --git a/cpp/test/util/reduction.cu b/cpp/tests/util/reduction.cu similarity index 100% rename from cpp/test/util/reduction.cu rename to cpp/tests/util/reduction.cu diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 6240b2638b..1a2626f2b2 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -211,7 +211,7 @@ This will bring up an interactive 
prompt to select which spelling fixes to apply Manually, run the following to bulk-fix include style issues: ```bash -python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list of folders which you want to fix] +python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/tests ... list of folders which you want to fix] ``` ### Copyright header @@ -298,9 +298,9 @@ RAFT is a heavily templated library. Several core functions are expensive to com **Macros.** We define the macros `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY`. The `RAFT_COMPILED` macro is defined by `CMake` when compiling code that (1) is part of `libraft.so` or (2) is linked with `libraft.so`. It indicates that a precompiled `libraft.so` is present at runtime. -The `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro is defined by `CMake` during compilation of `libraft.so` itself. When defined, it indicates that implicit instantiations of expensive function templates are forbidden (they result in a compiler error). In the RAFT project, we additionally define this macro during compilation of the tests and benchmarks. +The `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro is defined by `CMake` during compilation of `libraft.so` itself. When defined, it indicates that implicit instantiations of expensive function templates are forbidden (they result in a compiler error). In the RAFT project, we additionally define this macro during compilation of the tests and benchmarks. -Below, we summarize which combinations of `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` are used in practice and what the effect of the combination is. +Below, we summarize which combinations of `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` are used in practice and what the effect of the combination is. 
| RAFT_COMPILED | RAFT_EXPLICIT_INSTANTIATE_ONLY | Which targets | |---------------|--------------------------------|------------------------------------------------------------------------------------------------------| @@ -349,7 +349,7 @@ The file `expensive-ext.cuh` contains the following: #ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY namespace raft { -// (1) define templates to raise an error in case of accidental instantiation +// (1) define templates to raise an error in case of accidental instantiation template void expensive(T arg) RAFT_EXPLICIT; } // namespace raft #endif //RAFT_EXPLICIT_INSTANTIATE_ONLY @@ -371,7 +371,7 @@ template void raft::expensive(int); template void raft::expensive(float); ``` -**Design considerations**: +**Design considerations**: 1. In the `-ext.cuh` header, do not include implementation headers. Only include function parameter types and types that are used to instantiate the templates. If a primitive takes custom parameter types, define them in a separate header called `_types.hpp`. (see [Common Design Considerations](https://github.com/rapidsai/raft/blob/7b065aff81a0b1976e2a9e2f3de6690361a1111b/docs/source/developer_guide.md#common-design-considerations)). @@ -381,7 +381,7 @@ template void raft::expensive(float); 4. If a header file defines multiple expensive templates, it can be that one of them is not instantiated. In this case, **do define** the template with `RAFT_EXPLICIT` in the `-ext` header. This way, when the template is instantiated, the developer gets a helpful error message instead of a confusing "function not found". -This header structure was proposed in [issue #1416](https://github.com/rapidsai/raft/issues/1416), which contains more background on the motivation of this structure and the mechanics of C++ template instantiation. 
+This header structure was proposed in [issue #1416](https://github.com/rapidsai/raft/issues/1416), which contains more background on the motivation of this structure and the mechanics of C++ template instantiation. ## Testing diff --git a/pyproject.toml b/pyproject.toml index 2f23debfbe..460c0312a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ ignore_missing_imports = true # they are imported by a checked file. follow_imports = "skip" exclude = [ - "pylibraft/pylibraft/test", + "pylibraft/pylibraft/tests", ] [tool.codespell] diff --git a/python/pylibraft/pylibraft/test/__init__py b/python/pylibraft/pylibraft/tests/__init__py similarity index 100% rename from python/pylibraft/pylibraft/test/__init__py rename to python/pylibraft/pylibraft/tests/__init__py diff --git a/python/pylibraft/pylibraft/test/pytest.ini b/python/pylibraft/pylibraft/tests/pytest.ini similarity index 100% rename from python/pylibraft/pylibraft/test/pytest.ini rename to python/pylibraft/pylibraft/tests/pytest.ini diff --git a/python/pylibraft/pylibraft/test/test_cai_wrapper.py b/python/pylibraft/pylibraft/tests/test_cai_wrapper.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_cai_wrapper.py rename to python/pylibraft/pylibraft/tests/test_cai_wrapper.py diff --git a/python/pylibraft/pylibraft/test/test_config.py b/python/pylibraft/pylibraft/tests/test_config.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_config.py rename to python/pylibraft/pylibraft/tests/test_config.py diff --git a/python/pylibraft/pylibraft/test/test_device_ndarray.py b/python/pylibraft/pylibraft/tests/test_device_ndarray.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_device_ndarray.py rename to python/pylibraft/pylibraft/tests/test_device_ndarray.py diff --git a/python/pylibraft/pylibraft/test/test_doctests.py b/python/pylibraft/pylibraft/tests/test_doctests.py similarity index 100% rename from 
python/pylibraft/pylibraft/test/test_doctests.py rename to python/pylibraft/pylibraft/tests/test_doctests.py diff --git a/python/pylibraft/pylibraft/test/test_handle.py b/python/pylibraft/pylibraft/tests/test_handle.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_handle.py rename to python/pylibraft/pylibraft/tests/test_handle.py diff --git a/python/pylibraft/pylibraft/test/test_mdspan_serializer.py b/python/pylibraft/pylibraft/tests/test_mdspan_serializer.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_mdspan_serializer.py rename to python/pylibraft/pylibraft/tests/test_mdspan_serializer.py diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/tests/test_random.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_random.py rename to python/pylibraft/pylibraft/tests/test_random.py diff --git a/python/pylibraft/pylibraft/test/test_sparse.py b/python/pylibraft/pylibraft/tests/test_sparse.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_sparse.py rename to python/pylibraft/pylibraft/tests/test_sparse.py diff --git a/python/pylibraft/pylibraft/test/test_version.py b/python/pylibraft/pylibraft/tests/test_version.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_version.py rename to python/pylibraft/pylibraft/tests/test_version.py diff --git a/python/pylibraft/pylibraft/test/test_z_interruptible.py b/python/pylibraft/pylibraft/tests/test_z_interruptible.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_z_interruptible.py rename to python/pylibraft/pylibraft/tests/test_z_interruptible.py diff --git a/python/raft-dask/raft_dask/test/conftest.py b/python/raft-dask/raft_dask/tests/conftest.py similarity index 100% rename from python/raft-dask/raft_dask/test/conftest.py rename to python/raft-dask/raft_dask/tests/conftest.py diff --git a/python/raft-dask/raft_dask/test/pytest.ini 
b/python/raft-dask/raft_dask/tests/pytest.ini similarity index 100% rename from python/raft-dask/raft_dask/test/pytest.ini rename to python/raft-dask/raft_dask/tests/pytest.ini diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/tests/test_comms.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_comms.py rename to python/raft-dask/raft_dask/tests/test_comms.py diff --git a/python/raft-dask/raft_dask/test/test_raft.py b/python/raft-dask/raft_dask/tests/test_raft.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_raft.py rename to python/raft-dask/raft_dask/tests/test_raft.py diff --git a/python/raft-dask/raft_dask/test/test_version.py b/python/raft-dask/raft_dask/tests/test_version.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_version.py rename to python/raft-dask/raft_dask/tests/test_version.py From 85fd74dd32cd10c9ff6bfa73077b7e693a5e22dd Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 24 Jan 2025 03:38:29 -0500 Subject: [PATCH 29/37] Add cuda 12.8 support (#2551) CUDA 12.8 introduces sm_120 that requires a reduced number of threads per sm. We also need to pass `-static-global-template-stub=false` when building with 12.8 as we violate CUDA ODR kernel rules Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Corey J. 
Nolet (https://github.com/cjnolet) - https://github.com/jakirkham URL: https://github.com/rapidsai/raft/pull/2551 --- cpp/cmake/modules/ConfigureCUDA.cmake | 8 +++++++- cpp/include/raft/neighbors/detail/nn_descent.cuh | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 25b9b0ddf8..fbf4428650 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -29,6 +29,12 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) endif() + + # Allow invalid CUDA kernels in the short term + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0) + list(APPEND RAFT_CUDA_FLAGS -static-global-template-stub=false) + endif() + endif() if(CUDA_LOG_COMPILE_TIME) diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 02610f9afb..64e4a3ea7a 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -709,7 +709,8 @@ template > RAFT_KERNEL #ifdef __CUDA_ARCH__ -#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) +#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \ + (__CUDA_ARCH__) == 1200 __launch_bounds__(BLOCK_SIZE) #else __launch_bounds__(BLOCK_SIZE, 4) From 14c92cc37561944fbc76b7d511122b8d2bca627e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 24 Jan 2025 08:48:58 -0600 Subject: [PATCH 30/37] Update pip devcontainers to UCX 1.18 (#2550) Contributes to https://github.com/rapidsai/build-planning/issues/138 Updates to using UCX 1.18 in pip devcontainers here. Also updates `rapids-dependency-file-generator` and `pre-commit-hooks` hooks to their latest versions. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - https://github.com/jakirkham - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/raft/pull/2550 --- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .devcontainer/cuda12.5-pip/devcontainer.json | 2 +- .pre-commit-config.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index c691ed6007..94b0909f6c 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index bc43900ef3..2bcfa8733f 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE":
"rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca1efc3abd..4e0cf53c4d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -98,7 +98,7 @@ repos: ^CHANGELOG[.]md$| ^cpp/cmake/patches/cutlass/build-export[.]patch$ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-json - repo: https://github.com/rapidsai/pre-commit-hooks @@ -118,7 +118,7 @@ repos: docs/source/sphinxext/github_link[.]py| - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.16.0 + rev: v1.17.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] From fc4c49010499f2d6954c995495ee14afbb7c4a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Fri, 24 Jan 2025 19:17:11 +0100 Subject: [PATCH 31/37] Fix bit order of RMAT Rectangular Generator to match expectation (#2542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the decimal indices of rows/columns of the adjacency matrix did not align with the node-ids created by the algorithm. This PR fixes the bits set for each decision during the computation as described by the docstring. FYI @tfeher Authors: - Malte Förster (https://github.com/mfoerste4) - Corey J.
Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/2542 --- .../detail/rmat_rectangular_generator.cuh | 4 +- .../random/rmat_rectangular_generator.cuh | 27 +++-- .../random/rmat_rectangular_generator.cu | 98 ++++++++++++++++++- 3 files changed, 116 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh index 24207ba6db..12c01fc5d7 100644 --- a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh +++ b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh @@ -54,8 +54,8 @@ DI void gen_and_update_bits(IdxT& src_id, } else { src_bit = dst_bit = true; } - if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (r_scale - curr_depth - 1)); } - if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (c_scale - curr_depth - 1)); } + if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (curr_depth)); } + if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (curr_depth)); } } template diff --git a/cpp/include/raft/random/rmat_rectangular_generator.cuh b/cpp/include/raft/random/rmat_rectangular_generator.cuh index 5598b25c8e..cdd89f40dd 100644 --- a/cpp/include/raft/random/rmat_rectangular_generator.cuh +++ b/cpp/include/raft/random/rmat_rectangular_generator.cuh @@ -30,8 +30,18 @@ namespace raft::random { /** * @brief Generate a bipartite RMAT graph for a rectangular adjacency matrix. * - * This is the most general of several overloads of `rmat_rectangular_gen` - * in this file, and thus has the most detailed documentation. + * This function generates a random graph represented by a (sparse) adjacency matrix. As described + * in [1], to generate connections, we recursively subdivide the adjacency matrix into four + * equal-sized partitions, and distribute edges within these partitions with unequal + * probabilities.
The probabilities are described by numbers [a, b, c, d]. We choose the upper left + * partition with probability `a`. The chosen partition is again subdivided into four smaller + * partitions, and the procedure is repeated until we reach a single element (1 x 1 partition). + * + * We can prescribe a different probability distribution at each iteration. The `theta` array stores + * the probability values for each level. + * + * [1] "R-MAT: A Recursive Model for Graph Mining" Deepayan Chakrabarti, Yiping Zhan, Christos + * Faloutsos (2004) https://doi.org/10.1137/1.9781611972740.43 * * @tparam IdxT Type of each node index * @tparam ProbT Data type used for probability distributions (either fp32 or fp64) @@ -49,11 +59,14 @@ namespace raft::random { * @param[out] out_dst Destination node id's [on device]. `out_src` and `out_dst` * together form the struct-of-arrays representation of the same * output data as `out`. - * @param[in] theta distribution of each quadrant at each level of resolution. - * Since these are probabilities, each of the 2x2 matrices for - * each level of the RMAT must sum to one. [on device] - * [dim = max(r_scale, c_scale) x 2 x 2]. Of course, it is assumed - * that each of the group of 2 x 2 numbers all sum up to 1. + * @param[in] theta array [on device] with the distribution of each quadrant at each level of + * resolution. theta = [a0, b0, c0, d0, a1, b1, c1, d1, ...], where + * [a0, b0, c0, d0] defines the probability at the finest level (2x2). + * The last four elements in the array describe the probability in the + * coarsest level (where matrix size = [2^r_scale, 2^c_scale]). + * Since these are probabilities, the four [a_i, b_i, c_i, d_i] values for + * each level of the RMAT must sum to one. + * [dim = max(r_scale, c_scale) x 2 x 2].
* @param[in] r_scale 2^r_scale represents the number of source nodes * @param[in] c_scale 2^c_scale represents the number of destination nodes * diff --git a/cpp/tests/random/rmat_rectangular_generator.cu b/cpp/tests/random/rmat_rectangular_generator.cu index 8d668f7a8a..10c00051b6 100644 --- a/cpp/tests/random/rmat_rectangular_generator.cu +++ b/cpp/tests/random/rmat_rectangular_generator.cu @@ -155,10 +155,10 @@ RAFT_KERNEL compute_hist( size_t idx = (threadIdx.x + blockIdx.x * blockDim.x) * 2; if (idx + 1 < len) { auto src = out[idx], dst = out[idx + 1]; - for (size_t j = 0; j < max_scale; ++j) { - bool src_bit = j < r_scale ? src & (1 << (r_scale - j - 1)) : 0; - bool dst_bit = j < c_scale ? dst & (1 << (c_scale - j - 1)) : 0; - auto idx = j * 4 + src_bit * 2 + dst_bit; + for (size_t bit_pos = 0; bit_pos < max_scale; ++bit_pos) { + bool src_bit = bit_pos < r_scale ? src & (1 << bit_pos) : 0; + bool dst_bit = bit_pos < c_scale ? dst & (1 << bit_pos) : 0; + auto idx = bit_pos * 4 + src_bit * 2 + dst_bit; atomicAdd(hist + idx, 1); } } @@ -393,11 +393,101 @@ const std::vector inputs = { {18, 16, 200000, false, 456789ULL, TOLERANCE}, {18, 16, 200000, true, 456789ULL, TOLERANCE}}; +struct RmatForcedOutputs { + size_t r_scale; + size_t c_scale; + size_t r_node_id; + size_t c_node_id; +}; + +class RmatGenForceTest : public ::testing::TestWithParam { + public: + RmatGenForceTest() + : handle{}, + stream{resource::get_cuda_stream(handle)}, + params{::testing::TestWithParam::GetParam()}, + out{2, stream}, + out_src{1, stream}, + out_dst{1, stream}, + theta{0, stream}, + h_theta{}, + state{0, GeneratorType::GenPC}, + max_scale(std::max(params.r_scale, params.c_scale)) + { + theta.resize(4 * max_scale, stream); + h_theta.resize(theta.size(), 0.f); + for (size_t bit_pos = 0; bit_pos < max_scale; ++bit_pos) { + size_t row_bit = ((params.r_node_id & (1 << bit_pos)) != 0); + size_t col_bit = ((params.c_node_id & (1 << bit_pos)) != 0); + + // now force theta for bit -- 2x2 
matrix row major + h_theta[4 * bit_pos + row_bit * 2 + col_bit] = 1.f; + } + + raft::update_device(theta.data(), h_theta.data(), max_scale * 4, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + } + + protected: + void SetUp() override + { + rmat_rectangular_gen(out.data(), + out_src.data(), + out_dst.data(), + theta.data(), + params.r_scale, + params.c_scale, + size_t(1), + stream, + state); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + } + + void validate() + { + std::vector h_out(2, size_t(0)); + raft::update_host(h_out.data(), out.data(), 2, stream); + RAFT_CUDA_TRY(cudaGetLastError()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + std::vector h_out_expect; + h_out_expect.push_back(params.r_node_id); + h_out_expect.push_back(params.c_node_id); + + ASSERT_TRUE(hostVecMatch(h_out_expect, h_out, raft::Compare())); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + RmatForcedOutputs params; + size_t max_scale; + std::vector h_theta; + rmm::device_uvector out, out_src, out_dst; + rmm::device_uvector theta; + RngState state; +}; + +const std::vector forcedInputs = {{16, 16, 12425, 1233}, + {16, 16, 12, 424}, + {5, 5, 15, 15}, + {5, 6, 15, 15}, + {5, 15, 15, 15}, + {6, 5, 15, 15}, + {15, 5, 15, 15}, + {32, 16, 1253163, 60000}, + {16, 16, 12, 0}, + {16, 16, 0, 1255}}; + TEST_P(RmatGenTest, Result) { validate(); } INSTANTIATE_TEST_SUITE_P(RmatGenTests, RmatGenTest, ::testing::ValuesIn(inputs)); TEST_P(RmatGenMdspanTest, Result) { validate(); } INSTANTIATE_TEST_SUITE_P(RmatGenMdspanTests, RmatGenMdspanTest, ::testing::ValuesIn(inputs)); +TEST_P(RmatGenForceTest, Result) { validate(); } +INSTANTIATE_TEST_SUITE_P(RmatGenForceTests, RmatGenForceTest, ::testing::ValuesIn(forcedInputs)); + } // namespace random } // namespace raft From ef4a7e1acd5151e60d2489ecef4991605891f008 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 24 Jan 2025 19:44:20 -0600 Subject: [PATCH 32/37] Normalize whitespace (#2547) This PR applies `pre-commit` 
hooks to normalize whitespace (trimming trailing whitespace and enforcing consistent end-of-file newlines). These rules are already applied to most other RAPIDS repos, so this PR aligns with the norm in RAPIDS. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/raft/pull/2547 --- .pre-commit-config.yaml | 7 ++++++- build.sh | 1 - cpp/cmake/patches/cutlass/build-export.patch | 5 ++--- cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh | 2 +- cpp/include/raft/cluster/detail/mst.cuh | 2 +- cpp/include/raft/cluster/detail/single_linkage.cuh | 2 +- cpp/include/raft/common/nvtx.hpp | 2 +- cpp/include/raft/core/coo_matrix.hpp | 2 +- cpp/include/raft/core/csr_matrix.hpp | 2 +- cpp/include/raft/core/detail/mdspan_util.cuh | 2 +- cpp/include/raft/core/device_coo_matrix.hpp | 2 +- cpp/include/raft/core/device_csr_matrix.hpp | 2 +- cpp/include/raft/core/device_span.hpp | 2 +- cpp/include/raft/core/host_coo_matrix.hpp | 2 +- cpp/include/raft/core/host_csr_matrix.hpp | 2 +- cpp/include/raft/core/host_mdarray.hpp | 2 +- cpp/include/raft/core/host_span.hpp | 2 +- cpp/include/raft/core/resource/device_id.hpp | 2 +- cpp/include/raft/core/resource/device_properties.hpp | 2 +- cpp/include/raft/core/resource/sub_comms.hpp | 2 +- cpp/include/raft/core/sparse_types.hpp | 2 +- .../raft/distance/detail/fused_distance_nn/gemm.h | 2 +- .../raft/distance/detail/pairwise_distance_gemm.h | 2 +- cpp/include/raft/distance/fused_distance_nn.cuh | 2 +- cpp/include/raft/label/classlabels.cuh | 2 +- cpp/include/raft/label/detail/merge_labels.cuh | 2 +- cpp/include/raft/label/merge_labels.cuh | 2 +- cpp/include/raft/linalg/cholesky_r1_update.cuh | 2 +- cpp/include/raft/linalg/coalesced_reduction.cuh | 2 +- cpp/include/raft/linalg/detail/add.cuh | 2 +- cpp/include/raft/linalg/divide.cuh | 2 +- cpp/include/raft/linalg/eig.cuh | 2 +- 
cpp/include/raft/linalg/eltwise.cuh | 2 +- cpp/include/raft/linalg/gemv.cuh | 2 +- cpp/include/raft/linalg/linalg_types.hpp | 2 +- cpp/include/raft/linalg/lstsq.cuh | 2 +- cpp/include/raft/linalg/map_reduce.cuh | 2 +- cpp/include/raft/linalg/map_then_reduce.cuh | 2 +- cpp/include/raft/linalg/matrix_vector.cuh | 2 +- cpp/include/raft/linalg/multiply.cuh | 2 +- cpp/include/raft/linalg/power.cuh | 2 +- cpp/include/raft/linalg/qr.cuh | 2 +- cpp/include/raft/linalg/reduce.cuh | 2 +- cpp/include/raft/linalg/reduce_cols_by_key.cuh | 2 +- cpp/include/raft/linalg/reduce_rows_by_key.cuh | 2 +- cpp/include/raft/linalg/rsvd.cuh | 2 +- cpp/include/raft/linalg/sqrt.cuh | 2 +- cpp/include/raft/linalg/strided_reduction.cuh | 2 +- cpp/include/raft/linalg/subtract.cuh | 2 +- cpp/include/raft/linalg/svd.cuh | 2 +- cpp/include/raft/matrix/col_wise_sort.cuh | 2 +- cpp/include/raft/matrix/detail/gather_inplace.cuh | 2 +- cpp/include/raft/matrix/detail/scatter_inplace.cuh | 2 +- cpp/include/raft/matrix/math.hpp | 2 +- cpp/include/raft/matrix/norm.cuh | 2 +- cpp/include/raft/matrix/reverse.cuh | 2 +- cpp/include/raft/matrix/scatter.cuh | 2 +- .../neighbors/detail/cagra/compute_distance_vpq.cuh | 2 +- cpp/include/raft/neighbors/detail/div_utils.hpp | 2 +- cpp/include/raft/neighbors/ivf_flat_codepacker.hpp | 2 +- cpp/include/raft/random/detail/curand_wrappers.hpp | 2 +- cpp/include/raft/random/detail/permute.cuh | 2 +- cpp/include/raft/random/make_blobs.cuh | 2 +- cpp/include/raft/random/sample_without_replacement.cuh | 2 +- cpp/include/raft/solver/linear_assignment.cuh | 2 +- cpp/include/raft/sparse/convert/coo.cuh | 2 +- cpp/include/raft/sparse/convert/dense.cuh | 2 +- cpp/include/raft/sparse/convert/detail/coo.cuh | 2 +- cpp/include/raft/sparse/convert/detail/dense.cuh | 2 +- cpp/include/raft/sparse/detail/cusparse_macros.h | 2 +- cpp/include/raft/sparse/distance/detail/common.hpp | 2 +- .../coo_spmv_strategies/coo_mask_row_iterators.cuh | 2 +- 
.../detail/coo_spmv_strategies/dense_smem_strategy.cuh | 2 +- cpp/include/raft/sparse/distance/distance.cuh | 2 +- cpp/include/raft/sparse/linalg/add.cuh | 2 +- cpp/include/raft/sparse/linalg/degree.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/norm.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/transpose.h | 2 +- cpp/include/raft/sparse/linalg/norm.cuh | 2 +- cpp/include/raft/sparse/linalg/spectral.cuh | 2 +- cpp/include/raft/sparse/linalg/symmetrize.cuh | 2 +- cpp/include/raft/sparse/linalg/transpose.cuh | 2 +- .../raft/sparse/neighbors/cross_component_nn.cuh | 2 +- cpp/include/raft/sparse/op/filter.cuh | 2 +- cpp/include/raft/sparse/op/reduce.cuh | 2 +- cpp/include/raft/sparse/op/row_op.cuh | 2 +- cpp/include/raft/sparse/op/slice.cuh | 2 +- cpp/include/raft/sparse/solver/lanczos.cuh | 2 +- cpp/include/raft/spectral/cluster_solvers.cuh | 2 +- .../raft/spectral/cluster_solvers_deprecated.cuh | 2 +- cpp/include/raft/spectral/modularity_maximization.cuh | 2 +- cpp/include/raft/spectral/partition.cuh | 2 +- cpp/include/raft/stats/accuracy.cuh | 2 +- cpp/include/raft/stats/adjusted_rand_index.cuh | 2 +- cpp/include/raft/stats/completeness_score.cuh | 2 +- cpp/include/raft/stats/contingency_matrix.cuh | 2 +- cpp/include/raft/stats/cov.cuh | 2 +- cpp/include/raft/stats/detail/mean.cuh | 2 +- cpp/include/raft/stats/detail/stddev.cuh | 2 +- cpp/include/raft/stats/detail/sum.cuh | 2 +- cpp/include/raft/stats/detail/weighted_mean.cuh | 2 +- cpp/include/raft/stats/dispersion.cuh | 2 +- cpp/include/raft/stats/entropy.cuh | 2 +- cpp/include/raft/stats/homogeneity_score.cuh | 2 +- cpp/include/raft/stats/mean.cuh | 2 +- cpp/include/raft/stats/mean_center.cuh | 2 +- cpp/include/raft/stats/minmax.cuh | 2 +- cpp/include/raft/stats/mutual_info_score.cuh | 2 +- cpp/include/raft/stats/r2_score.cuh | 2 +- cpp/include/raft/stats/rand_index.cuh | 2 +- cpp/include/raft/stats/regression_metrics.cuh | 2 +- cpp/include/raft/stats/silhouette_score.cuh | 2 +- 
cpp/include/raft/stats/stddev.cuh | 2 +- cpp/include/raft/stats/sum.cuh | 2 +- cpp/include/raft/stats/trustworthiness_score.cuh | 2 +- cpp/include/raft/stats/v_measure.cuh | 2 +- cpp/include/raft/stats/weighted_mean.cuh | 2 +- .../raft/thirdparty/mdspan/.github/workflows/cmake.yml | 10 +++++----- cpp/include/raft/thirdparty/mdspan/LICENSE | 8 ++++---- cpp/include/raft/thirdparty/mdspan/README.md | 1 - .../mdspan/benchmarks/sum/cuda/CMakeLists.txt | 2 +- .../mdspan/benchmarks/sum/openmp/CMakeLists.txt | 2 +- .../mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp | 1 - .../mdspan/benchmarks/sum/sum_submdspan_right.cpp | 1 - .../compilation_tests/ctest_compressed_pair_layout.cpp | 1 - .../mdspan/compilation_tests/ctest_extents_ctors.cpp | 1 - .../compilation_tests/ctest_layout_convertible.cpp | 2 -- .../compilation_tests/ctest_mdspan_convertible.cpp | 1 - .../compilation_tests/ctest_no_unique_address.cpp | 2 -- .../mdspan/compilation_tests/ctest_standard_layout.cpp | 3 --- .../compilation_tests/ctest_trivially_copyable.cpp | 3 --- .../examples/tiled_layout/simple_tiled_layout.cpp | 1 - .../experimental/__p0009_bits/aligned_accessor.hpp | 2 +- .../include/experimental/__p0009_bits/extents.hpp | 2 +- .../include/experimental/__p0009_bits/layout_left.hpp | 1 - .../experimental/__p0009_bits/layout_padded.hpp | 4 ++-- .../include/experimental/__p0009_bits/layout_right.hpp | 1 - .../experimental/__p0009_bits/no_unique_address.hpp | 4 ++-- .../include/experimental/__p0009_bits/type_list.hpp | 1 - .../thirdparty/mdspan/include/experimental/mdarray | 1 - .../raft/thirdparty/mdspan/make_single_header.py | 1 - .../raft/thirdparty/mdspan/tests/CMakeLists.txt | 1 - .../mdspan/tests/test_exhaustive_layouts.cpp | 1 - .../thirdparty/mdspan/tests/test_layout_stride.cpp | 1 - .../thirdparty/mdspan/tests/test_mdarray_ctors.cpp | 2 +- .../raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp | 2 +- cpp/include/raft/util/detail/popc.cuh | 2 +- cpp/include/raft/util/input_validation.hpp | 2 +- 
cpp/include/raft/util/warp_primitives.cuh | 2 +- cpp/scripts/run-clang-compile.py | 4 ++-- cpp/scripts/run-clang-tidy.py | 4 ++-- cpp/scripts/run-cmake-format.sh | 2 +- cpp/tests/linalg/cholesky_r1.cu | 2 +- cpp/tests/matrix/argmax.cu | 2 +- cpp/tests/matrix/argmin.cu | 2 +- cpp/tests/matrix/diagonal.cu | 2 +- cpp/tests/matrix/gather.cu | 2 +- cpp/tests/matrix/scatter.cu | 2 +- cpp/tests/mr/device/buffer.cpp | 2 +- cpp/tests/mr/host/buffer.cpp | 2 +- cpp/tests/neighbors/spatial_data.h | 2 +- cpp/tests/stats/weighted_mean.cu | 2 +- cpp/tests/test_utils.cuh | 2 +- docs/README.md | 2 +- docs/source/_static/references.css | 2 +- docs/source/contributing.md | 2 -- docs/source/cpp_api.rst | 2 +- docs/source/cpp_api/core.rst | 2 +- docs/source/cpp_api/core_bitmap.rst | 2 +- docs/source/cpp_api/core_bitset.rst | 2 +- docs/source/cpp_api/core_kvp.rst | 1 - docs/source/cpp_api/core_logger.rst | 1 - docs/source/cpp_api/core_nvtx.rst | 2 -- docs/source/cpp_api/linalg.rst | 4 ++-- docs/source/cpp_api/linalg_arithmetic.rst | 1 - docs/source/cpp_api/linalg_matrix.rst | 1 - docs/source/cpp_api/linalg_matrix_vector.rst | 1 - docs/source/cpp_api/matrix_manipulation.rst | 1 - docs/source/cpp_api/matrix_reduction.rst | 2 +- docs/source/cpp_api/mdspan_representation.rst | 2 -- docs/source/cpp_api/mdspan_span.rst | 1 - docs/source/cpp_api/mnmg.rst | 1 - docs/source/cpp_api/random.rst | 1 - docs/source/cpp_api/random_datagen.rst | 1 - .../cpp_api/random_sampling_without_replacement.rst | 2 -- docs/source/cpp_api/sparse.rst | 1 - docs/source/cpp_api/sparse_types_coo_matrix.rst | 1 - docs/source/cpp_api/sparse_types_csr_matrix.rst | 1 - docs/source/cpp_api/stats_classification.rst | 1 - docs/source/cpp_api/stats_probability.rst | 1 - docs/source/cpp_api/stats_regression.rst | 2 -- docs/source/pylibraft_api/random.rst | 2 +- docs/source/pylibraft_api/sparse.rst | 2 +- python/pylibraft/.coveragerc | 2 +- python/pylibraft/pylibraft/tests/pytest.ini | 1 - python/raft-dask/.coveragerc | 2 +- 
python/raft-dask/raft_dask/tests/pytest.ini | 1 - thirdparty/LICENSES/LICENSE.ann-benchmark | 2 +- thirdparty/LICENSES/LICENSE.faiss | 2 +- thirdparty/LICENSES/LICENSE.pytorch | 2 +- thirdparty/LICENSES/mdarray.license | 2 +- 201 files changed, 177 insertions(+), 226 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4e0cf53c4d..6dfcc72417 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,11 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: diff --git a/build.sh b/build.sh index de3ebfa3c5..8f388e549c 100755 --- a/build.sh +++ b/build.sh @@ -473,4 +473,3 @@ if hasArg docs; then cd ${SPHINX_BUILD_DIR} sphinx-build -b html source _html fi - diff --git a/cpp/cmake/patches/cutlass/build-export.patch b/cpp/cmake/patches/cutlass/build-export.patch index a6423e9c08..31bbd25102 100644 --- a/cpp/cmake/patches/cutlass/build-export.patch +++ b/cpp/cmake/patches/cutlass/build-export.patch @@ -20,8 +20,7 @@ index 7419bdf5e..545384d82 100755 - $ - $ ) - + # Mark CTK headers as system to supress warnings from them --- +-- 2.34.1 - diff --git a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh index 97755351c4..f3e2c78584 100644 --- a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh @@ -227,4 +227,4 @@ void find_k(raft::resources const& handle, n_iter); } } -} // namespace raft::cluster::detail \ No newline at end of file +} // namespace raft::cluster::detail diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 55becc8e15..2b77ca9963 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ 
b/cpp/include/raft/cluster/detail/mst.cuh @@ -204,4 +204,4 @@ void build_sorted_mst( raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream); } -}; // namespace raft::cluster::detail \ No newline at end of file +}; // namespace raft::cluster::detail diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh index ccc6472684..0a21271271 100644 --- a/cpp/include/raft/cluster/detail/single_linkage.cuh +++ b/cpp/include/raft/cluster/detail/single_linkage.cuh @@ -122,4 +122,4 @@ void single_linkage(raft::resources const& handle, out->n_leaves = m; out->n_connected_components = 1; } -}; // namespace raft::cluster::detail \ No newline at end of file +}; // namespace raft::cluster::detail diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 385bc544b0..1cd77ca665 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -21,4 +21,4 @@ #pragma once -#include \ No newline at end of file +#include diff --git a/cpp/include/raft/core/coo_matrix.hpp b/cpp/include/raft/core/coo_matrix.hpp index 52ac69f163..b812e28206 100644 --- a/cpp/include/raft/core/coo_matrix.hpp +++ b/cpp/include/raft/core/coo_matrix.hpp @@ -297,4 +297,4 @@ class coo_matrix /** @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp index 1113cc2023..4f7679bbae 100644 --- a/cpp/include/raft/core/csr_matrix.hpp +++ b/cpp/include/raft/core/csr_matrix.hpp @@ -309,4 +309,4 @@ class csr_matrix /** @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/core/detail/mdspan_util.cuh b/cpp/include/raft/core/detail/mdspan_util.cuh index ded95c2f31..d3438bc07d 100644 --- a/cpp/include/raft/core/detail/mdspan_util.cuh +++ b/cpp/include/raft/core/detail/mdspan_util.cuh @@ -67,4 +67,4 @@ MDSPAN_INLINE_FUNCTION auto 
popc(uint64_t v) -> int32_t #endif // compiler } -} // end namespace raft::detail \ No newline at end of file +} // end namespace raft::detail diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp index 41da605ff0..4ed67d5fc5 100644 --- a/cpp/include/raft/core/device_coo_matrix.hpp +++ b/cpp/include/raft/core/device_coo_matrix.hpp @@ -395,4 +395,4 @@ auto make_device_coordinate_structure_view(raft::device_span rows, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp index 1d23c8912d..b0dbfa000d 100644 --- a/cpp/include/raft/core/device_csr_matrix.hpp +++ b/cpp/include/raft/core/device_csr_matrix.hpp @@ -422,4 +422,4 @@ auto make_device_compressed_structure_view(raft::device_span indptr, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/device_span.hpp b/cpp/include/raft/core/device_span.hpp index d3350b5e3a..abf72b6b2e 100644 --- a/cpp/include/raft/core/device_span.hpp +++ b/cpp/include/raft/core/device_span.hpp @@ -34,4 +34,4 @@ using device_span = span; /** * @} */ -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp index 7a216dc8a2..e0f95d2a77 100644 --- a/cpp/include/raft/core/host_coo_matrix.hpp +++ b/cpp/include/raft/core/host_coo_matrix.hpp @@ -393,4 +393,4 @@ auto make_host_coordinate_structure_view(raft::host_span rows, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp index e3cea3cd27..8a29d957f6 100644 --- a/cpp/include/raft/core/host_csr_matrix.hpp +++ b/cpp/include/raft/core/host_csr_matrix.hpp @@ -423,4 +423,4 @@ auto 
make_host_compressed_structure_view(raft::host_span indptr, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/host_mdarray.hpp b/cpp/include/raft/core/host_mdarray.hpp index 3020cde32d..229619999d 100644 --- a/cpp/include/raft/core/host_mdarray.hpp +++ b/cpp/include/raft/core/host_mdarray.hpp @@ -253,4 +253,4 @@ auto make_host_vector(IndexType n) return make_host_mdarray(make_extents(n)); } -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/host_span.hpp b/cpp/include/raft/core/host_span.hpp index 36978dfca4..d31f8b4c30 100644 --- a/cpp/include/raft/core/host_span.hpp +++ b/cpp/include/raft/core/host_span.hpp @@ -35,4 +35,4 @@ using host_span = span; * @} */ -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/resource/device_id.hpp b/cpp/include/raft/core/resource/device_id.hpp index 570d815780..a371f9ddde 100644 --- a/cpp/include/raft/core/resource/device_id.hpp +++ b/cpp/include/raft/core/resource/device_id.hpp @@ -73,4 +73,4 @@ inline int get_device_id(resources const& res) /** * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/device_properties.hpp b/cpp/include/raft/core/resource/device_properties.hpp index a87c29f709..7ac780ef16 100644 --- a/cpp/include/raft/core/resource/device_properties.hpp +++ b/cpp/include/raft/core/resource/device_properties.hpp @@ -75,4 +75,4 @@ inline cudaDeviceProp& get_device_properties(resources const& res) /** * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/sub_comms.hpp b/cpp/include/raft/core/resource/sub_comms.hpp index 11d2aed1e0..b4fef75d57 100644 --- a/cpp/include/raft/core/resource/sub_comms.hpp +++ b/cpp/include/raft/core/resource/sub_comms.hpp @@ -79,4 
+79,4 @@ inline void set_subcomm(resources const& res, * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp index 55da3037a9..6e5092f50f 100644 --- a/cpp/include/raft/core/sparse_types.hpp +++ b/cpp/include/raft/core/sparse_types.hpp @@ -222,4 +222,4 @@ class sparse_matrix { /* @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h index 42de4860a0..56cce4de8b 100644 --- a/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h +++ b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h @@ -406,4 +406,4 @@ struct FusedDistanceNNGemm /** @} */ // end of group matrix_norm -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/matrix/reverse.cuh b/cpp/include/raft/matrix/reverse.cuh index 42057bb0f5..c10fa8f5f0 100644 --- a/cpp/include/raft/matrix/reverse.cuh +++ b/cpp/include/raft/matrix/reverse.cuh @@ -69,4 +69,4 @@ void row_reverse(raft::resources const& handle, } /** @} */ // end group matrix_reverse -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index cd2d76a863..072f0c18ac 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -55,4 +55,4 @@ void scatter(raft::resources const& handle, detail::scatter(handle, inout, map, col_batch_size); } -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index c922a0d7f4..caff6ea341 100644 --- 
a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -228,4 +228,4 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t \ No newline at end of file +#include diff --git a/cpp/include/raft/sparse/distance/detail/common.hpp b/cpp/include/raft/sparse/distance/detail/common.hpp index 0f463dac80..19fe9c1786 100644 --- a/cpp/include/raft/sparse/distance/detail/common.hpp +++ b/cpp/include/raft/sparse/distance/detail/common.hpp @@ -56,4 +56,4 @@ class distances_t { }; // namespace detail }; // namespace distance }; // namespace sparse -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh index 38aa106d78..59cfcfa186 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh @@ -229,4 +229,4 @@ class chunked_mask_row_it : public mask_row_it { } // namespace detail } // namespace distance } // namespace sparse -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh index 5a1c152bd0..4a075cf530 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh @@ -116,4 +116,4 @@ class dense_smem_strategy : public coo_spmv_strategy { } // namespace detail } // namespace distance } // namespace sparse -} // namespace raft \ No newline at end of file +} // namespace raft diff --git 
a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh index ead44f0c51..5bcd1ff005 100644 --- a/cpp/include/raft/sparse/distance/distance.cuh +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -221,4 +221,4 @@ void pairwise_distance(raft::resources const& handle, }; // namespace sparse }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh index def305afb2..a97b935f58 100644 --- a/cpp/include/raft/sparse/linalg/add.cuh +++ b/cpp/include/raft/sparse/linalg/add.cuh @@ -96,4 +96,4 @@ void csr_add_finalize(const int* a_ind, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 57c9b986b4..8ac97259da 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -120,4 +120,4 @@ void coo_degree_nz(COO* in, int* results, cudaStream_t stream) }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3702111f83..2619048388 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -232,4 +232,4 @@ void rowNormCsrCaller(const IdxType* ia, }; // end NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index 3a646b9a6e..579ee88d38 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -107,4 +107,4 @@ void csr_transpose(cusparseHandle_t handle, }; // end 
NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 43dd182fe5..7adf245abc 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -104,4 +104,4 @@ void rowNormCsr(raft::resources const& handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh index 4c0595bf91..276a64c125 100644 --- a/cpp/include/raft/sparse/linalg/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/spectral.cuh @@ -40,4 +40,4 @@ void fit_embedding(raft::resources const& handle, }; // namespace sparse }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh index 1de8d5b426..8ee53cd3ae 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -165,4 +165,4 @@ void symmetrize(raft::resources const& handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/transpose.cuh b/cpp/include/raft/sparse/linalg/transpose.cuh index 4333060ad9..304cbf4936 100644 --- a/cpp/include/raft/sparse/linalg/transpose.cuh +++ b/cpp/include/raft/sparse/linalg/transpose.cuh @@ -68,4 +68,4 @@ void csr_transpose(raft::resources const& handle, }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh index c94c6254c3..ed4aa4c98f 100644 --- 
a/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh +++ b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh @@ -96,4 +96,4 @@ void cross_component_nn( metric); } -}; // end namespace raft::sparse::neighbors \ No newline at end of file +}; // end namespace raft::sparse::neighbors diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh index c64c05ae4e..4b329325ca 100644 --- a/cpp/include/raft/sparse/op/filter.cuh +++ b/cpp/include/raft/sparse/op/filter.cuh @@ -91,4 +91,4 @@ void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/reduce.cuh b/cpp/include/raft/sparse/op/reduce.cuh index 52f1d3b239..b03192f111 100644 --- a/cpp/include/raft/sparse/op/reduce.cuh +++ b/cpp/include/raft/sparse/op/reduce.cuh @@ -84,4 +84,4 @@ void max_duplicates(raft::resources const& handle, }; // END namespace sparse }; // END namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/row_op.cuh b/cpp/include/raft/sparse/op/row_op.cuh index a799093226..b8d5a49d9f 100644 --- a/cpp/include/raft/sparse/op/row_op.cuh +++ b/cpp/include/raft/sparse/op/row_op.cuh @@ -45,4 +45,4 @@ void csr_row_op(const Index_* row_ind, Index_ n_rows, Index_ nnz, Lambda op, cud }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/slice.cuh b/cpp/include/raft/sparse/op/slice.cuh index 2da6dad4fc..e8a456d23e 100644 --- a/cpp/include/raft/sparse/op/slice.cuh +++ b/cpp/include/raft/sparse/op/slice.cuh @@ -78,4 +78,4 @@ void csr_row_slice_populate(value_idx start_offset, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index fed31e6a9c..4c45a28cc6 100644 --- 
a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -230,4 +230,4 @@ int computeLargestEigenvectors( } // namespace raft::sparse::solver -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/cluster_solvers.cuh b/cpp/include/raft/spectral/cluster_solvers.cuh index b693ac4af3..c273808cf8 100644 --- a/cpp/include/raft/spectral/cluster_solvers.cuh +++ b/cpp/include/raft/spectral/cluster_solvers.cuh @@ -97,4 +97,4 @@ struct kmeans_solver_t { } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh index 40b0324548..139df1d27f 100644 --- a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh +++ b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh @@ -87,4 +87,4 @@ struct kmeans_solver_deprecated_t { } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/modularity_maximization.cuh b/cpp/include/raft/spectral/modularity_maximization.cuh index ab1398a2a1..6514f7ef21 100644 --- a/cpp/include/raft/spectral/modularity_maximization.cuh +++ b/cpp/include/raft/spectral/modularity_maximization.cuh @@ -83,4 +83,4 @@ void analyzeModularity(raft::resources const& handle, } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/partition.cuh b/cpp/include/raft/spectral/partition.cuh index f7ea456ac5..a2ac328aa1 100644 --- a/cpp/include/raft/spectral/partition.cuh +++ b/cpp/include/raft/spectral/partition.cuh @@ -92,4 +92,4 @@ void analyzePartition(raft::resources const& handle, } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 6625d38a7a..0b352e185b 100644 --- 
a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -75,4 +75,4 @@ float accuracy(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 1f97cd5f76..6822e069a2 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -86,4 +86,4 @@ double adjusted_rand_index(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index b669e0de32..f4667b37dc 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -88,4 +88,4 @@ double completeness_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 16f0998435..03fa0d4924 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -214,4 +214,4 @@ void contingency_matrix(Args... 
args) }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index ad5d233c0e..096ec4bc1c 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -119,4 +119,4 @@ void cov(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/detail/mean.cuh b/cpp/include/raft/stats/detail/mean.cuh index ee39c87a68..a7d4f2b877 100644 --- a/cpp/include/raft/stats/detail/mean.cuh +++ b/cpp/include/raft/stats/detail/mean.cuh @@ -47,4 +47,4 @@ void mean( } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/stddev.cuh b/cpp/include/raft/stats/detail/stddev.cuh index 4c861b49fb..c758584ec9 100644 --- a/cpp/include/raft/stats/detail/stddev.cuh +++ b/cpp/include/raft/stats/detail/stddev.cuh @@ -120,4 +120,4 @@ void vars(Type* var, } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/sum.cuh b/cpp/include/raft/stats/detail/sum.cuh index 39bd2c3b6c..4f5438b133 100644 --- a/cpp/include/raft/stats/detail/sum.cuh +++ b/cpp/include/raft/stats/detail/sum.cuh @@ -34,4 +34,4 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index ada0995f7d..9b96ed5949 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -72,4 +72,4 @@ void weightedMean(Type* mu, } }; // end namespace detail }; // end namespace stats -}; // end namespace raft \ No newline 
at end of file +}; // end namespace raft diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index ded7c8178b..444cc04bca 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -131,4 +131,4 @@ value_t cluster_dispersion( } // end namespace stats } // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index fe432569ee..a0c6ae5bdb 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -83,4 +83,4 @@ double entropy(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 311cd599f8..3095d2c724 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -91,4 +91,4 @@ double homogeneity_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 43d39cfd6c..bc3cf184c6 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -96,4 +96,4 @@ void mean(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 83f9a8a941..fb9da4dd39 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -163,4 +163,4 @@ void mean_add(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index d2c410dab1..930a6f8b9e 
100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -141,4 +141,4 @@ void minmax(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 5a334e9280..c895a911e9 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -89,4 +89,4 @@ double mutual_info_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index c98b4bc93a..4ff9f491d8 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -90,4 +90,4 @@ value_t r2_score(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index a21a0c0dc5..1230d615eb 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -75,4 +75,4 @@ double rand_index(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh index 718170f716..74763de2fc 100644 --- a/cpp/include/raft/stats/regression_metrics.cuh +++ b/cpp/include/raft/stats/regression_metrics.cuh @@ -104,4 +104,4 @@ void regression_metrics(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 23eef84604..15d86969af 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ 
b/cpp/include/raft/stats/silhouette_score.cuh @@ -223,4 +223,4 @@ value_t silhouette_score_batched( }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 0a67bd2325..62668b3ddd 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -185,4 +185,4 @@ void vars(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 2c3ed1b83e..6c18a21988 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -88,4 +88,4 @@ void sum(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index 3f4464f4d3..2435cb4ef9 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -98,4 +98,4 @@ double trustworthiness_score( } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index 041adb5e38..1df3eab460 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -95,4 +95,4 @@ double v_measure(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index da22f0163c..a3e38f7168 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -189,4 +189,4 @@ void col_weighted_mean(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ 
No newline at end of file +#endif diff --git a/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml b/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml index a5411082af..4357c207a1 100644 --- a/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml +++ b/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml @@ -37,27 +37,27 @@ jobs: - name: Create Build Environment run: cmake -E make_directory ${{github.workspace}}/mdspan-build - + - name: Check Out uses: actions/checkout@v2 with: path: ${{github.workspace}}/mdspan-src - + - name: Configure CMake shell: bash working-directory: ${{github.workspace}}/mdspan-build run: CXX=${{ matrix.compiler_prefix}}/${{ matrix.compiler_driver }} cmake $GITHUB_WORKSPACE/mdspan-src -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install -DMDSPAN_ENABLE_TESTS=ON -DMDSPAN_ENABLE_EXAMPLES=ON - + - name: Build shell: bash working-directory: ${{github.workspace}}/mdspan-build run: make -j - + - name: Test working-directory: ${{github.workspace}}/mdspan-build shell: bash run: ctest - + - name: Install shell: bash working-directory: ${{github.workspace}}/mdspan-build diff --git a/cpp/include/raft/thirdparty/mdspan/LICENSE b/cpp/include/raft/thirdparty/mdspan/LICENSE index c68a8a2a9f..db92c208da 100644 --- a/cpp/include/raft/thirdparty/mdspan/LICENSE +++ b/cpp/include/raft/thirdparty/mdspan/LICENSE @@ -1,14 +1,14 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
// // Kokkos is licensed under 3-clause BSD terms of use: -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -37,6 +37,6 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER diff --git a/cpp/include/raft/thirdparty/mdspan/README.md b/cpp/include/raft/thirdparty/mdspan/README.md index a062777261..15af4dd4a9 100644 --- a/cpp/include/raft/thirdparty/mdspan/README.md +++ b/cpp/include/raft/thirdparty/mdspan/README.md @@ -70,4 +70,3 @@ Acknowledgements ================ This work was undertaken as part of the [Kokkos project](https://github.com/kokkos/kokkos) at Sandia National Laboratories. Sandia National Laboratories is a multimission laboratory managed and operated by National Technology & Engineering Solutions of Sandia, LLC, a wholly owned subsidiary of Honeywell International Inc., for the U. S. Department of Energy's National Nuclear Security Administration under contract DE-NA0003525. 
- diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt index 30391b3d70..3d5cbb955a 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt @@ -2,4 +2,4 @@ mdspan_add_cuda_benchmark(sum_3d_cuda) target_include_directories(sum_3d_cuda PUBLIC $ -) \ No newline at end of file +) diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt index 566c47c9ab..ccab58bfa1 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt @@ -4,4 +4,4 @@ if(OpenMP_CXX_FOUND) target_include_directories(sum_3d_openmp PUBLIC $ ) -endif() \ No newline at end of file +endif() diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp index 9ab6a0ddf4..ef75349925 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp @@ -174,4 +174,3 @@ BENCHMARK_CAPTURE( //================================================================================ BENCHMARK_MAIN(); - diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp index f106e2f5ff..4cbfe029c7 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp @@ -223,4 +223,3 @@ BENCHMARK_CAPTURE( //================================================================================ BENCHMARK_MAIN(); - diff --git 
a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp index ea2bad164c..ef45c9d18f 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp @@ -169,4 +169,3 @@ test, CP>, 4 * sizeof(int*), non_empty>(); // end compressed pair layout: 2 nested pairs, 4 leaf elements }}}1 //============================================================================== } - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp index 00126691aa..64d71d650c 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp @@ -176,4 +176,3 @@ MDSPAN_STATIC_TEST( stdex::extents >::value ); - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp index e293734444..fc30fa25e5 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp @@ -117,5 +117,3 @@ MDSPAN_STATIC_TEST( MDSPAN_STATIC_TEST( !std::is_constructible>::value ); - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp index fa1136b9d6..c64fcdbabd 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp @@ -68,4 +68,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 
//============================================================================== - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp index 9f7c6c052d..c44b02bf76 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp @@ -109,5 +109,3 @@ MDSPAN_STATIC_TEST( // end layouts }}}1 //============================================================================== - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp index d8edf31ab2..6e41433d6a 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp @@ -216,6 +216,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 //============================================================================== - - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp index 73ab426afa..f6457234d7 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp @@ -212,6 +212,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 //============================================================================== - - - diff --git a/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp b/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp index b8740d5227..ba481c3144 100644 --- a/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp +++ 
b/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp @@ -207,4 +207,3 @@ int main() { std::cout << "Success! SimpleTiledLayout2D works as expected." << std::endl; } } - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp index 67356785c0..02e386e3aa 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp @@ -42,7 +42,7 @@ */ -// NOTE: This code is prematurely taken from an example based on +// NOTE: This code is prematurely taken from an example based on // https://github.com/kokkos/mdspan/pull/176 #pragma once diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp index 6be71b432c..3b4d69d63e 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp @@ -531,7 +531,7 @@ struct __extents_to_partially_static_sizes; template struct __extents_to_partially_static_sizes<::std::experimental::extents> { using type = detail::__partially_static_sizes< - typename ::std::experimental::extents::index_type, size_t, + typename ::std::experimental::extents::index_type, size_t, ExtentsPack...>; }; diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp index ed1478dc8b..92a291e915 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp @@ -237,4 +237,3 @@ class 
layout_left::mapping { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp index cd9c9c19bf..c761146874 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp @@ -45,7 +45,7 @@ // NOTE: This code is prematurely taken from https://github.com/kokkos/mdspan/pull/180 // and matches requirements described in https://github.com/ORNL/cpp-proposals-pub/pull/296 // Some parts (as submdspan integration) are missing -// EDIT: the meaning of the template argument 'padding_stride' was adjusted from a +// EDIT: the meaning of the template argument 'padding_stride' was adjusted from a // fixed stride to a padding alignment, allowing dimensions > padding_stride to be padded // to multiples of 'padding_stride' @@ -140,7 +140,7 @@ namespace details { // layout_padded_left implementation namespace details { - + // The *_helper functions work around not having C++20 // templated lambdas: []{} . 
diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp index a9b64ca36a..d4b71efae1 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp @@ -237,4 +237,3 @@ class layout_right::mapping { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp index 904dd40a75..90b1a46288 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp @@ -74,13 +74,13 @@ struct __no_unique_address_emulation< // If the type isn't trivially destructible, its destructor // won't be called at the right time, so don't use this // specialization - _MDSPAN_TRAIT(is_trivially_destructible, _T)>> : + _MDSPAN_TRAIT(is_trivially_destructible, _T)>> : #ifdef _MDSPAN_COMPILER_MSVC // MSVC doesn't allow you to access public static member functions of a type // when you *happen* to privately inherit from that type. protected #else - // But we still want this to be private if possible so that we don't accidentally + // But we still want this to be private if possible so that we don't accidentally // access members of _T directly rather than calling __ref() first, which wouldn't // work if _T happens to be stateful and thus we're using the unspecialized definition // of __no_unique_address_emulation above. 
diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp index 7de72e6537..64845190ae 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp @@ -114,4 +114,3 @@ struct __type_at<3, __type_list<_T0, _T1, _T2, _T3, _Ts...>> { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray b/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray index fa710a59b6..60e06dd68e 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray @@ -45,4 +45,3 @@ #include "mdspan" #include "__p1684_bits/mdarray.hpp" - diff --git a/cpp/include/raft/thirdparty/mdspan/make_single_header.py b/cpp/include/raft/thirdparty/mdspan/make_single_header.py index 1b562c7176..98ab3526db 100755 --- a/cpp/include/raft/thirdparty/mdspan/make_single_header.py +++ b/cpp/include/raft/thirdparty/mdspan/make_single_header.py @@ -49,4 +49,3 @@ def process_file(file_path, out_lines=[], front_matter_lines=[], back_matter_lin "#define _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_\n"], ["#endif // _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_\n"], [abspath(sys.argv[1])])) - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt index d92834beb7..a30ce2c198 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt @@ -57,4 +57,3 @@ mdspan_add_test(test_layout_ctors) mdspan_add_test(test_layout_stride) mdspan_add_test(test_submdspan) mdspan_add_test(test_mdarray_ctors) - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp 
b/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp index f09b799684..e91896c1c4 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp @@ -424,4 +424,3 @@ TYPED_TEST(TestLayoutConversion, implicit_conversion) { ASSERT_EQ(map1.stride(r), map2.stride(r)); } } - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp index 3a3e1c2696..12008f05cf 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp @@ -164,4 +164,3 @@ TEST(TestLayoutStrideCTAD, test_ctad) { */ } #endif - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp index 781a12a697..3dcb61d454 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp @@ -740,7 +740,7 @@ TEST(TestMdarrayCTAD, layout_stride) { ASSERT_EQ(m0.stride(1), 128); ASSERT_FALSE(m0.is_exhaustive()); - /* + /* stdex::mdarray m1{d.data(), stdex::layout_stride::mapping{stdex::extents{16, 32}, stdex::extents{1, 128}}}; ASSERT_EQ(m1.data(), d.data()); ASSERT_EQ(m1.rank(), 2); diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp index 81d3fdb983..14ae51a259 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp @@ -346,7 +346,7 @@ TEST(TestMdspanCTAD, layout_stride) { ASSERT_EQ(m0.stride(1), 128); ASSERT_FALSE(m0.is_exhaustive()); - /* + /* stdex::mdspan m1{d.data(), stdex::layout_stride::mapping{stdex::extents{16, 32}, stdex::extents{1, 128}}}; ASSERT_EQ(m1.data(), d.data()); ASSERT_EQ(m1.rank(), 
2); diff --git a/cpp/include/raft/util/detail/popc.cuh b/cpp/include/raft/util/detail/popc.cuh index f335be6fd0..9638a261a5 100644 --- a/cpp/include/raft/util/detail/popc.cuh +++ b/cpp/include/raft/util/detail/popc.cuh @@ -73,4 +73,4 @@ void popc(const raft::resources& res, }); } -} // end namespace raft::detail \ No newline at end of file +} // end namespace raft::detail diff --git a/cpp/include/raft/util/input_validation.hpp b/cpp/include/raft/util/input_validation.hpp index 17bb53f22b..119fd9d2e2 100644 --- a/cpp/include/raft/util/input_validation.hpp +++ b/cpp/include/raft/util/input_validation.hpp @@ -129,4 +129,4 @@ constexpr bool is_scalar_view(mdspan m) return false; } -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/cpp/include/raft/util/warp_primitives.cuh b/cpp/include/raft/util/warp_primitives.cuh index 953c137cdf..2a7c4e9127 100644 --- a/cpp/include/raft/util/warp_primitives.cuh +++ b/cpp/include/raft/util/warp_primitives.cuh @@ -256,4 +256,4 @@ DI std::enable_if_t, T> shfl_xor(T val, return output; } -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/scripts/run-clang-compile.py b/cpp/scripts/run-clang-compile.py index 123f0e4075..8ed9aa00f0 100644 --- a/cpp/scripts/run-clang-compile.py +++ b/cpp/scripts/run-clang-compile.py @@ -253,12 +253,12 @@ def run_clang_command(clang_cmd, cwd): class LockContext(object): def __init__(self, lock=None) -> None: self._lock = lock - + def __enter__(self): if self._lock: self._lock.acquire() return self - + def __exit__(self, _, __, ___): if self._lock: self._lock.release() diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 3d8bbcec4a..cad08ca551 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -296,12 +296,12 @@ def run_clang_tidy_command(tidy_cmd, cwd): class LockContext(object): def __init__(self, lock=None) -> None: self._lock = lock - + def __enter__(self): if 
self._lock: self._lock.acquire() return self - + def __exit__(self, _, __, ___): if self._lock: self._lock.release() diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh index db5a8b5804..e08481fbd6 100755 --- a/cpp/scripts/run-cmake-format.sh +++ b/cpp/scripts/run-cmake-format.sh @@ -17,7 +17,7 @@ # and exits gracefully if the file is not found. If a user wishes to specify a # config file at a nonstandard location, they may do so by setting the # environment variable RAPIDS_CMAKE_FORMAT_FILE. -# +# # This script can be invoked directly anywhere within the project repository. # Alternatively, it may be invoked as a pre-commit hook via # `pre-commit run (cmake-format)|(cmake-lint)`. diff --git a/cpp/tests/linalg/cholesky_r1.cu b/cpp/tests/linalg/cholesky_r1.cu index f87e07402f..e506c89a79 100644 --- a/cpp/tests/linalg/cholesky_r1.cu +++ b/cpp/tests/linalg/cholesky_r1.cu @@ -170,4 +170,4 @@ TYPED_TEST(CholeskyR1Test, update) { this->testR1Update(); } TYPED_TEST(CholeskyR1Test, throwError) { this->testR1Error(); } }; // namespace linalg -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/tests/matrix/argmax.cu b/cpp/tests/matrix/argmax.cu index cb3fd4a3fb..c0cf85cd38 100644 --- a/cpp/tests/matrix/argmax.cu +++ b/cpp/tests/matrix/argmax.cu @@ -110,4 +110,4 @@ INSTANTIATE_TEST_SUITE_P(ArgMaxTest, ArgMaxTestF, ::testing::ValuesIn(inputsf)); INSTANTIATE_TEST_SUITE_P(ArgMaxTest, ArgMaxTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/tests/matrix/argmin.cu b/cpp/tests/matrix/argmin.cu index 060b4a78db..f0cacacf3a 100644 --- a/cpp/tests/matrix/argmin.cu +++ b/cpp/tests/matrix/argmin.cu @@ -110,4 +110,4 @@ INSTANTIATE_TEST_SUITE_P(ArgMinTest, ArgMinTestF, ::testing::ValuesIn(inputsf)); INSTANTIATE_TEST_SUITE_P(ArgMinTest, ArgMinTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No 
newline at end of file +} // namespace raft diff --git a/cpp/tests/matrix/diagonal.cu b/cpp/tests/matrix/diagonal.cu index c6e1f1a0d2..0a1f2af825 100644 --- a/cpp/tests/matrix/diagonal.cu +++ b/cpp/tests/matrix/diagonal.cu @@ -116,4 +116,4 @@ INSTANTIATE_TEST_SUITE_P(DiagonalTest, DiagonalTestF, ::testing::ValuesIn(inputs INSTANTIATE_TEST_SUITE_P(DiagonalTest, DiagonalTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/tests/matrix/gather.cu b/cpp/tests/matrix/gather.cu index 4c13d0c1e9..f62805b2b8 100644 --- a/cpp/tests/matrix/gather.cu +++ b/cpp/tests/matrix/gather.cu @@ -246,4 +246,4 @@ GATHER_TEST((GatherTest), GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inplace_inputs_i64); -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/tests/matrix/scatter.cu b/cpp/tests/matrix/scatter.cu index 7f478c7b93..f539b9759a 100644 --- a/cpp/tests/matrix/scatter.cu +++ b/cpp/tests/matrix/scatter.cu @@ -140,4 +140,4 @@ const std::vector> inputs_i64 = SCATTER_TEST((ScatterTest), ScatterTestFI32, inputs_i32); SCATTER_TEST((ScatterTest), ScatterTestFI64, inputs_i64); -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/tests/mr/device/buffer.cpp b/cpp/tests/mr/device/buffer.cpp index d14aa09b7a..3d5652a591 100644 --- a/cpp/tests/mr/device/buffer.cpp +++ b/cpp/tests/mr/device/buffer.cpp @@ -92,4 +92,4 @@ TEST(Raft, DeviceBufferZeroResize) } // namespace device } // namespace mr -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/tests/mr/host/buffer.cpp b/cpp/tests/mr/host/buffer.cpp index 5688ff6376..792160eb89 100644 --- a/cpp/tests/mr/host/buffer.cpp +++ b/cpp/tests/mr/host/buffer.cpp @@ -69,4 +69,4 @@ TEST(Raft, DeviceToHostBuffer) } // namespace host } // namespace mr -} // namespace raft \ No newline at end of file +} // namespace raft diff --git 
a/cpp/tests/neighbors/spatial_data.h b/cpp/tests/neighbors/spatial_data.h index d71b47cf1e..b4352f706d 100644 --- a/cpp/tests/neighbors/spatial_data.h +++ b/cpp/tests/neighbors/spatial_data.h @@ -35,4 +35,4 @@ std::vector spatial_data = { 31.968599, -99.901813, 39.32098, -111.093731, 37.431573, -78.656894, 44.558803, -72.577841, 47.751074, -120.740139, 43.78444, -88.787868, 38.597626, -80.454903, 43.075968, -107.290284}; }; // namespace spatial -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/tests/stats/weighted_mean.cu b/cpp/tests/stats/weighted_mean.cu index 407f3f14ea..e125fbc71e 100644 --- a/cpp/tests/stats/weighted_mean.cu +++ b/cpp/tests/stats/weighted_mean.cu @@ -340,4 +340,4 @@ TEST_P(WeightedMeanTestD, Result) INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); }; // end namespace stats -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/cpp/tests/test_utils.cuh b/cpp/tests/test_utils.cuh index 810a0d7985..ac4ed4d24e 100644 --- a/cpp/tests/test_utils.cuh +++ b/cpp/tests/test_utils.cuh @@ -330,4 +330,4 @@ inline std::vector read_csv(std::string filename, bool skip_first_n_colum return result; } -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/docs/README.md b/docs/README.md index a09ccf41eb..aa5e114347 100644 --- a/docs/README.md +++ b/docs/README.md @@ -11,4 +11,4 @@ bash build.sh docs #### Once the process finishes, documentation can be found in build/html ```shell script xdg-open build/html/index.html` -``` \ No newline at end of file +``` diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css index 225cf13ba9..d1f647233a 100644 --- a/docs/source/_static/references.css +++ b/docs/source/_static/references.css @@ -20,4 +20,4 @@ dl.citation > dt.label > span::before { /* Add closing bracket */ dl.citation > dt.label > span::after { content: "]"; -} \ No newline at 
end of file +} diff --git a/docs/source/contributing.md b/docs/source/contributing.md index 1b4071d0a5..446e7b2a7b 100755 --- a/docs/source/contributing.md +++ b/docs/source/contributing.md @@ -89,5 +89,3 @@ implementation of the issue, ask them in the issue instead of the PR. ## Attribution Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md - - diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index 74f706bf46..837cfa0cb0 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -16,4 +16,4 @@ C++ API cpp_api/solver.rst cpp_api/sparse.rst cpp_api/stats.rst - cpp_api/utils.rst \ No newline at end of file + cpp_api/utils.rst diff --git a/docs/source/cpp_api/core.rst b/docs/source/cpp_api/core.rst index 4122a18506..f159c85af8 100644 --- a/docs/source/cpp_api/core.rst +++ b/docs/source/cpp_api/core.rst @@ -22,4 +22,4 @@ expose in public APIs. core_operators.rst core_math.rst core_bitset.rst - core_bitmap.rst \ No newline at end of file + core_bitmap.rst diff --git a/docs/source/cpp_api/core_bitmap.rst b/docs/source/cpp_api/core_bitmap.rst index 6c1dc607bf..532da58e71 100644 --- a/docs/source/cpp_api/core_bitmap.rst +++ b/docs/source/cpp_api/core_bitmap.rst @@ -12,4 +12,4 @@ namespace *raft::core* .. doxygengroup:: bitmap :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/core_bitset.rst b/docs/source/cpp_api/core_bitset.rst index af1cff6d37..117efc5466 100644 --- a/docs/source/cpp_api/core_bitset.rst +++ b/docs/source/cpp_api/core_bitset.rst @@ -12,4 +12,4 @@ namespace *raft::core* .. doxygengroup:: bitset :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/core_kvp.rst b/docs/source/cpp_api/core_kvp.rst index 60a0da078b..5f0cfd800a 100644 --- a/docs/source/cpp_api/core_kvp.rst +++ b/docs/source/cpp_api/core_kvp.rst @@ -12,4 +12,3 @@ namespace *raft::core* .. 
doxygenstruct:: raft::KeyValuePair :project: RAFT :members: - diff --git a/docs/source/cpp_api/core_logger.rst b/docs/source/cpp_api/core_logger.rst index 60714a63ea..569f17fac3 100644 --- a/docs/source/cpp_api/core_logger.rst +++ b/docs/source/cpp_api/core_logger.rst @@ -12,4 +12,3 @@ namespace *raft::core* .. doxygenclass:: raft::logger :project: RAFT :members: - diff --git a/docs/source/cpp_api/core_nvtx.rst b/docs/source/cpp_api/core_nvtx.rst index addcbdda30..051c66da0c 100644 --- a/docs/source/cpp_api/core_nvtx.rst +++ b/docs/source/cpp_api/core_nvtx.rst @@ -13,5 +13,3 @@ namespace *raft::core* :project: RAFT :members: :content-only: - - diff --git a/docs/source/cpp_api/linalg.rst b/docs/source/cpp_api/linalg.rst index 3cd928c9db..b9da44e431 100644 --- a/docs/source/cpp_api/linalg.rst +++ b/docs/source/cpp_api/linalg.rst @@ -4,7 +4,7 @@ Linear Algebra This page provides C++ class references for the publicly-exposed elements of the `raft/linalg` (dense) linear algebra headers. In addition to providing highly optimized arithmetic and matrix/vector operations, RAFT provides a consistent user experience by providing common BLAS routines, standard linear system solvers, factorization and eigenvalue solvers. Some of these routines -hide the complexities of lower-level C-based libraries provided in the CUDA toolkit +hide the complexities of lower-level C-based libraries provided in the CUDA toolkit .. 
role:: py(code) :language: c++ @@ -19,4 +19,4 @@ hide the complexities of lower-level C-based libraries provided in the CUDA tool linalg_map_reduce.rst linalg_matrix.rst linalg_matrix_vector.rst - linalg_solver.rst \ No newline at end of file + linalg_solver.rst diff --git a/docs/source/cpp_api/linalg_arithmetic.rst b/docs/source/cpp_api/linalg_arithmetic.rst index 7bc428b9f0..badb9f31a5 100644 --- a/docs/source/cpp_api/linalg_arithmetic.rst +++ b/docs/source/cpp_api/linalg_arithmetic.rst @@ -114,4 +114,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/linalg_matrix.rst b/docs/source/cpp_api/linalg_matrix.rst index e6024bcd02..30eef5f64f 100644 --- a/docs/source/cpp_api/linalg_matrix.rst +++ b/docs/source/cpp_api/linalg_matrix.rst @@ -16,4 +16,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/linalg_matrix_vector.rst b/docs/source/cpp_api/linalg_matrix_vector.rst index d92a3c9874..cc22327c74 100644 --- a/docs/source/cpp_api/linalg_matrix_vector.rst +++ b/docs/source/cpp_api/linalg_matrix_vector.rst @@ -29,4 +29,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/matrix_manipulation.rst b/docs/source/cpp_api/matrix_manipulation.rst index d0da51e4b7..5437ced99f 100644 --- a/docs/source/cpp_api/matrix_manipulation.rst +++ b/docs/source/cpp_api/matrix_manipulation.rst @@ -41,4 +41,3 @@ namespace *raft::matrix* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/matrix_reduction.rst b/docs/source/cpp_api/matrix_reduction.rst index 440a1528b4..92dcea6428 100644 --- a/docs/source/cpp_api/matrix_reduction.rst +++ b/docs/source/cpp_api/matrix_reduction.rst @@ -16,4 +16,4 @@ namespace *raft::matrix* .. 
doxygengroup:: matrix_norm :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/mdspan_representation.rst b/docs/source/cpp_api/mdspan_representation.rst index 386e6f14e9..939f1d51be 100644 --- a/docs/source/cpp_api/mdspan_representation.rst +++ b/docs/source/cpp_api/mdspan_representation.rst @@ -66,5 +66,3 @@ Accessors .. doxygentypedef:: raft::managed_accessor :project: RAFT - - diff --git a/docs/source/cpp_api/mdspan_span.rst b/docs/source/cpp_api/mdspan_span.rst index 870c4329d0..1b7d749810 100644 --- a/docs/source/cpp_api/mdspan_span.rst +++ b/docs/source/cpp_api/mdspan_span.rst @@ -25,4 +25,3 @@ span: One-dimensional Non-owning View :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/mnmg.rst b/docs/source/cpp_api/mnmg.rst index 9543cbb4ee..1f9f75dd46 100644 --- a/docs/source/cpp_api/mnmg.rst +++ b/docs/source/cpp_api/mnmg.rst @@ -47,4 +47,3 @@ NCCL+UCX Comms :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/random.rst b/docs/source/cpp_api/random.rst index 9f5cdc7a74..8eaa82c0b0 100644 --- a/docs/source/cpp_api/random.rst +++ b/docs/source/cpp_api/random.rst @@ -26,4 +26,3 @@ namespace *raft::random* random_sampling_univariate.rst random_sampling_multivariable.rst random_sampling_without_replacement.rst - diff --git a/docs/source/cpp_api/random_datagen.rst b/docs/source/cpp_api/random_datagen.rst index a07f5e0154..e97283598e 100644 --- a/docs/source/cpp_api/random_datagen.rst +++ b/docs/source/cpp_api/random_datagen.rst @@ -43,4 +43,3 @@ namespace *raft::random* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/random_sampling_without_replacement.rst b/docs/source/cpp_api/random_sampling_without_replacement.rst index ac0d3bea86..af5281a48b 100644 --- a/docs/source/cpp_api/random_sampling_without_replacement.rst +++ b/docs/source/cpp_api/random_sampling_without_replacement.rst @@ -22,5 +22,3 @@ namespace *raft::random* 
:project: RAFT :members: :content-only: - - diff --git a/docs/source/cpp_api/sparse.rst b/docs/source/cpp_api/sparse.rst index 64197accaf..ee170b3721 100644 --- a/docs/source/cpp_api/sparse.rst +++ b/docs/source/cpp_api/sparse.rst @@ -16,4 +16,3 @@ Core to RAFT's computational patterns for sparse data is its vocabulary of spars sparse_linalg.rst sparse_matrix.rst sparse_solver.rst - diff --git a/docs/source/cpp_api/sparse_types_coo_matrix.rst b/docs/source/cpp_api/sparse_types_coo_matrix.rst index 855d89fdea..c1d8748a64 100644 --- a/docs/source/cpp_api/sparse_types_coo_matrix.rst +++ b/docs/source/cpp_api/sparse_types_coo_matrix.rst @@ -36,4 +36,3 @@ Host COO Matrix :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/sparse_types_csr_matrix.rst b/docs/source/cpp_api/sparse_types_csr_matrix.rst index b704846c4e..22898a6399 100644 --- a/docs/source/cpp_api/sparse_types_csr_matrix.rst +++ b/docs/source/cpp_api/sparse_types_csr_matrix.rst @@ -36,4 +36,3 @@ Host CSR Matrix :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_classification.rst b/docs/source/cpp_api/stats_classification.rst index 929d2808f3..bc472c831d 100644 --- a/docs/source/cpp_api/stats_classification.rst +++ b/docs/source/cpp_api/stats_classification.rst @@ -17,4 +17,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_probability.rst b/docs/source/cpp_api/stats_probability.rst index 457879d87c..a77a0d9132 100644 --- a/docs/source/cpp_api/stats_probability.rst +++ b/docs/source/cpp_api/stats_probability.rst @@ -53,4 +53,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_regression.rst b/docs/source/cpp_api/stats_regression.rst index 8c172b441d..fed5f806a4 100644 --- a/docs/source/cpp_api/stats_regression.rst +++ b/docs/source/cpp_api/stats_regression.rst @@ -41,5 +41,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: 
- - diff --git a/docs/source/pylibraft_api/random.rst b/docs/source/pylibraft_api/random.rst index 538d932757..dbfd7b2fa1 100644 --- a/docs/source/pylibraft_api/random.rst +++ b/docs/source/pylibraft_api/random.rst @@ -9,4 +9,4 @@ This page provides pylibraft class references for the publicly-exposed elements :class: highlight -.. autofunction:: pylibraft.random.rmat \ No newline at end of file +.. autofunction:: pylibraft.random.rmat diff --git a/docs/source/pylibraft_api/sparse.rst b/docs/source/pylibraft_api/sparse.rst index b2c3f7a2b1..9ba265c6c9 100644 --- a/docs/source/pylibraft_api/sparse.rst +++ b/docs/source/pylibraft_api/sparse.rst @@ -8,4 +8,4 @@ This page provides pylibraft class references for the publicly-exposed elements :language: python :class: highlight -.. autofunction:: pylibraft.sparse.linalg.eigsh \ No newline at end of file +.. autofunction:: pylibraft.sparse.linalg.eigsh diff --git a/python/pylibraft/.coveragerc b/python/pylibraft/.coveragerc index fc087fb9c5..3269e10b8a 100644 --- a/python/pylibraft/.coveragerc +++ b/python/pylibraft/.coveragerc @@ -1,3 +1,3 @@ # Configuration file for Python coverage tests [run] -source = pylibraft \ No newline at end of file +source = pylibraft diff --git a/python/pylibraft/pylibraft/tests/pytest.ini b/python/pylibraft/pylibraft/tests/pytest.ini index bf70c06f84..7b0a9f29fb 100644 --- a/python/pylibraft/pylibraft/tests/pytest.ini +++ b/python/pylibraft/pylibraft/tests/pytest.ini @@ -2,4 +2,3 @@ [pytest] addopts = --tb=native - diff --git a/python/raft-dask/.coveragerc b/python/raft-dask/.coveragerc index 968c4b898a..8077c9ae90 100644 --- a/python/raft-dask/.coveragerc +++ b/python/raft-dask/.coveragerc @@ -1,3 +1,3 @@ # Configuration file for Python coverage tests [run] -source = raft_dask \ No newline at end of file +source = raft_dask diff --git a/python/raft-dask/raft_dask/tests/pytest.ini b/python/raft-dask/raft_dask/tests/pytest.ini index bf70c06f84..7b0a9f29fb 100644 --- 
a/python/raft-dask/raft_dask/tests/pytest.ini +++ b/python/raft-dask/raft_dask/tests/pytest.ini @@ -2,4 +2,3 @@ [pytest] addopts = --tb=native - diff --git a/thirdparty/LICENSES/LICENSE.ann-benchmark b/thirdparty/LICENSES/LICENSE.ann-benchmark index 9f8e4222f6..4d04745ab4 100644 --- a/thirdparty/LICENSES/LICENSE.ann-benchmark +++ b/thirdparty/LICENSES/LICENSE.ann-benchmark @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/thirdparty/LICENSES/LICENSE.faiss b/thirdparty/LICENSES/LICENSE.faiss index 87cbf536c6..b96dcb0480 100644 --- a/thirdparty/LICENSES/LICENSE.faiss +++ b/thirdparty/LICENSES/LICENSE.faiss @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/thirdparty/LICENSES/LICENSE.pytorch b/thirdparty/LICENSES/LICENSE.pytorch index 7ad3d737a5..04f9ad1105 100644 --- a/thirdparty/LICENSES/LICENSE.pytorch +++ b/thirdparty/LICENSES/LICENSE.pytorch @@ -74,4 +74,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/thirdparty/LICENSES/mdarray.license b/thirdparty/LICENSES/mdarray.license index e636b86032..5a491b0879 100644 --- a/thirdparty/LICENSES/mdarray.license +++ b/thirdparty/LICENSES/mdarray.license @@ -39,4 +39,4 @@ // // ************************************************************************ //@HEADER -*/ \ No newline at end of file +*/ From 31d31518ece63793ef4bdc2dab8ffac92fc6d6aa Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 28 Jan 2025 16:30:05 -0600 Subject: [PATCH 33/37] Build and test with CUDA 12.8.0 (#2555) This PR uses CUDA 12.8.0 to build and test. xref: https://github.com/rapidsai/build-planning/issues/139 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2555 --- .../devcontainer.json | 8 ++--- .../devcontainer.json | 12 +++---- .github/workflows/build.yaml | 20 ++++++------ .github/workflows/pr.yaml | 32 +++++++++---------- .github/workflows/test.yaml | 10 +++--- .../trigger-breaking-change-alert.yaml | 2 +- README.md | 4 +-- ...64.yaml => all_cuda-128_arch-aarch64.yaml} | 4 +-- ..._64.yaml => all_cuda-128_arch-x86_64.yaml} | 4 +-- dependencies.yaml | 6 +++- docs/source/build.md | 6 ++-- 11 files changed, 56 insertions(+), 52 deletions(-) rename .devcontainer/{cuda12.5-conda => cuda12.8-conda}/devcontainer.json (91%) rename .devcontainer/{cuda12.5-pip => cuda12.8-pip}/devcontainer.json (88%) rename conda/environments/{all_cuda-125_arch-aarch64.yaml => all_cuda-128_arch-aarch64.yaml} (95%) rename conda/environments/{all_cuda-125_arch-x86_64.yaml => all_cuda-128_arch-x86_64.yaml} (95%) diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index dc4fcd02fd..0995e354af 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ 
b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,7 +3,7 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { @@ -20,7 +20,7 @@ "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 88% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index 2bcfa8733f..137699dc5f 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,20 +3,20 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { - "version": "12.5", + "version": "12.8", "installcuBLAS": 
true, "installcuSOLVER": true, "installcuRAND": true, @@ -29,7 +29,7 @@ "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", @@ -37,7 +37,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d484bcae22..cdcb95efad 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 
'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -68,7 +68,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-libraft: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -91,7 +91,7 @@ jobs: wheel-build-pylibraft: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ 
inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,7 +101,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-build-raft-dask: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -122,7 +122,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9a51c783e9..af963bbc8a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -28,7 +28,7 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0 if: always() with: needs: ${{ toJSON(needs) }} @@ -46,7 +46,7 @@ jobs: repo: raft changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0 with: files_yaml: | test_cpp: @@ -70,47 +70,47 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 + uses: 
rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 with: build_type: pull-request enable_check_symbols: true conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -120,7 +120,7 @@ jobs: wheel-build-libraft: needs: 
checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: pull-request branch: ${{ inputs.branch }} @@ -132,14 +132,14 @@ jobs: wheel-build-pylibraft: needs: [checks, wheel-build-libraft] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: [wheel-build-pylibraft, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -147,24 +147,24 @@ jobs: wheel-build-raft-dask: needs: [checks, wheel-build-libraft] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: [wheel-build-raft-dask, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' build_command: | sccache -z; build-all 
-DBUILD_PRIMS_BENCH=ON --verbose; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 178c6f677c..8a4d8a5eb4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 01dd2436be..07f0f83cc9 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ 
b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/README.md b/README.md index 898c5c22c3..2807ab50cc 100755 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-vers ```bash # for CUDA 12.5 -mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.5 +mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.8 ``` Note that the above commands will also install `libraft-headers` and `libraft`. @@ -248,7 +248,7 @@ Note that the above commands will also install `libraft-headers` and `libraft`. You can also install the conda packages individually using the `mamba` command above. 
For example, if you'd like to install RAFT's headers and pre-compiled shared library to use in your project: ```bash # for CUDA 12.5 -mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5 +mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.8 ``` ### Installing Python through Pip diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml similarity index 95% rename from conda/environments/all_cuda-125_arch-aarch64.yaml rename to conda/environments/all_cuda-128_arch-aarch64.yaml index d790e985fa..1915a3f0f0 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 @@ -53,4 +53,4 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.28 - ucx-py==0.42.*,>=0.0.0a0 -name: all_cuda-125_arch-aarch64 +name: all_cuda-128_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 95% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index 63808d99c0..c8119ff7d5 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 @@ -53,4 +53,4 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.28 - ucx-py==0.42.*,>=0.0.0a0 -name: all_cuda-125_arch-x86_64 +name: all_cuda-128_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index b7a0344b1a..c9befcb53a 100644 --- 
a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: conda matrix: - cuda: ["11.8", "12.5"] + cuda: ["11.8", "12.8"] arch: [x86_64, aarch64] includes: - build_common @@ -274,6 +274,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: conda diff --git a/docs/source/build.md b/docs/source/build.md index 5a0dbf7e11..237c54ce6b 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -42,7 +42,7 @@ mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-vers ```bash # for CUDA 12.0 -mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.0 +mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.8 ``` Note that the above commands will also install `libraft-headers` and `libraft`. @@ -50,7 +50,7 @@ Note that the above commands will also install `libraft-headers` and `libraft`. You can also install the conda packages individually using the `mamba` command above. For example, if you'd like to install RAFT's headers to use in your project: ```bash # for CUDA 12.0 -mamba install -c rapidsai -c conda-forge -c nvidia libraft-headers cuda-version=12.0 +mamba install -c rapidsai -c conda-forge -c nvidia libraft-headers cuda-version=12.8 ``` ## Installing Python through Pip @@ -99,7 +99,7 @@ In addition to the libraries included with cudatoolkit 11.8+, there are some oth Conda environment scripts are provided for installing the necessary dependencies to build both the C++ and Python libraries from source. 
It is preferred to use `mamba`, as it provides significant speedup over `conda`: ```bash -mamba env create --name rapids_raft -f conda/environments/all_cuda-125_arch-x86_64.yaml +mamba env create --name rapids_raft -f conda/environments/all_cuda-128_arch-x86_64.yaml mamba activate rapids_raft ``` From cceb37d953e1b4230f73157e3dba604176481547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Thu, 30 Jan 2025 04:45:50 +0100 Subject: [PATCH 34/37] Remove 'sample' parameter from stats::mean API (#2389) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR removes the sample-parameter from the `raft::stats::mean` API to prevent people from using it by accident when for example computing the mean for a sampled variance computation. This also invalidates some of the testcases. Within raft only test-code is affected by this change as the active usage of the sample parameter was already removed in #2381. This PR is based on #2381 but was separated for tracking purposes. ~~Note that this requires adaption of downstream libraries using the API. I am aware of at least one occurrence in `cuml`.~~ The old API remains in the code marked as deprecated which allows us to adapt downstream libraries at least for the duration of one release cycle. Authors: - Malte Fรถrster (https://github.com/mfoerste4) - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2389 --- cpp/include/raft/stats/detail/mean.cuh | 20 ++- cpp/include/raft/stats/detail/scores.cuh | 2 +- cpp/include/raft/stats/mean.cuh | 66 +++++++++- cpp/tests/random/rng.cu | 3 +- cpp/tests/stats/cov.cu | 4 +- cpp/tests/stats/mean.cu | 121 ++++++++---------- cpp/tests/stats/mean_center.cu | 149 ++++++++--------------- cpp/tests/stats/stddev.cu | 6 +- 8 files changed, 184 insertions(+), 187 deletions(-) diff --git a/cpp/include/raft/stats/detail/mean.cuh b/cpp/include/raft/stats/detail/mean.cuh index a7d4f2b877..1262d538c8 100644 --- a/cpp/include/raft/stats/detail/mean.cuh +++ b/cpp/include/raft/stats/detail/mean.cuh @@ -27,7 +27,25 @@ namespace stats { namespace detail { template -void mean( +void mean(Type* mu, const Type* data, IdxType D, IdxType N, bool rowMajor, cudaStream_t stream) +{ + Type ratio = Type(1) / Type(N); + raft::linalg::reduce(mu, + data, + D, + N, + Type(0), + rowMajor, + false, + stream, + false, + raft::identity_op(), + raft::add_op(), + raft::mul_const_op(ratio)); +} + +template +[[deprecated]] void mean( Type* mu, const Type* data, IdxType D, IdxType N, bool sample, bool rowMajor, cudaStream_t stream) { Type ratio = Type(1) / ((sample) ? 
Type(N - 1) : Type(N)); diff --git a/cpp/include/raft/stats/detail/scores.cuh b/cpp/include/raft/stats/detail/scores.cuh index 947df6848a..66951f52ab 100644 --- a/cpp/include/raft/stats/detail/scores.cuh +++ b/cpp/include/raft/stats/detail/scores.cuh @@ -59,7 +59,7 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) { rmm::device_scalar y_bar(stream); - raft::stats::mean(y_bar.data(), y, 1, n, false, false, stream); + raft::stats::mean(y_bar.data(), y, 1, n, false, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); rmm::device_uvector sse_arr(n, stream); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index bc3cf184c6..b76b945400 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,27 @@ namespace stats { * @param data: the input matrix * @param D: number of columns of data * @param N: number of rows of data + * @param rowMajor: whether the input data is row or col major + * @param stream: cuda stream + */ +template +void mean(Type* mu, const Type* data, IdxType D, IdxType N, bool rowMajor, cudaStream_t stream) +{ + detail::mean(mu, data, D, N, rowMajor, stream); +} + +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * Note: This call is deprecated, please use `mean` call without `sample` parameter. + * + * @tparam Type: the data type + * @tparam IdxType Integer type used to for addressing + * @param mu: the output mean vector + * @param data: the input matrix + * @param D: number of columns of data + * @param N: number of rows of data * @param sample: whether to evaluate sample mean or not. 
In other words, * whether * to normalize the output using N-1 or N, for true or false, respectively @@ -45,7 +66,7 @@ namespace stats { * @param stream: cuda stream */ template -void mean( +[[deprecated("'sample' parameter deprecated")]] void mean( Type* mu, const Type* data, IdxType D, IdxType N, bool sample, bool rowMajor, cudaStream_t stream) { detail::mean(mu, data, D, N, sample, rowMajor, stream); @@ -67,14 +88,47 @@ void mean( * @param[in] handle the raft handle * @param[in] data: the input matrix * @param[out] mu: the output mean vector - * @param[in] sample: whether to evaluate sample mean or not. In other words, whether - * to normalize the output using N-1 or N, for true or false, respectively */ template void mean(raft::resources const& handle, raft::device_matrix_view data, - raft::device_vector_view mu, - bool sample) + raft::device_vector_view mu) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch between data and mu"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::mean(mu.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + resource::get_cuda_stream(handle)); +} + +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * Note: This call is deprecated, please use `mean` call without `sample` parameter. + * + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data: the input matrix + * @param[out] mu: the output mean vector + * @param[in] sample: whether to evaluate sample mean or not. 
In other words, whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +[[deprecated("'sample' parameter deprecated")]] void mean( + raft::resources const& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + bool sample) { static_assert( std::is_same_v || std::is_same_v, diff --git a/cpp/tests/random/rng.cu b/cpp/tests/random/rng.cu index a37f150d4c..172f94ae50 100644 --- a/cpp/tests/random/rng.cu +++ b/cpp/tests/random/rng.cu @@ -407,8 +407,7 @@ TEST(Rng, MeanError) RngState r(seed, rtype); normal(handle, r, data.data(), len, 3.3f, 0.23f); // uniform(r, data, len, -1.0, 2.0); - raft::stats::mean( - mean_result.data(), data.data(), num_samples, num_experiments, false, false, stream); + raft::stats::mean(mean_result.data(), data.data(), num_samples, num_experiments, false, stream); raft::stats::stddev(std_result.data(), data.data(), mean_result.data(), diff --git a/cpp/tests/stats/cov.cu b/cpp/tests/stats/cov.cu index 602f356b9f..3f2a3dcebf 100644 --- a/cpp/tests/stats/cov.cu +++ b/cpp/tests/stats/cov.cu @@ -72,7 +72,7 @@ class CovTest : public ::testing::TestWithParam> { cov_act.resize(cols * cols, stream); normal(handle, r, data.data(), len, params.mean, var); - raft::stats::mean(mean_act.data(), data.data(), cols, rows, false, params.rowMajor, stream); + raft::stats::mean(mean_act.data(), data.data(), cols, rows, params.rowMajor, stream); if (params.rowMajor) { using layout = raft::row_major; cov(handle, @@ -102,7 +102,7 @@ class CovTest : public ::testing::TestWithParam> { raft::update_device(data_cm.data(), data_h, 6, stream); raft::update_device(cov_cm_ref.data(), cov_cm_ref_h, 4, stream); - raft::stats::mean(mean_cm.data(), data_cm.data(), 2, 3, false, false, stream); + raft::stats::mean(mean_cm.data(), data_cm.data(), 2, 3, false, stream); cov(handle, cov_cm.data(), data_cm.data(), mean_cm.data(), 2, 3, true, false, true, stream); } diff --git a/cpp/tests/stats/mean.cu 
b/cpp/tests/stats/mean.cu index c5fe83d95b..e72d4eaf74 100644 --- a/cpp/tests/stats/mean.cu +++ b/cpp/tests/stats/mean.cu @@ -33,7 +33,7 @@ template struct MeanInputs { T tolerance, mean; int rows, cols; - bool sample, rowMajor; + bool rowMajor; unsigned long long int seed; T stddev = (T)1.0; }; @@ -42,7 +42,7 @@ template ::std::ostream& operator<<(::std::ostream& os, const MeanInputs& dims) { return os << "{ " << dims.tolerance << ", " << dims.rows << ", " << dims.cols << ", " - << dims.sample << ", " << dims.rowMajor << ", " << dims.stddev << "}" << std::endl; + << ", " << dims.rowMajor << ", " << dims.stddev << "}" << std::endl; } template @@ -74,14 +74,12 @@ class MeanTest : public ::testing::TestWithParam> { using layout = raft::row_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample); + raft::make_device_vector_view(mean_act.data(), cols)); } else { using layout = raft::col_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample); + raft::make_device_vector_view(mean_act.data(), cols)); } } @@ -98,72 +96,51 @@ class MeanTest : public ::testing::TestWithParam> { // measured mean (of a normal distribution) will fall outside of an epsilon of // 0.15 only 4/10000 times. 
(epsilon of 0.1 will fail 30/100 times) const std::vector> inputsf = { - {0.15f, 1.f, 1024, 32, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, false, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, false, 1234ULL}, - {0.15f, 1.f, 1024, 32, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, true, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, true, 1234ULL}, - {0.15f, -1.f, 1030, 1, false, false, 1234ULL}, - {0.15f, -1.f, 1030, 60, true, false, 1234ULL}, - {2.0f, -1.f, 31, 120, false, false, 1234ULL}, - {2.0f, -1.f, 1, 130, false, false, 1234ULL}, - {0.15f, -1.f, 1030, 1, false, true, 1234ULL}, - {0.15f, -1.f, 1030, 60, true, true, 1234ULL}, - {2.0f, -1.f, 31, 120, false, true, 1234ULL}, - {2.0f, -1.f, 1, 130, false, true, 1234ULL}, - {2.0f, -1.f, 1, 1, false, false, 1234ULL}, - {2.0f, -1.f, 1, 1, false, true, 1234ULL}, - {2.0f, -1.f, 7, 23, false, false, 1234ULL}, - {2.0f, -1.f, 7, 23, false, true, 1234ULL}, - {2.0f, -1.f, 17, 5, false, false, 1234ULL}, - {2.0f, -1.f, 17, 5, false, true, 1234ULL}, - {0.0001f, 0.1f, 1 << 27, 2, false, false, 1234ULL, 0.0001f}, - {0.0001f, 0.1f, 1 << 27, 2, false, true, 1234ULL, 0.0001f}}; - -const std::vector> inputsd = { - {0.15, 1.0, 1024, 32, true, false, 1234ULL}, - {0.15, 1.0, 1024, 64, true, false, 1234ULL}, - {0.15, 1.0, 1024, 128, true, false, 1234ULL}, - {0.15, 1.0, 1024, 256, true, false, 1234ULL}, - {0.15, -1.0, 1024, 32, false, false, 1234ULL}, - {0.15, -1.0, 1024, 64, false, false, 1234ULL}, - {0.15, -1.0, 1024, 128, false, false, 
1234ULL}, - {0.15, -1.0, 1024, 256, false, false, 1234ULL}, - {0.15, 1.0, 1024, 32, true, true, 1234ULL}, - {0.15, 1.0, 1024, 64, true, true, 1234ULL}, - {0.15, 1.0, 1024, 128, true, true, 1234ULL}, - {0.15, 1.0, 1024, 256, true, true, 1234ULL}, - {0.15, -1.0, 1024, 32, false, true, 1234ULL}, - {0.15, -1.0, 1024, 64, false, true, 1234ULL}, - {0.15, -1.0, 1024, 128, false, true, 1234ULL}, - {0.15, -1.0, 1024, 256, false, true, 1234ULL}, - {0.15, -1.0, 1030, 1, false, false, 1234ULL}, - {0.15, -1.0, 1030, 60, true, false, 1234ULL}, - {2.0, -1.0, 31, 120, false, false, 1234ULL}, - {2.0, -1.0, 1, 130, false, false, 1234ULL}, - {0.15, -1.0, 1030, 1, false, true, 1234ULL}, - {0.15, -1.0, 1030, 60, true, true, 1234ULL}, - {2.0, -1.0, 31, 120, false, true, 1234ULL}, - {2.0, -1.0, 1, 130, false, true, 1234ULL}, - {2.0, -1.0, 1, 1, false, false, 1234ULL}, - {2.0, -1.0, 1, 1, false, true, 1234ULL}, - {2.0, -1.0, 7, 23, false, false, 1234ULL}, - {2.0, -1.0, 7, 23, false, true, 1234ULL}, - {2.0, -1.0, 17, 5, false, false, 1234ULL}, - {2.0, -1.0, 17, 5, false, true, 1234ULL}, - {1e-8, 1e-1, 1 << 27, 2, false, false, 1234ULL, 0.0001}, - {1e-8, 1e-1, 1 << 27, 2, false, true, 1234ULL, 0.0001}}; + {0.15f, -1.f, 1024, 32, false, 1234ULL}, + {0.15f, -1.f, 1024, 64, false, 1234ULL}, + {0.15f, -1.f, 1024, 128, false, 1234ULL}, + {0.15f, -1.f, 1024, 256, false, 1234ULL}, + {0.15f, -1.f, 1024, 32, true, 1234ULL}, + {0.15f, -1.f, 1024, 64, true, 1234ULL}, + {0.15f, -1.f, 1024, 128, true, 1234ULL}, + {0.15f, -1.f, 1024, 256, true, 1234ULL}, + {0.15f, -1.f, 1030, 1, false, 1234ULL}, + {2.0f, -1.f, 31, 120, false, 1234ULL}, + {2.0f, -1.f, 1, 130, false, 1234ULL}, + {0.15f, -1.f, 1030, 1, true, 1234ULL}, + {2.0f, -1.f, 31, 120, true, 1234ULL}, + {2.0f, -1.f, 1, 130, true, 1234ULL}, + {2.0f, -1.f, 1, 1, false, 1234ULL}, + {2.0f, -1.f, 1, 1, true, 1234ULL}, + {2.0f, -1.f, 7, 23, false, 1234ULL}, + {2.0f, -1.f, 7, 23, true, 1234ULL}, + {2.0f, -1.f, 17, 5, false, 1234ULL}, + {2.0f, -1.f, 17, 5, 
true, 1234ULL}, + {0.0001f, 0.1f, 1 << 27, 2, false, 1234ULL, 0.0001f}, + {0.0001f, 0.1f, 1 << 27, 2, true, 1234ULL, 0.0001f}}; + +const std::vector> inputsd = {{0.15, -1.0, 1024, 32, false, 1234ULL}, + {0.15, -1.0, 1024, 64, false, 1234ULL}, + {0.15, -1.0, 1024, 128, false, 1234ULL}, + {0.15, -1.0, 1024, 256, false, 1234ULL}, + {0.15, -1.0, 1024, 32, true, 1234ULL}, + {0.15, -1.0, 1024, 64, true, 1234ULL}, + {0.15, -1.0, 1024, 128, true, 1234ULL}, + {0.15, -1.0, 1024, 256, true, 1234ULL}, + {0.15, -1.0, 1030, 1, false, 1234ULL}, + {2.0, -1.0, 31, 120, false, 1234ULL}, + {2.0, -1.0, 1, 130, false, 1234ULL}, + {0.15, -1.0, 1030, 1, true, 1234ULL}, + {2.0, -1.0, 31, 120, true, 1234ULL}, + {2.0, -1.0, 1, 130, true, 1234ULL}, + {2.0, -1.0, 1, 1, false, 1234ULL}, + {2.0, -1.0, 1, 1, true, 1234ULL}, + {2.0, -1.0, 7, 23, false, 1234ULL}, + {2.0, -1.0, 7, 23, true, 1234ULL}, + {2.0, -1.0, 17, 5, false, 1234ULL}, + {2.0, -1.0, 17, 5, true, 1234ULL}, + {1e-8, 1e-1, 1 << 27, 2, false, 1234ULL, 0.0001}, + {1e-8, 1e-1, 1 << 27, 2, true, 1234ULL, 0.0001}}; typedef MeanTest MeanTestF; TEST_P(MeanTestF, Result) diff --git a/cpp/tests/stats/mean_center.cu b/cpp/tests/stats/mean_center.cu index b44d87d1bd..48bf50056c 100644 --- a/cpp/tests/stats/mean_center.cu +++ b/cpp/tests/stats/mean_center.cu @@ -32,7 +32,7 @@ template struct MeanCenterInputs { T tolerance, mean; IdxType rows, cols; - bool sample, rowMajor, bcastAlongRows; + bool rowMajor, bcastAlongRows; unsigned long long int seed; }; @@ -64,8 +64,7 @@ class MeanCenterTest : public ::testing::TestWithParam> inputsf_i32 = { - {0.05f, 1.f, 1024, 32, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, 
true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, false, 1234ULL}}; + {0.05f, -1.f, 1024, 32, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, false, 1234ULL}}; typedef MeanCenterTest MeanCenterTestF_i32; TEST_P(MeanCenterTestF_i32, Result) { @@ -136,30 +123,18 @@ TEST_P(MeanCenterTestF_i32, Result) INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i32, ::testing::ValuesIn(inputsf_i32)); const std::vector> inputsf_i64 = { - {0.05f, 1.f, 1024, 32, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, true, 
1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, false, 1234ULL}}; + {0.05f, -1.f, 1024, 32, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, false, 1234ULL}}; typedef MeanCenterTest MeanCenterTestF_i64; TEST_P(MeanCenterTestF_i64, Result) { @@ -169,30 +144,18 @@ TEST_P(MeanCenterTestF_i64, Result) INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i64, ::testing::ValuesIn(inputsf_i64)); const std::vector> inputsd_i32 = { - {0.05, 1.0, 
1024, 32, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, false, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, false, 1234ULL}}; + {0.05, -1.0, 1024, 32, false, true, 1234ULL}, + {0.05, -1.0, 1024, 64, false, true, 1234ULL}, + {0.05, -1.0, 1024, 128, false, true, 1234ULL}, + {0.05, -1.0, 1024, 32, true, true, 1234ULL}, + {0.05, -1.0, 1024, 64, true, true, 1234ULL}, + {0.05, -1.0, 1024, 128, true, true, 1234ULL}, + {0.05, -1.0, 1024, 32, false, false, 1234ULL}, + {0.05, -1.0, 1024, 64, false, false, 1234ULL}, + {0.05, -1.0, 1024, 128, false, false, 1234ULL}, + {0.05, -1.0, 1024, 32, true, false, 1234ULL}, + {0.05, -1.0, 1024, 64, true, false, 1234ULL}, + {0.05, -1.0, 1024, 128, true, false, 1234ULL}}; typedef MeanCenterTest MeanCenterTestD_i32; TEST_P(MeanCenterTestD_i32, Result) { @@ -202,30 +165,18 @@ 
TEST_P(MeanCenterTestD_i32, Result) INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestD_i32, ::testing::ValuesIn(inputsd_i32)); const std::vector> inputsd_i64 = { - {0.05, 1.0, 1024, 32, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, false, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, false, 1234ULL}}; + {0.05, -1.0, 1024, 32, false, true, 1234ULL}, + {0.05, -1.0, 1024, 64, false, true, 1234ULL}, + {0.05, -1.0, 1024, 128, false, true, 1234ULL}, + {0.05, -1.0, 1024, 32, true, true, 1234ULL}, + {0.05, -1.0, 1024, 64, true, true, 1234ULL}, + {0.05, -1.0, 1024, 128, true, true, 1234ULL}, + {0.05, -1.0, 1024, 32, false, false, 1234ULL}, + {0.05, -1.0, 1024, 64, false, false, 1234ULL}, + {0.05, -1.0, 1024, 128, false, false, 1234ULL}, + {0.05, -1.0, 1024, 32, true, false, 1234ULL}, + {0.05, -1.0, 1024, 64, true, false, 
1234ULL}, + {0.05, -1.0, 1024, 128, true, false, 1234ULL}}; typedef MeanCenterTest MeanCenterTestD_i64; TEST_P(MeanCenterTestD_i64, Result) { diff --git a/cpp/tests/stats/stddev.cu b/cpp/tests/stats/stddev.cu index f4c5f92f49..a9a70b1e60 100644 --- a/cpp/tests/stats/stddev.cu +++ b/cpp/tests/stats/stddev.cu @@ -81,8 +81,7 @@ class StdDevTest : public ::testing::TestWithParam> { using layout_t = raft::row_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - false); + raft::make_device_vector_view(mean_act.data(), cols)); stddev(handle, raft::make_device_matrix_view(data, rows, cols), @@ -99,8 +98,7 @@ class StdDevTest : public ::testing::TestWithParam> { using layout_t = raft::col_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - false); + raft::make_device_vector_view(mean_act.data(), cols)); stddev(handle, raft::make_device_matrix_view(data, rows, cols), From 19b8103077cbd5e16ad17c4a46788faf01fc9047 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 31 Jan 2025 08:02:58 -0800 Subject: [PATCH 35/37] Revert CUDA 12.8 shared workflow branch changes (#2560) This PR points the shared workflow branches back to the default 25.02 branches. 
xref: https://github.com/rapidsai/build-planning/issues/139 --- .github/workflows/build.yaml | 20 ++++++------- .github/workflows/pr.yaml | 30 +++++++++---------- .github/workflows/test.yaml | 10 +++---- .../trigger-breaking-change-alert.yaml | 2 +- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cdcb95efad..d484bcae22 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -68,7 +68,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-libraft: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -91,7 +91,7 @@ jobs: wheel-build-pylibraft: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,7 +101,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-build-raft-dask: needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -122,7 +122,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index af963bbc8a..dddee00d5f 100644 --- 
a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -28,7 +28,7 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} @@ -46,7 +46,7 @@ jobs: repo: raft changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -70,47 +70,47 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -120,7 +120,7 @@ jobs: wheel-build-libraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request branch: ${{ inputs.branch }} @@ -132,14 +132,14 @@ jobs: wheel-build-pylibraft: needs: [checks, wheel-build-libraft] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: [wheel-build-pylibraft, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -147,21 +147,21 @@ jobs: wheel-build-raft-dask: needs: [checks, wheel-build-libraft] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: [wheel-build-raft-dask, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.8"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8a4d8a5eb4..178c6f677c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 07f0f83cc9..01dd2436be 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} From e15a112d4f5f7f4fcf148ef5af15e8ed98ba89ba Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 31 Jan 2025 17:39:49 -0600 Subject: [PATCH 36/37] Fix docs builds (#2562) This PR fixes two errors in docs builds: 1. a function with `void` return type had a `@return` parameter, which causes an error. The error was `error: found documented return type for raft::random::device::warp_random_sample that does not return anything` 2. a function with return type `std::vector` was being misinterpreted as the beginning of an HTML tag ``. This resulted in `error: end of comment block while expecting command `. 
--- cpp/include/raft/random/device/sample.cuh | 6 ++++-- cpp/include/raft/util/itertools.hpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/random/device/sample.cuh b/cpp/include/raft/random/device/sample.cuh index d0e5200185..67b98f12fe 100644 --- a/cpp/include/raft/random/device/sample.cuh +++ b/cpp/include/raft/random/device/sample.cuh @@ -27,12 +27,14 @@ namespace raft::random::device { /** * @brief warp-level random sampling of an index. + * * It selects an index with the given discrete probability - * distribution(represented by weights of each index) + * distribution(represented by weights of each index). + * Only thread 0 will contain the valid reduced result. + * * @param rng random number generator, must have next_u32() function * @param weight weight of the rank/index. * @param idx index to be used as rank - * @return only the thread0 will contain valid reduced result */ template DI void warp_random_sample(rng_t& rng, T& weight, i_t& idx) diff --git a/cpp/include/raft/util/itertools.hpp b/cpp/include/raft/util/itertools.hpp index 493ac9befe..a31d9f79df 100644 --- a/cpp/include/raft/util/itertools.hpp +++ b/cpp/include/raft/util/itertools.hpp @@ -36,7 +36,7 @@ namespace raft::util::itertools { * fields of the structure (if the structure has more fields, some might be initialized * with their default value). * @param lists One or more initializer lists. - * @return std::vector A vector of structures containing the cartesian product. + * @return `std::vector` A vector of structures containing the cartesian product. */ template std::vector product(std::initializer_list... 
lists) From 7af57c3936313ecb5fab8dc0d758a26eb8f533ca Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Thu, 13 Feb 2025 09:44:59 -0600 Subject: [PATCH 37/37] Update Changelog [skip ci] --- CHANGELOG.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d7c641b21..a7f1d04beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,59 @@ +# raft 25.02.00 (13 Feb 2025) + +## 🚨 Breaking Changes + +- Update pip devcontainers to UCX 1.18 ([#2550](https://github.com/rapidsai/raft/pull/2550)) [@jameslamb](https://github.com/jameslamb) +- Switch over to rapids-logger ([#2530](https://github.com/rapidsai/raft/pull/2530)) [@vyasr](https://github.com/vyasr) +- Adapt to rmm logger changes ([#2513](https://github.com/rapidsai/raft/pull/2513)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Rename test to tests. ([#2546](https://github.com/rapidsai/raft/pull/2546)) [@bdice](https://github.com/bdice) +- Fix bit order of RMAT Rectangular Generator to match expectation ([#2542](https://github.com/rapidsai/raft/pull/2542)) [@mfoerste4](https://github.com/mfoerste4) +- Fix broken link to python doc ([#2537](https://github.com/rapidsai/raft/pull/2537)) [@lowener](https://github.com/lowener) +- Fix lanczos solver integer overflow ([#2536](https://github.com/rapidsai/raft/pull/2536)) [@viclafargue](https://github.com/viclafargue) +- Fix rnd bit generation in rmat_rectangular_kernel ([#2524](https://github.com/rapidsai/raft/pull/2524)) [@tfeher](https://github.com/tfeher) + +## 📖 Documentation + +- Fix docs builds ([#2562](https://github.com/rapidsai/raft/pull/2562)) [@bdice](https://github.com/bdice) +- [DOC] Fix sample codes ([#2518](https://github.com/rapidsai/raft/pull/2518)) [@enp1s0](https://github.com/enp1s0) + +## 🚀 New Features + +- Add cuda 12.8 support ([#2551](https://github.com/rapidsai/raft/pull/2551)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for
different data type of bitset ([#2535](https://github.com/rapidsai/raft/pull/2535)) [@lowener](https://github.com/lowener) +- [Feat] Support `bitset_to_csr` ([#2523](https://github.com/rapidsai/raft/pull/2523)) [@rhdong](https://github.com/rhdong) +- Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 ([#2517](https://github.com/rapidsai/raft/pull/2517)) [@bdice](https://github.com/bdice) + +## 🛠️ Improvements + +- Revert CUDA 12.8 shared workflow branch changes ([#2560](https://github.com/rapidsai/raft/pull/2560)) [@vyasr](https://github.com/vyasr) +- Build and test with CUDA 12.8.0 ([#2555](https://github.com/rapidsai/raft/pull/2555)) [@bdice](https://github.com/bdice) +- Update pip devcontainers to UCX 1.18 ([#2550](https://github.com/rapidsai/raft/pull/2550)) [@jameslamb](https://github.com/jameslamb) +- use dynamic CUDA wheels on CUDA 11 ([#2548](https://github.com/rapidsai/raft/pull/2548)) [@jameslamb](https://github.com/jameslamb) +- Normalize whitespace ([#2547](https://github.com/rapidsai/raft/pull/2547)) [@bdice](https://github.com/bdice) +- Use cuda.bindings layout. ([#2545](https://github.com/rapidsai/raft/pull/2545)) [@bdice](https://github.com/bdice) +- Revert "Introduction of the `raft::device_resources_snmg` type (#2487)" ([#2543](https://github.com/rapidsai/raft/pull/2543)) [@cjnolet](https://github.com/cjnolet) +- Add missing `#include <cstdint>` ([#2540](https://github.com/rapidsai/raft/pull/2540)) [@jakirkham](https://github.com/jakirkham) +- Use GCC 13 in CUDA 12 conda builds.
([#2539](https://github.com/rapidsai/raft/pull/2539)) [@bdice](https://github.com/bdice) +- Use rapids-cmake for the logger ([#2534](https://github.com/rapidsai/raft/pull/2534)) [@vyasr](https://github.com/vyasr) +- Check if nightlies have succeeded recently enough ([#2533](https://github.com/rapidsai/raft/pull/2533)) [@vyasr](https://github.com/vyasr) +- remove unused 'joblib' and 'numba' dependencies, other packaging cleanup ([#2532](https://github.com/rapidsai/raft/pull/2532)) [@jameslamb](https://github.com/jameslamb) +- introduce libraft wheels ([#2531](https://github.com/rapidsai/raft/pull/2531)) [@jameslamb](https://github.com/jameslamb) +- Switch over to rapids-logger ([#2530](https://github.com/rapidsai/raft/pull/2530)) [@vyasr](https://github.com/vyasr) +- reduce duplication, removed unused things in dependencies.yaml ([#2529](https://github.com/rapidsai/raft/pull/2529)) [@jameslamb](https://github.com/jameslamb) +- Update cuda-python lower bounds to 12.6.2 / 11.8.5 ([#2522](https://github.com/rapidsai/raft/pull/2522)) [@bdice](https://github.com/bdice) +- [Opt] Optimizing the performance of `bitmap_to_csr` ([#2516](https://github.com/rapidsai/raft/pull/2516)) [@rhdong](https://github.com/rhdong) +- prefer system install of UCX in devcontainers, update outdated RAPIDS references ([#2514](https://github.com/rapidsai/raft/pull/2514)) [@jameslamb](https://github.com/jameslamb) +- Adapt to rmm logger changes ([#2513](https://github.com/rapidsai/raft/pull/2513)) [@vyasr](https://github.com/vyasr) +- Require approval to run CI on draft PRs ([#2512](https://github.com/rapidsai/raft/pull/2512)) [@bdice](https://github.com/bdice) +- Shrink wheel size limit following removal of vector search APIs. 
([#2509](https://github.com/rapidsai/raft/pull/2509)) [@bdice](https://github.com/bdice) +- Forward-merge branch-24.12 to branch-25.02 ([#2508](https://github.com/rapidsai/raft/pull/2508)) [@bdice](https://github.com/bdice) +- Introduction of the `raft::device_resources_snmg` type ([#2487](https://github.com/rapidsai/raft/pull/2487)) [@viclafargue](https://github.com/viclafargue) +- Add breaking change workflow trigger ([#2482](https://github.com/rapidsai/raft/pull/2482)) [@AyodeAwe](https://github.com/AyodeAwe) +- Remove 'sample' parameter from stats::mean API ([#2389](https://github.com/rapidsai/raft/pull/2389)) [@mfoerste4](https://github.com/mfoerste4) + # raft 24.12.00 (11 Dec 2024) ## 🚨 Breaking Changes