diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index dc12ab2ade..0f6a8b46af 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,7 @@ RUN apt update -y \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids +ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true FROM ${BASE} as conda-base diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 008bf8730a..8c857961c2 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 75aed80f9f..94b0909f6c 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + 
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 87% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index 240ba02131..0995e354af 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,24 +3,24 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p 
${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 85% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index c23c79017a..137699dc5f 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,33 +3,33 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": 
"rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { - "version": "12.5", + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { + "version": "12.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/raft,type=bind,consistency=consistent", @@ -37,7 +37,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee5..e0ea775aad 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 945589dc12..d484bcae22 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 
@@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -66,9 +66,32 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ inputs.sha }} + wheel-build-libraft: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libraft.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-publish-libraft: + needs: wheel-build-libraft + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libraft + package-type: cpp wheel-build-pylibraft: + needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -78,16 +101,18 @@ jobs: wheel-publish-pylibraft: needs: 
wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: pylibraft + package-type: python wheel-build-raft-dask: + needs: wheel-build-libraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -97,10 +122,11 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: raft_dask + package-type: python diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9c22edf74c..dddee00d5f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - check-nightly-ci - changed-files - checks - conda-cpp-build @@ -20,19 +21,32 @@ jobs: - conda-python-build - conda-python-tests - docs-build + - wheel-build-libraft - wheel-build-pylibraft - wheel-tests-pylibraft - wheel-build-raft-dask - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} + check-nightly-ci: + # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # provides at least 
Python 3.11 (see + # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + runs-on: ubuntu-24.04 + env: + RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: raft changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -56,89 +70,101 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: 
build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - wheel-build-pylibraft: + wheel-build-libraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + with: + build_type: pull-request + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libraft.sh + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-build-pylibraft: + needs: [checks, wheel-build-libraft] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: [wheel-build-pylibraft, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_pylibraft.sh 
wheel-build-raft-dask: - needs: wheel-tests-pylibraft + needs: [checks, wheel-build-libraft] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: [wheel-build-raft-dask, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' build_command: | sccache -z; build-all -DBUILD_PRIMS_BENCH=ON --verbose; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 92020f6a76..178c6f677c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 0000000000..01dd2436be --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 @@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} diff 
--git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3b3c8c440..6dfcc72417 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,11 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: @@ -83,7 +88,7 @@ repos: exclude: .*/thirdparty/.* - id: include-check name: include-check - entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/test + entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/tests pass_filenames: false language: python additional_dependencies: [gitpython] @@ -98,7 +103,7 @@ repos: ^CHANGELOG[.]md$| ^cpp/cmake/patches/cutlass/build-export[.]patch$ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-json - repo: https://github.com/rapidsai/pre-commit-hooks @@ -110,8 +115,7 @@ repos: [.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$| CMakeLists[.]txt$| CMakeLists_standalone[.]txt$| - meta[.]yaml$| - setup[.]cfg$ + meta[.]yaml$ exclude: | (?x) cpp/include/raft/neighbors/detail/faiss_select/| @@ -119,7 +123,7 @@ repos: docs/source/sphinxext/github_link[.]py| - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.16.0 + rev: v1.17.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d7c641b21..a7f1d04beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,59 @@ +# raft 25.02.00 (13 Feb 2025) + +## 🚨 Breaking Changes + +- Update pip devcontainers to UCX 1.18 ([#2550](https://github.com/rapidsai/raft/pull/2550)) [@jameslamb](https://github.com/jameslamb) +- Switch over to rapids-logger ([#2530](https://github.com/rapidsai/raft/pull/2530)) [@vyasr](https://github.com/vyasr) +- Adapt to rmm logger 
changes ([#2513](https://github.com/rapidsai/raft/pull/2513)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Rename test to tests. ([#2546](https://github.com/rapidsai/raft/pull/2546)) [@bdice](https://github.com/bdice) +- Fix bit order of RMAT Rectangular Generator to match expectation ([#2542](https://github.com/rapidsai/raft/pull/2542)) [@mfoerste4](https://github.com/mfoerste4) +- Fix broken link to python doc ([#2537](https://github.com/rapidsai/raft/pull/2537)) [@lowener](https://github.com/lowener) +- Fix lanczos solver integer overflow ([#2536](https://github.com/rapidsai/raft/pull/2536)) [@viclafargue](https://github.com/viclafargue) +- Fix rnd bit generation in rmat_rectangular_kernel ([#2524](https://github.com/rapidsai/raft/pull/2524)) [@tfeher](https://github.com/tfeher) + +## 📖 Documentation + +- Fix docs builds ([#2562](https://github.com/rapidsai/raft/pull/2562)) [@bdice](https://github.com/bdice) +- [DOC] Fix sample codes ([#2518](https://github.com/rapidsai/raft/pull/2518)) [@enp1s0](https://github.com/enp1s0) + +## 🚀 New Features + +- Add cuda 12.8 support ([#2551](https://github.com/rapidsai/raft/pull/2551)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for different data type of bitset ([#2535](https://github.com/rapidsai/raft/pull/2535)) [@lowener](https://github.com/lowener) +- [Feat] Support `bitset_to_csr` ([#2523](https://github.com/rapidsai/raft/pull/2523)) [@rhdong](https://github.com/rhdong) +- Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 ([#2517](https://github.com/rapidsai/raft/pull/2517)) [@bdice](https://github.com/bdice) + +## 🛠️ Improvements + +- Revert CUDA 12.8 shared workflow branch changes ([#2560](https://github.com/rapidsai/raft/pull/2560)) [@vyasr](https://github.com/vyasr) +- Build and test with CUDA 12.8.0 ([#2555](https://github.com/rapidsai/raft/pull/2555)) [@bdice](https://github.com/bdice) +- Update pip devcontainers to UCX 1.18 
([#2550](https://github.com/rapidsai/raft/pull/2550)) [@jameslamb](https://github.com/jameslamb) +- use dynamic CUDA wheels on CUDA 11 ([#2548](https://github.com/rapidsai/raft/pull/2548)) [@jameslamb](https://github.com/jameslamb) +- Normalize whitespace ([#2547](https://github.com/rapidsai/raft/pull/2547)) [@bdice](https://github.com/bdice) +- Use cuda.bindings layout. ([#2545](https://github.com/rapidsai/raft/pull/2545)) [@bdice](https://github.com/bdice) +- Revert "Introduction of the `raft::device_resources_snmg` type (#2487)" ([#2543](https://github.com/rapidsai/raft/pull/2543)) [@cjnolet](https://github.com/cjnolet) +- Add missing `#include <cstdint>` ([#2540](https://github.com/rapidsai/raft/pull/2540)) [@jakirkham](https://github.com/jakirkham) +- Use GCC 13 in CUDA 12 conda builds. ([#2539](https://github.com/rapidsai/raft/pull/2539)) [@bdice](https://github.com/bdice) +- Use rapids-cmake for the logger ([#2534](https://github.com/rapidsai/raft/pull/2534)) [@vyasr](https://github.com/vyasr) +- Check if nightlies have succeeded recently enough ([#2533](https://github.com/rapidsai/raft/pull/2533)) [@vyasr](https://github.com/vyasr) +- remove unused 'joblib' and 'numba' dependencies, other packaging cleanup ([#2532](https://github.com/rapidsai/raft/pull/2532)) [@jameslamb](https://github.com/jameslamb) +- introduce libraft wheels ([#2531](https://github.com/rapidsai/raft/pull/2531)) [@jameslamb](https://github.com/jameslamb) +- Switch over to rapids-logger ([#2530](https://github.com/rapidsai/raft/pull/2530)) [@vyasr](https://github.com/vyasr) +- reduce duplication, removed unused things in dependencies.yaml ([#2529](https://github.com/rapidsai/raft/pull/2529)) [@jameslamb](https://github.com/jameslamb) +- Update cuda-python lower bounds to 12.6.2 / 11.8.5 ([#2522](https://github.com/rapidsai/raft/pull/2522)) [@bdice](https://github.com/bdice) +- [Opt] Optimizing the performance of `bitmap_to_csr` 
([#2516](https://github.com/rapidsai/raft/pull/2516)) [@rhdong](https://github.com/rhdong) +- prefer system install of UCX in devcontainers, update outdated RAPIDS references ([#2514](https://github.com/rapidsai/raft/pull/2514)) [@jameslamb](https://github.com/jameslamb) +- Adapt to rmm logger changes ([#2513](https://github.com/rapidsai/raft/pull/2513)) [@vyasr](https://github.com/vyasr) +- Require approval to run CI on draft PRs ([#2512](https://github.com/rapidsai/raft/pull/2512)) [@bdice](https://github.com/bdice) +- Shrink wheel size limit following removal of vector search APIs. ([#2509](https://github.com/rapidsai/raft/pull/2509)) [@bdice](https://github.com/bdice) +- Forward-merge branch-24.12 to branch-25.02 ([#2508](https://github.com/rapidsai/raft/pull/2508)) [@bdice](https://github.com/bdice) +- Introduction of the `raft::device_resources_snmg` type ([#2487](https://github.com/rapidsai/raft/pull/2487)) [@viclafargue](https://github.com/viclafargue) +- Add breaking change workflow trigger ([#2482](https://github.com/rapidsai/raft/pull/2482)) [@AyodeAwe](https://github.com/AyodeAwe) +- Remove 'sample' parameter from stats::mean API ([#2389](https://github.com/rapidsai/raft/pull/2389)) [@mfoerste4](https://github.com/mfoerste4) + # raft 24.12.00 (11 Dec 2024) ## 🚨 Breaking Changes diff --git a/README.md b/README.md index 898c5c22c3..2807ab50cc 100755 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-vers ```bash # for CUDA 12.5 -mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.5 +mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.8 ``` Note that the above commands will also install `libraft-headers` and `libraft`. @@ -248,7 +248,7 @@ Note that the above commands will also install `libraft-headers` and `libraft`. You can also install the conda packages individually using the `mamba` command above. 
For example, if you'd like to install RAFT's headers and pre-compiled shared library to use in your project: ```bash # for CUDA 12.5 -mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5 +mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.8 ``` ### Installing Python through Pip diff --git a/VERSION b/VERSION index af28c42b52..72eefaf7c7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/build.sh b/build.sh index a95cb8ee23..8f388e549c 100755 --- a/build.sh +++ b/build.sh @@ -347,13 +347,8 @@ if [[ ${CMAKE_TARGET} == "" ]]; then CMAKE_TARGET="all" fi -# Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. -SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" -if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then - SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON" -fi # Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS -SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g') +SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${EXTRA_CMAKE_ARGS} | sed 's/ /;/g') # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then @@ -478,4 +473,3 @@ if hasArg docs; then cd ${SPHINX_BUILD_DIR} sphinx-build -b html source _html fi - diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 326ee9a4c7..976da98998 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -5,6 +5,7 @@ set -euo pipefail package_name=$1 package_dir=$2 +package_type=$3 underscore_package_name=$(echo "${package_name}" | tr "-" "_") # Clear out system ucx files to ensure that we're getting ucx from the wheel. 
@@ -20,24 +21,21 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXCLUDE_ARGS=( - --exclude "libcublas.so.12" - --exclude "libcublasLt.so.12" - --exclude "libcurand.so.10" - --exclude "libcusolver.so.11" - --exclude "libcusparse.so.12" - --exclude "libnvJitLink.so.12" - --exclude "libucp.so.0" +EXCLUDE_ARGS=( + --exclude "libcublas.so.*" + --exclude "libcublasLt.so.*" + --exclude "libcurand.so.*" + --exclude "libcusolver.so.*" + --exclude "libcusparse.so.*" + --exclude "libnvJitLink.so.*" + --exclude "libucp.so.*" +) + +if [[ ${package_name} != "libraft" ]]; then + EXCLUDE_ARGS+=( + --exclude "libraft.so" ) - ;; - 11.*) - EXCLUDE_ARGS=( - --exclude "libucp.so.0" - ) - ;; -esac +fi sccache --zero-stats @@ -55,4 +53,4 @@ sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* -RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_type} final_dist diff --git a/ci/build_wheel_libraft.sh b/ci/build_wheel_libraft.sh new file mode 100755 index 0000000000..8ff0da1e9a --- /dev/null +++ b/ci/build_wheel_libraft.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_name="libraft" +package_dir="python/libraft" + +rapids-logger "Generating build requirements" +matrix_selectors="cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "${matrix_selectors}" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +ci/build_wheel.sh libraft ${package_dir} cpp +ci/validate_wheel.sh ${package_dir} final_dist libraft diff --git a/ci/build_wheel_pylibraft.sh b/ci/build_wheel_pylibraft.sh index dacaa1190e..6f74e0e8c5 100755 --- a/ci/build_wheel_pylibraft.sh +++ b/ci/build_wheel_pylibraft.sh @@ -5,17 +5,16 @@ set -euo pipefail package_dir="python/pylibraft" -case "${RAPIDS_CUDA_VERSION}" in - 12.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" - ;; - 11.*) - EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" - ;; -esac +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF${EXTRA_CMAKE_ARGS}" +# Downloads libraft wheels from this current build, +# then ensures 'pylibraft' wheel builds always use the 'libraft' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. 
+RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libraft_dist +echo "libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libraft_dist/libraft_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -ci/build_wheel.sh pylibraft ${package_dir} -ci/validate_wheel.sh ${package_dir} final_dist +ci/build_wheel.sh pylibraft ${package_dir} python +ci/validate_wheel.sh ${package_dir} final_dist pylibraft diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh index e4f3f0a833..0cacb6fe30 100755 --- a/ci/build_wheel_raft_dask.sh +++ b/ci/build_wheel_raft_dask.sh @@ -5,8 +5,16 @@ set -euo pipefail package_dir="python/raft-dask" -# Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -ci/build_wheel.sh raft-dask ${package_dir} -ci/validate_wheel.sh ${package_dir} final_dist +# Downloads libraft wheels from this current build, +# then ensures 'raft-dask' wheel builds always use the 'libraft' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. 
+RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libraft_dist +echo "libraft-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libraft_dist/libraft_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" + +ci/build_wheel.sh raft-dask ${package_dir} python +ci/validate_wheel.sh ${package_dir} final_dist raft-dask diff --git a/ci/check_style.sh b/ci/check_style.sh index d7ba4cae25..e0c30a2d41 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,5 +14,12 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks +# get config for cmake-format checks +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json" +export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json +mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) +wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} + # Run pre-commit checks pre-commit run --all-files --show-diff-on-failure diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index a70fed9ec8..1ab9157b89 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -43,6 +43,8 @@ echo "${NEXT_FULL_TAG}" > VERSION DEPENDENCIES=( dask-cuda + libraft + librmm pylibraft rmm rapids-dask-dependency diff --git a/ci/run_pylibraft_pytests.sh b/ci/run_pylibraft_pytests.sh index 1167b89c5f..7f3d1f9cfb 100755 --- a/ci/run_pylibraft_pytests.sh +++ b/ci/run_pylibraft_pytests.sh @@ -6,4 +6,4 @@ set -euo pipefail # Support invoking run_pylibraft_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibraft/pylibraft -pytest --cache-clear "$@" test +pytest --cache-clear "$@" tests diff --git a/ci/run_raft_dask_pytests.sh b/ci/run_raft_dask_pytests.sh index 07d0b5baa0..a9e6a130cd 
100755 --- a/ci/run_raft_dask_pytests.sh +++ b/ci/run_raft_dask_pytests.sh @@ -6,4 +6,4 @@ set -euo pipefail # Support invoking run_raft_dask_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/raft-dask/raft_dask -pytest --cache-clear --import-mode=append "$@" test +pytest --cache-clear --import-mode=append "$@" tests diff --git a/ci/test_wheel_pylibraft.sh b/ci/test_wheel_pylibraft.sh index b38f5a690b..26f4da267f 100755 --- a/ci/test_wheel_pylibraft.sh +++ b/ci/test_wheel_pylibraft.sh @@ -5,9 +5,13 @@ set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libraft-dep +RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/pylibraft*.whl)[test] +python -m pip install \ + ./local-libraft-dep/libraft*.whl \ + "$(echo ./dist/pylibraft*.whl)[test]" -python -m pytest ./python/pylibraft/pylibraft/test +python -m pytest ./python/pylibraft/pylibraft/tests diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index a778a3ec51..c394314aac 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -5,17 +5,17 @@ set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist - -# Download the pylibraft built in the previous step -RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibraft-dep +RAPIDS_PY_WHEEL_NAME="libraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libraft-dep 
+RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-pylibraft-dep +RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install -v \ + ./local-libraft-dep/libraft*.whl \ ./local-pylibraft-dep/pylibraft*.whl \ "$(echo ./dist/raft_dask_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" -test_dir="python/raft-dask/raft_dask/test" +test_dir="python/raft-dask/raft_dask/tests" rapids-logger "pytest raft-dask" python -m pytest --import-mode=append ${test_dir} diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 5910a5c59f..ec3867aa30 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -5,6 +5,9 @@ set -euo pipefail package_dir=$1 wheel_dir_relative_path=$2 +package_name=$3 + +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" cd "${package_dir}" diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 6098cd12bf..ecd9aa1ece 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -14,19 +14,18 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -35,27 +34,27 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - 
nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- sysroot_linux-aarch64==2.28 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0fe8fbab39..2f655ae077 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -14,19 +14,18 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-64=11.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 @@ -35,27 +34,27 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- 
rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 +- sysroot_linux-64==2.28 +- ucx-py==0.42.*,>=0.0.0a0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml similarity index 68% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-aarch64.yaml index bf6f5d6462..1915a3f0f0 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -16,42 +16,41 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 -- cuda-version=12.5 +- cuda-python>=12.6.2,<13.0a0 +- cuda-version=12.8 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 -- gcc_linux-64=11.* +- gcc_linux-aarch64=13.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 -name: all_cuda-125_arch-x86_64 +- sysroot_linux-aarch64==2.28 +- ucx-py==0.42.*,>=0.0.0a0 +name: all_cuda-128_arch-aarch64 diff --git 
a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 67% rename from conda/environments/all_cuda-125_arch-aarch64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index dfb9ac0b97..c8119ff7d5 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -16,42 +16,41 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 -- cuda-version=12.5 +- cuda-python>=12.6.2,<13.0a0 +- cuda-version=12.8 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0,<3.1.0a0 -- dask-cuda==24.12.*,>=0.0.0a0 -- distributed-ucxx==0.41.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- distributed-ucxx==0.42.*,>=0.0.0a0 - doxygen>=1.8.20 -- gcc_linux-aarch64=11.* +- gcc_linux-64=13.* - graphviz - ipython -- joblib>=0.11 - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 +- libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - ninja -- numba>=0.57 - numpy>=1.23,<3.0a0 - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy +- spdlog>=1.14.1,<1.15 - sphinx-copybutton - sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -- ucx-py==0.41.*,>=0.0.0a0 -name: all_cuda-125_arch-aarch64 +- sysroot_linux-64==2.28 +- ucx-py==0.42.*,>=0.0.0a0 +name: all_cuda-128_arch-x86_64 diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 4857f12cd1..11b16bc2a8 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # 
[not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index 503c4cb6fb..dbde4e3971 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -39,10 +39,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - librmm @@ -51,7 +49,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -85,11 +83,7 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} - {% endif %} - librmm requirements: host: @@ -130,10 +124,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ 
compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -145,7 +137,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -196,10 +188,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} requirements: @@ -207,7 +197,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -258,10 +248,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev - libcublas-dev - libcurand-dev @@ -273,7 +261,7 @@ outputs: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/pylibraft/conda_build_config.yaml b/conda/recipes/pylibraft/conda_build_config.yaml index 001878ff25..83f5ebcb15 100644 --- a/conda/recipes/pylibraft/conda_build_config.yaml +++ b/conda/recipes/pylibraft/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", 
"").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml index 01a9d61f0f..8f498c7e50 100644 --- a/conda/recipes/pylibraft/meta.yaml +++ b/conda/recipes/pylibraft/meta.yaml @@ -1,7 +1,5 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Usage: -# conda build . -c conda-forge -c numba -c rapidsai -c pytorch {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} @@ -20,10 +18,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -33,7 +29,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -43,10 +39,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ 
cuda_version }} @@ -61,10 +57,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - libraft {{ version }} - libraft-headers {{ version }} @@ -81,5 +77,5 @@ tests: about: home: https://rapids.ai/ license: Apache-2.0 - # license_file: LICENSE + license_file: LICENSE summary: pylibraft library diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index d7d2f68b42..d567266027 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -1,26 +1,26 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "2.17" + - "2.28" ucx_py_version: - - "0.41.*" + - "0.42.*" ucxx_version: - - "0.41.*" + - "0.42.*" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index 02a8957b06..29c7f568f1 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -1,7 +1,5 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Usage: -# conda build . 
-c conda-forge -c numba -c rapidsai -c pytorch {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} @@ -20,10 +18,8 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -33,7 +29,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -43,10 +39,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -62,15 +58,14 @@ requirements: run: {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - dask-cuda ={{ minor_version }} - rapids-dask-dependency ={{ minor_version }} - - joblib >=0.11 - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x @@ -87,5 +82,5 @@ tests: about: home: https://rapids.ai/ license: Apache-2.0 - # license_file: LICENSE + license_file: LICENSE summary: raft-dask library diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 780f6f8581..c38471bebd 100644 --- a/cpp/CMakeLists.txt +++ 
b/cpp/CMakeLists.txt @@ -65,9 +65,12 @@ set(RAFT_COMPILE_LIBRARY_DEFAULT OFF) if(BUILD_TESTS OR BUILD_PRIMS_BENCH) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) endif() -option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" +option(RAFT_COMPILE_LIBRARY "Enable building raft library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) +option(RAFT_COMPILE_DYNAMIC_ONLY "Only build the shared library and skip the +static library. Has no effect if RAFT_COMPILE_LIBRARY is OFF" OFF +) # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. Setting this flag ensures @@ -100,6 +103,17 @@ set_property( ) message(VERBOSE "RAFT: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.") +# Set logging level +set(LIBRAFT_LOGGING_LEVEL + "INFO" + CACHE STRING "Choose the logging level." +) +set_property( + CACHE LIBRAFT_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" + "OFF" +) +message(VERBOSE "RAFT: LIBRAFT_LOGGING_LEVEL = '${LIBRAFT_LOGGING_LEVEL}'.") + # ################################################################################################## # * Conda environment detection ---------------------------------------------- @@ -152,6 +166,10 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() +include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) +rapids_cpm_rapids_logger() +rapids_make_logger(raft LOGGER_HEADER_DIR include/raft/core EXPORT_SET raft-exports) + # CCCL before rmm/cuco so we get the right version of CCCL include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_rmm.cmake) @@ -180,13 +198,19 @@ target_include_directories( ) # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
-target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL) +target_link_libraries( + raft INTERFACE rmm::rmm rmm::rmm_logger spdlog::spdlog_header_only cuco::cuco + nvidia::cutlass::cutlass CCCL::CCCL raft_logger +) target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( raft INTERFACE $<$:--expt-extended-lambda --expt-relaxed-constexpr> ) +target_compile_definitions( + raft INTERFACE "RAFT_LOG_ACTIVE_LEVEL=RAFT_LOG_LEVEL_${LIBRAFT_LOGGING_LEVEL}" +) set(RAFT_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) set(RAFT_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) @@ -262,7 +286,6 @@ set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled) if(RAFT_COMPILE_LIBRARY) add_library( raft_objs OBJECT - src/core/logger.cpp src/linalg/detail/coalesced_reduction.cu src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu @@ -288,18 +311,26 @@ if(RAFT_COMPILE_LIBRARY) "$<$:${RAFT_CUDA_FLAGS}>" ) - add_library(raft_lib SHARED $) - add_library(raft_lib_static STATIC $) + # Make sure not to add the rmm logger twice since it will be brought in as an interface source by + # the rmm::rmm_logger_impl target. 
+ add_library(raft_lib SHARED $,EXCLUDE,rmm.*logger>) + + set(_raft_lib_targets raft_lib) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + add_library(raft_lib_static STATIC $,EXCLUDE,rmm.*logger>) + list(APPEND _raft_lib_targets raft_lib_static) + endif() set_target_properties( - raft_lib raft_lib_static + ${_raft_lib_targets} PROPERTIES OUTPUT_NAME raft BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" INTERFACE_POSITION_INDEPENDENT_CODE ON ) - foreach(target raft_lib raft_lib_static raft_objs) + list(APPEND _raft_lib_targets raft_objs) + foreach(target IN LISTS _raft_lib_targets) target_link_libraries( ${target} PUBLIC raft::raft @@ -313,6 +344,10 @@ if(RAFT_COMPILE_LIBRARY) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") endforeach() + target_link_libraries(raft_lib PRIVATE rmm::rmm_logger_impl raft_logger_impl) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + target_link_libraries(raft_lib_static PRIVATE rmm::rmm_logger_impl raft_logger_impl) + endif() endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) @@ -324,20 +359,22 @@ target_link_libraries(raft_compiled INTERFACE raft::raft $ -) + target_link_libraries( + raft_compiled_static INTERFACE raft::raft $ + ) +endif() # ################################################################################################## # * raft_distributed ------------------------------------------------------------------------------- @@ -386,8 +423,12 @@ install( EXPORT raft-exports ) +set(_raft_compiled_install_targets raft_compiled) +if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + list(APPEND _raft_compiled_install_targets raft_compiled_static) +endif() install( - TARGETS raft_compiled raft_compiled_static + TARGETS ${_raft_compiled_install_targets} DESTINATION ${lib_dir} COMPONENT raft EXPORT raft-compiled-exports @@ -400,12 +441,14 @@ if(TARGET raft_lib) COMPONENT compiled EXPORT raft-compiled-lib-exports ) - install( - TARGETS 
raft_lib_static - DESTINATION ${lib_dir} - COMPONENT compiled-static - EXPORT raft-compiled-static-lib-exports - ) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + install( + TARGETS raft_lib_static + DESTINATION ${lib_dir} + COMPONENT compiled-static + EXPORT raft-compiled-static-lib-exports + ) + endif() install( DIRECTORY include/raft_runtime DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} @@ -476,8 +519,12 @@ endif() set(raft_components compiled distributed) set(raft_export_sets raft-compiled-exports raft-distributed-exports) if(TARGET raft_lib) - list(APPEND raft_components compiled compiled-static) - list(APPEND raft_export_sets raft-compiled-lib-exports raft-compiled-static-lib-exports) + list(APPEND raft_components compiled) + list(APPEND raft_export_sets raft-compiled-lib-exports) + if(NOT RAFT_COMPILE_DYNAMIC_ONLY) + list(APPEND raft_components compiled-static) + list(APPEND raft_export_sets raft-compiled-static-lib-exports) + endif() endif() string( @@ -539,7 +586,7 @@ endif() # * build test executable ---------------------------------------------------- if(BUILD_TESTS) - add_subdirectory(test) + add_subdirectory(tests) endif() # ################################################################################################## diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index cf03a36612..edc1af4e02 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -32,6 +32,7 @@ function(ConfigureBench) PRIVATE raft::raft raft_internal $<$:raft::compiled> + $<$>:bench_rmm_logger> ${RAFT_CTK_MATH_DEPENDENCIES} benchmark::benchmark Threads::Threads @@ -73,6 +74,9 @@ function(ConfigureBench) endfunction() +add_library(bench_rmm_logger OBJECT) +target_link_libraries(bench_rmm_logger PRIVATE rmm::rmm_logger_impl) + if(BUILD_PRIMS_BENCH) ConfigureBench(NAME CORE_BENCH PATH core/bitset.cu core/copy.cu main.cpp) diff --git a/cpp/bench/prims/linalg/masked_matmul.cu b/cpp/bench/prims/linalg/masked_matmul.cu index 
eda9cb1710..b96e14a25d 100644 --- a/cpp/bench/prims/linalg/masked_matmul.cu +++ b/cpp/bench/prims/linalg/masked_matmul.cu @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -49,11 +49,14 @@ inline auto operator<<(std::ostream& os, const MaskedMatmulBenchParams& { os << " m*k*n=" << params.m << "*" << params.k << "*" << params.n << "\tsparsity=" << params.sparsity; - if (params.sparsity == 1.0) { os << "<-inner product for comparison"; } + if (params.sparsity == 0.0) { os << "<-inner product for comparison"; } return os; } -template +template struct MaskedMatmulBench : public fixture { MaskedMatmulBench(const MaskedMatmulBenchParams& p) : fixture(true), @@ -64,15 +67,15 @@ struct MaskedMatmulBench : public fixture { c_indptr_d(0, stream), c_indices_d(0, stream), c_data_d(0, stream), - bitmap_d(0, stream), + bits_d(0, stream), c_dense_data_d(0, stream) { - index_t element = raft::ceildiv(index_t(params.m * params.n), index_t(sizeof(bitmap_t) * 8)); - std::vector bitmap_h(element); + index_t element = raft::ceildiv(index_t(params.m * params.n), index_t(sizeof(bits_t) * 8)); + std::vector bits_h(element); a_data_d.resize(params.m * params.k, stream); b_data_d.resize(params.k * params.n, stream); - bitmap_d.resize(element, stream); + bits_d.resize(element, stream); raft::random::RngState rng(2024ULL); raft::random::uniform( @@ -82,7 +85,13 @@ struct MaskedMatmulBench : public fixture { std::vector c_dense_data_h(params.m * params.n); - c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bitmap_h); + if constexpr (bitmap_or_bitset) { + c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bits_h); + } else { + c_true_nnz = create_sparse_matrix(1, params.n, params.sparsity, bits_h); + repeat_cpu_bitset_inplace(bits_h, params.n, params.m - 1); + c_true_nnz *= params.m; + } std::vector values(c_true_nnz); std::vector indices(c_true_nnz); @@ -93,24 +102,49 @@ struct MaskedMatmulBench : public fixture { 
c_indices_d.resize(c_true_nnz, stream); c_dense_data_d.resize(params.m * params.n, stream); - cpu_convert_to_csr(bitmap_h, params.m, params.n, indices, indptr); + cpu_convert_to_csr(bits_h, params.m, params.n, indices, indptr); RAFT_EXPECTS(c_true_nnz == c_indices_d.size(), "Something wrong. The c_true_nnz != c_indices_d.size()!"); update_device(c_data_d.data(), values.data(), c_true_nnz, stream); update_device(c_indices_d.data(), indices.data(), c_true_nnz, stream); update_device(c_indptr_d.data(), indptr.data(), params.m + 1, stream); - update_device(bitmap_d.data(), bitmap_h.data(), element, stream); + update_device(bits_d.data(), bits_h.data(), element, stream); + } + + void repeat_cpu_bitset_inplace(std::vector& inout, size_t input_bits, size_t repeat) + { + size_t output_bit_index = input_bits; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bits_t) * 8); + size_t input_bit_offset = i % (sizeof(bits_t) * 8); + bool bit = (inout[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bits_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bits_t) * 8); + + inout[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } } - index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bits) { index_t total = static_cast(m * n); - index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; - for (auto& item : bitmap) { - item = static_cast(0); + if (sparsity == 0.0f) { + std::fill(bits.begin(), bits.end(), 0xffffffff); + return num_ones; + } + + for (auto& item : bits) { + item = static_cast(0); } std::random_device rd; @@ -120,8 +154,8 @@ struct MaskedMatmulBench : public fixture { while 
(num_ones > 0) { index_t index = dis(gen); - bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; - index_t bit_position = index % (8 * sizeof(bitmap_t)); + bits_t& element = bits[index / (8 * sizeof(bits_t))]; + index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { element |= (static_cast(1) << bit_position); @@ -131,7 +165,7 @@ struct MaskedMatmulBench : public fixture { return res; } - void cpu_convert_to_csr(std::vector& bitmap, + void cpu_convert_to_csr(std::vector& bits, index_t rows, index_t cols, std::vector& indices, @@ -142,14 +176,14 @@ struct MaskedMatmulBench : public fixture { indptr[offset_indptr++] = 0; index_t index = 0; - bitmap_t element = 0; + bits_t element = 0; index_t bit_position = 0; for (index_t i = 0; i < rows; ++i) { for (index_t j = 0; j < cols; ++j) { index = i * cols + j; - element = bitmap[index / (8 * sizeof(bitmap_t))]; - bit_position = index % (8 * sizeof(bitmap_t)); + element = bits[index / (8 * sizeof(bits_t))]; + bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1)) { indices[offset_values] = static_cast(j); @@ -181,13 +215,17 @@ struct MaskedMatmulBench : public fixture { params.n, static_cast(c_indices_d.size())); - auto mask = - raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); - auto c = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - if (params.sparsity < 1.0) { - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + if (params.sparsity > 0.0) { + if constexpr (bitmap_or_bitset) { + auto mask = + raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } else { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } } else { raft::distance::pairwise_distance(handle, a_data_d.data(), @@ -201,12 +239,16 @@ struct MaskedMatmulBench : public fixture { } 
resource::sync_stream(handle); - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); - resource::sync_stream(handle); - - loop_on_state(state, [this, &a, &b, &mask, &c]() { - if (params.sparsity < 1.0) { - raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + loop_on_state(state, [this, &a, &b, &c]() { + if (params.sparsity > 0.0) { + if constexpr (bitmap_or_bitset) { + auto mask = + raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } else { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, a, b, mask, c); + } } else { raft::distance::pairwise_distance(handle, a_data_d.data(), @@ -228,7 +270,7 @@ struct MaskedMatmulBench : public fixture { rmm::device_uvector a_data_d; rmm::device_uvector b_data_d; - rmm::device_uvector bitmap_d; + rmm::device_uvector bits_d; rmm::device_uvector c_dense_data_d; @@ -253,7 +295,7 @@ static std::vector> getInputs() raft::util::itertools::product({size_t(10), size_t(1024)}, {size_t(128), size_t(1024)}, {size_t(1024 * 1024)}, - {0.01f, 0.1f, 0.2f, 0.5f, 1.0f}); + {0.99f, 0.9f, 0.8f, 0.5f, 0.0f}); param_vec.reserve(params_group.size()); for (TestParams params : params_group) { @@ -263,6 +305,7 @@ static std::vector> getInputs() return param_vec; } -RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); +RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); +RAFT_BENCH_REGISTER((MaskedMatmulBench), "", getInputs()); } // namespace raft::bench::linalg diff --git a/cpp/bench/prims/sparse/bitmap_to_csr.cu b/cpp/bench/prims/sparse/bitmap_to_csr.cu index ed53df3265..71aabb1bf9 100644 --- a/cpp/bench/prims/sparse/bitmap_to_csr.cu +++ b/cpp/bench/prims/sparse/bitmap_to_csr.cu @@ -71,7 +71,7 @@ struct BitmapToCsrBench : public fixture { index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) { index_t total = static_cast(m * n); - index_t num_ones = 
static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; for (auto& item : bitmap) { @@ -141,7 +141,27 @@ const std::vector> getInputs() }; const std::vector params_group = raft::util::itertools::product( - {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.01f, 0.1f, 0.2f, 0.5f}); + {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.99f, 0.9f, 0.8f, 0.5f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +template +const std::vector> getLargeInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(1), index_t(100)}, {index_t(100 * 1000000)}, {0.95f, 0.99f}); param_vec.reserve(params_group.size()); for (TestParams params : params_group) { @@ -153,4 +173,6 @@ const std::vector> getInputs() RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); +RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getLargeInputs()); + } // namespace raft::bench::sparse diff --git a/cpp/bench/prims/sparse/bitset_to_csr.cu b/cpp/bench/prims/sparse/bitset_to_csr.cu new file mode 100644 index 0000000000..fef2d44d3e --- /dev/null +++ b/cpp/bench/prims/sparse/bitset_to_csr.cu @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace raft::bench::sparse { + +template +struct bench_param { + index_t n_repeat; + index_t n_cols; + float sparsity; +}; + +template +inline auto operator<<(std::ostream& os, const bench_param& params) -> std::ostream& +{ + os << " rows*cols=" << params.n_repeat << "*" << params.n_cols + << "\tsparsity=" << params.sparsity; + return os; +} + +template +struct BitsetToCsrBench : public fixture { + BitsetToCsrBench(const bench_param& p) + : fixture(true), + params(p), + handle(stream), + bitset_d(0, stream), + nnz(0), + indptr_d(0, stream), + indices_d(0, stream), + values_d(0, stream) + { + index_t element = raft::ceildiv(1 * params.n_cols, index_t(sizeof(bitset_t) * 8)); + std::vector bitset_h(element); + nnz = create_sparse_matrix(1, params.n_cols, params.sparsity, bitset_h); + + bitset_d.resize(bitset_h.size(), stream); + indptr_d.resize(params.n_repeat + 1, stream); + indices_d.resize(nnz, stream); + values_d.resize(nnz, stream); + + update_device(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + + resource::sync_stream(handle); + } + + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitset) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); + index_t res = num_ones; + + for (auto& item : bitset) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + 
while (num_ones > 0) { + index_t index = dis(gen); + + bitset_t& element = bitset[index / (8 * sizeof(bitset_t))]; + index_t bit_position = index % (8 * sizeof(bitset_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void run_benchmark(::benchmark::State& state) override + { + std::ostringstream label_stream; + label_stream << params; + state.SetLabel(label_stream.str()); + + auto bitset = raft::core::bitset_view(bitset_d.data(), 1 * params.n_cols); + + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_repeat, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + raft::sparse::convert::bitset_to_csr(handle, bitset, csr); + + resource::sync_stream(handle); + loop_on_state(state, [this, &bitset, &csr]() { + raft::sparse::convert::bitset_to_csr(handle, bitset, csr); + }); + } + + protected: + const raft::device_resources handle; + + bench_param params; + + rmm::device_uvector bitset_d; + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + index_t nnz; +}; // struct BitsetToCsrBench + +template +const std::vector> getInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.99f, 0.9f, 0.8f, 0.5f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +template +const std::vector> getLargeInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(1), index_t(100)}, {index_t(100 * 1000000)}, {0.95f, 
0.99f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getInputs()); +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getInputs()); + +RAFT_BENCH_REGISTER((BitsetToCsrBench), "", getLargeInputs()); + +} // namespace raft::bench::sparse diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index b364d8418d..fbf4428650 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,7 +14,9 @@ if(DISABLE_DEPRECATION_WARNINGS) list(APPEND RAFT_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) + list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations + -DRAFT_HIDE_DEPRECATION_WARNINGS + ) endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with @@ -27,6 +29,12 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) endif() + + # Allow invalid CUDA kernels in the short term + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0) + list(APPEND RAFT_CUDA_FLAGS -static-global-template-stub=false) + endif() + endif() if(CUDA_LOG_COMPILE_TIME) diff --git a/cpp/cmake/patches/cutlass/build-export.patch b/cpp/cmake/patches/cutlass/build-export.patch index a6423e9c08..31bbd25102 
100644 --- a/cpp/cmake/patches/cutlass/build-export.patch +++ b/cpp/cmake/patches/cutlass/build-export.patch @@ -20,8 +20,7 @@ index 7419bdf5e..545384d82 100755 - $ - $ ) - + # Mark CTK headers as system to supress warnings from them --- +-- 2.34.1 - diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake index 5a7d54ea4a..0e93363039 100644 --- a/cpp/cmake/thirdparty/get_rmm.cmake +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -17,7 +17,7 @@ function(find_and_configure_rmm) include(${rapids-cmake-dir}/cpm/rmm.cmake) rapids_cpm_rmm(BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports) + INSTALL_EXPORT_SET raft-exports) endfunction() find_and_configure_rmm() diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index 57e38c2638..b1ffbe246f 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -16,9 +16,9 @@ function(find_and_configure_spdlog) include(${rapids-cmake-dir}/cpm/spdlog.cmake) - rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) - rapids_export_package(BUILD spdlog rmm-exports) + rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET raft-exports) + rapids_export_package(BUILD spdlog raft-exports) endfunction() -find_and_configure_spdlog() \ No newline at end of file +find_and_configure_spdlog() diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 4efeedcbaa..4203f0969b 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -369,7 +369,7 @@ void kmeans_fit_main(raft::resources const& handle, rmm::device_uvector& workspace) { common::nvtx::range fun_scope("kmeans_fit_main"); - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); cudaStream_t stream = resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); @@ 
-865,7 +865,7 @@ void kmeans_fit(raft::resources const& handle, params.n_clusters); } - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); // Allocate memory rmm::device_uvector workspace(0, stream); @@ -1010,7 +1010,7 @@ void kmeans_predict(raft::resources const& handle, RAFT_EXPECTS(centroids.extent(1) == n_features, "invalid parameter (centroids.extent(1) != n_features)"); - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); auto metric = params.metric; // Allocate memory @@ -1201,7 +1201,7 @@ void kmeans_transform(raft::resources const& handle, raft::device_matrix_view X_new) { common::nvtx::range fun_scope("kmeans_transform"); - logger::get(RAFT_NAME).set_level(params.verbosity); + default_logger().set_level(params.verbosity); cudaStream_t stream = resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); diff --git a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh index 97755351c4..f3e2c78584 100644 --- a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh @@ -227,4 +227,4 @@ void find_k(raft::resources const& handle, n_iter); } } -} // namespace raft::cluster::detail \ No newline at end of file +} // namespace raft::cluster::detail diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 0a5a3ba5aa..5dcd679bd5 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 55becc8e15..2b77ca9963 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ 
-204,4 +204,4 @@ void build_sorted_mst( raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream); } -}; // namespace raft::cluster::detail \ No newline at end of file +}; // namespace raft::cluster::detail diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh index ccc6472684..0a21271271 100644 --- a/cpp/include/raft/cluster/detail/single_linkage.cuh +++ b/cpp/include/raft/cluster/detail/single_linkage.cuh @@ -122,4 +122,4 @@ void single_linkage(raft::resources const& handle, out->n_leaves = m; out->n_connected_components = 1; } -}; // namespace raft::cluster::detail \ No newline at end of file +}; // namespace raft::cluster::detail diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 38318e8ec8..ee1fc83a9b 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -52,7 +52,7 @@ using KeyValueIndexOp = detail::KeyValueIndexOp; * #include * using namespace raft::cluster; * ... - * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); @@ -61,7 +61,7 @@ using KeyValueIndexOp = detail::KeyValueIndexOp; * params, * X, * std::nullopt, - * centroids, + * centroids.view(), * raft::make_scalar_view(&inertia), * raft::make_scalar_view(&n_iter)); * @endcode @@ -107,7 +107,7 @@ template * #include * using namespace raft::cluster; * ... - * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); @@ -175,7 +175,7 @@ template * #include * using namespace raft::cluster; * ... 
- * raft::raft::resources handle; + * raft::resources handle; * raft::cluster::KMeansParams params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); diff --git a/cpp/include/raft/cluster/kmeans_types.hpp b/cpp/include/raft/cluster/kmeans_types.hpp index 4d956ad7a0..fbedd58417 100644 --- a/cpp/include/raft/cluster/kmeans_types.hpp +++ b/cpp/include/raft/cluster/kmeans_types.hpp @@ -82,7 +82,7 @@ struct KMeansParams : kmeans_base_params { /** * verbosity level. */ - int verbosity = RAFT_LEVEL_INFO; + level_enum verbosity = level_enum::info; /** * Seed to the random number generator. diff --git a/cpp/include/raft/common/logger.hpp b/cpp/include/raft/common/logger.hpp deleted file mode 100644 index 77483e577d..0000000000 --- a/cpp/include/raft/common/logger.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This file is deprecated and will be removed in release 22.08. - * Please use the include/core/logger.hpp instead. 
- */ - -#pragma once - -#include \ No newline at end of file diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 385bc544b0..1cd77ca665 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -21,4 +21,4 @@ #pragma once -#include \ No newline at end of file +#include diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 667c8be285..8481360897 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -52,7 +52,7 @@ using std_comms = detail::std_comms; * #include * * ncclComm_t nccl_comm; - * raft::raft::resources handle; + * raft::resources handle; * * build_comms_nccl_only(&handle, nccl_comm, 5, 0); * ... @@ -98,7 +98,7 @@ void build_comms_nccl_only(resources* handle, ncclComm_t nccl_comm, int num_rank * #include * * ncclComm_t nccl_comm; - * raft::raft::resources handle; + * raft::resources handle; * ucp_worker_h ucp_worker; * ucp_ep_h *ucp_endpoints_arr; * diff --git a/cpp/include/raft/core/bitmap.cuh b/cpp/include/raft/core/bitmap.cuh index 024b1244a6..b2c9df436f 100644 --- a/cpp/include/raft/core/bitmap.cuh +++ b/cpp/include/raft/core/bitmap.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -42,4 +43,11 @@ _RAFT_DEVICE void bitmap_view::set(const index_t row, set(row * cols_ + col, new_value); } +template +template +void bitmap_view::to_csr(const raft::resources& res, csr_matrix_t& csr) const +{ + raft::sparse::convert::bitmap_to_csr(res, *this, csr); +} + } // end namespace raft::core diff --git a/cpp/include/raft/core/bitmap.hpp b/cpp/include/raft/core/bitmap.hpp index 86b2d77478..be305152e8 100644 --- a/cpp/include/raft/core/bitmap.hpp +++ b/cpp/include/raft/core/bitmap.hpp @@ -53,9 +53,18 @@ struct bitmap_view : public bitset_view { * @param bitmap_ptr Device raw pointer * @param rows Number of row in the matrix. * @param cols Number of col in the matrix. 
+ * @param original_nbits Original number of bits used when the bitmap was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitmap was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. */ - _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, index_t rows, index_t cols) - : bitset_view(bitmap_ptr, rows * cols), rows_(rows), cols_(cols) + _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, + index_t rows, + index_t cols, + index_t original_nbits = 0) + : bitset_view(bitmap_ptr, rows * cols, original_nbits), + rows_(rows), + cols_(cols) { } @@ -65,11 +74,18 @@ struct bitmap_view : public bitset_view { * @param bitmap_span Device vector view of the bitmap * @param rows Number of row in the matrix. * @param cols Number of col in the matrix. + * @param original_nbits Original number of bits used when the bitmap was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitmap was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. */ _RAFT_HOST_DEVICE bitmap_view(raft::device_vector_view bitmap_span, index_t rows, - index_t cols) - : bitset_view(bitmap_span, rows * cols), rows_(rows), cols_(cols) + index_t cols, + index_t original_nbits = 0) + : bitset_view(bitmap_span, rows * cols, original_nbits), + rows_(rows), + cols_(cols) { } @@ -117,6 +133,26 @@ struct bitmap_view : public bitset_view { */ inline _RAFT_HOST_DEVICE index_t get_n_cols() const { return cols_; } + /** + * @brief Converts to a Compressed Sparse Row (CSR) format matrix. + * + * This method transforms a two-dimensional bitmap matrix into a CSR representation, + * where each '1' bit in the bitmap corresponds to a non-zero entry in the CSR matrix. + * The bitmap is interpreted as a row-major matrix, with rows and columns defined by + * the dimensions of the bitmap. 
+ * + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to raft::device_csr_matrix. + * + * @param[in] res RAFT resources for managing CUDA streams and execution policies. + * @param[out] csr Output parameter where the resulting CSR matrix is stored. Each '1' bit in + * the bitmap corresponds to a non-zero element in the CSR matrix. + * + * The caller must ensure that: The `csr` matrix is pre-allocated with dimensions and non-zero + * count matching the expected output. + */ + template + void to_csr(const raft::resources& res, csr_matrix_t& csr) const; + private: index_t rows_; index_t cols_; diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index d1bffdb81e..24ef3148b8 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -32,12 +33,41 @@ namespace raft::core { +template +_RAFT_HOST_DEVICE void inline compute_original_nbits_position(const index_t original_nbits, + const index_t nbits, + const index_t sample_index, + index_t& new_bit_index, + index_t& new_bit_offset) +{ + const index_t original_bit_index = sample_index / original_nbits; + const index_t original_bit_offset = sample_index % original_nbits; + new_bit_index = original_bit_index * original_nbits / nbits; + new_bit_offset = 0; + if (original_nbits > nbits) { + new_bit_index += original_bit_offset / nbits; + new_bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = nbits / original_nbits; + new_bit_offset += (original_bit_index % ratio) * original_nbits; + new_bit_offset += original_bit_offset % nbits; + } +} + template _RAFT_HOST_DEVICE inline bool bitset_view::test(const index_t sample_index) const { - const bitset_t bit_element = bitset_ptr_[sample_index / bitset_element_size]; - const index_t bit_index = sample_index % bitset_element_size; - const bool is_bit_set = (bit_element & (bitset_t{1} << bit_index)) != 0; + const index_t 
nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + if (original_nbits_ == 0 || nbits == original_nbits_) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset); + } + const bitset_t bit_element = bitset_ptr_[bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0; return is_bit_set; } @@ -51,14 +81,22 @@ template _RAFT_DEVICE void bitset_view::set(const index_t sample_index, bool set_value) const { - const index_t bit_element = sample_index / bitset_element_size; - const index_t bit_index = sample_index % bitset_element_size; - const bitset_t bitmask = bitset_t{1} << bit_index; + const index_t nbits = sizeof(bitset_t) * 8; + index_t bit_index = 0; + index_t bit_offset = 0; + + if (original_nbits_ == 0 || nbits == original_nbits_) { + bit_index = sample_index / bitset_element_size; + bit_offset = sample_index % bitset_element_size; + } else { + compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset); + } + const bitset_t bitmask = bitset_t{1} << bit_offset; if (set_value) { - atomicOr(bitset_ptr_ + bit_element, bitmask); + atomicOr(bitset_ptr_ + bit_index, bitmask); } else { const bitset_t bitmask2 = ~bitmask; - atomicAnd(bitset_ptr_ + bit_element, bitmask2); + atomicAnd(bitset_ptr_ + bit_index, bitmask2); } } @@ -165,6 +203,13 @@ double bitset_view::sparsity(const raft::resources& res) cons return static_cast((1.0 * (size_h - count_h)) / (1.0 * size_h)); } +template +template +void bitset_view::to_csr(const raft::resources& res, csr_matrix_t& csr) const +{ + raft::sparse::convert::bitset_to_csr(res, *this, csr); +} + template bitset::bitset(const raft::resources& res, raft::device_vector_view mask_index, diff --git a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index be828def87..94113822fb 
100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -42,8 +42,20 @@ template struct bitset_view { static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8; - _RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, index_t bitset_len) - : bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len} + /** + * @brief Create a bitset view from a device pointer to the bitset. + * + * @param bitset_ptr Device pointer to the bitset + * @param bitset_len Number of bits in the bitset + * @param original_nbits Original number of bits used when the bitset was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitset was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. + */ + _RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, + index_t bitset_len, + index_t original_nbits = 0) + : bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len}, original_nbits_{original_nbits} { } /** @@ -51,10 +63,17 @@ struct bitset_view { * * @param bitset_span Device vector view of the bitset * @param bitset_len Number of bits in the bitset + * @param original_nbits Original number of bits used when the bitset was created, to handle + * potential mismatches of data types. This is useful for using ANN indexes when a bitset was + * originally created with a different data type than the ones currently supported in cuVS ANN + * indexes. */ _RAFT_HOST_DEVICE bitset_view(raft::device_vector_view bitset_span, - index_t bitset_len) - : bitset_ptr_{bitset_span.data_handle()}, bitset_len_{bitset_len} + index_t bitset_len, + index_t original_nbits = 0) + : bitset_ptr_{bitset_span.data_handle()}, + bitset_len_{bitset_len}, + original_nbits_{original_nbits} { } /** @@ -180,9 +199,79 @@ struct bitset_view { return (bitset_len + bits_per_element - 1) / bits_per_element; } + /** + * @brief Get the original number of bits of the bitset. 
+ */ + auto get_original_nbits() const -> index_t { return original_nbits_; } + void set_original_nbits(index_t original_nbits) { original_nbits_ = original_nbits; } + + /** + * @brief Converts to a Compressed Sparse Row (CSR) format matrix. + * + * This method transforms the bitset view into a CSR matrix representation, where each '1' bit in + * the bitset corresponds to a non-zero entry in the CSR matrix. The bitset format supports + * only a single-row matrix, so if the CSR matrix requires multiple rows, the bitset data is + * repeated for each row in the output. + * + * Example usage: + * + * @code{.cpp} + * #include + * #include + * #include + * + * using bitset_t = uint32_t; + * using index_t = int; + * using value_t = float; + * + * raft::resources handle; + * auto stream = resource::get_cuda_stream(handle); + * index_t n_rows = 3; + * index_t n_cols = 30; + * + * // Compute bitset size and initialize device memory + * index_t bitset_size = (n_cols + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + * rmm::device_uvector bitset_d(bitset_size, stream); + * std::vector bitset_h = { + * bitset_t(0b11001010), + * }; // Example bitset, with 4 non-zero entries. + * + * raft::copy(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + * + * // Create bitset view and CSR matrix + * auto bitset_view = raft::core::bitset_view(bitset_d.data(), n_cols); + * auto csr = raft::make_device_csr_matrix(handle, n_rows, n_cols, 4 * n_rows); + * + * // Convert bitset to CSR + * bitset_view.to_csr(handle, csr); + * resource::sync_stream(handle); + * + * // Results: + * // csr.indptr = [0, 4, 8, 12]; + * // csr.indices = [1, 3, 6, 7, + * // 1, 3, 6, 7, + * // 1, 3, 6, 7]; + * // csr.values = [1, 1, 1, 1, + * // 1, 1, 1, 1, + * // 1, 1, 1, 1]; + * @endcode + * + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to raft::device_csr_matrix. + * + * @param[in] res RAFT resources for managing CUDA streams and execution policies. 
+ * @param[out] csr Output parameter where the resulting CSR matrix is stored. Each '1' bit in + * the bitset corresponds to a non-zero element in the CSR matrix. + * + * The caller must ensure that: The `csr` matrix is pre-allocated with dimensions and non-zero + * count matching the expected output, i.e., `nnz_for_csr = nnz_for_bitset * n_rows`. + */ + template + void to_csr(const raft::resources& res, csr_matrix_t& csr) const; + private: bitset_t* bitset_ptr_; index_t bitset_len_; + index_t original_nbits_; }; /** diff --git a/cpp/include/raft/core/coo_matrix.hpp b/cpp/include/raft/core/coo_matrix.hpp index 52ac69f163..b812e28206 100644 --- a/cpp/include/raft/core/coo_matrix.hpp +++ b/cpp/include/raft/core/coo_matrix.hpp @@ -297,4 +297,4 @@ class coo_matrix /** @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp index 1113cc2023..4f7679bbae 100644 --- a/cpp/include/raft/core/csr_matrix.hpp +++ b/cpp/include/raft/core/csr_matrix.hpp @@ -309,4 +309,4 @@ class csr_matrix /** @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/core/cublas_macros.hpp b/cpp/include/raft/core/cublas_macros.hpp index b69b121161..6c195d8a6f 100644 --- a/cpp/include/raft/core/cublas_macros.hpp +++ b/cpp/include/raft/core/cublas_macros.hpp @@ -23,9 +23,6 @@ #include -///@todo: enable this once we have logger enabled -// #include - #include #define _CUBLAS_ERR_TO_STR(err) \ diff --git a/cpp/include/raft/core/cusolver_macros.hpp b/cpp/include/raft/core/cusolver_macros.hpp index 74a8b7c36c..beaf2d74dc 100644 --- a/cpp/include/raft/core/cusolver_macros.hpp +++ b/cpp/include/raft/core/cusolver_macros.hpp @@ -19,11 +19,10 @@ #pragma once +#include + #include #include -///@todo: enable this once logging is enabled -// #include -#include #include @@ -135,4 +134,4 @@ inline const char* cusolver_error_to_string(cusolverStatus_t err) 
#define CUSOLVER_CHECK_NO_THROW(call) CUSOLVER_TRY_NO_THROW(call) #endif -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/core/cusparse_macros.hpp b/cpp/include/raft/core/cusparse_macros.hpp index 5a1968b529..2a1df14345 100644 --- a/cpp/include/raft/core/cusparse_macros.hpp +++ b/cpp/include/raft/core/cusparse_macros.hpp @@ -19,8 +19,6 @@ #include #include -///@todo: enable this once logging is enabled -// #include #define _CUSPARSE_ERR_TO_STR(err) \ case err: return #err; diff --git a/cpp/include/raft/core/detail/callback_sink.hpp b/cpp/include/raft/core/detail/callback_sink.hpp deleted file mode 100644 index a110af5c76..0000000000 --- a/cpp/include/raft/core/detail/callback_sink.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include - -#define SPDLOG_HEADER_ONLY -#include -#include -#include - -namespace spdlog::sinks { - -typedef void (*LogCallback)(int lvl, const char* msg); - -template -class CallbackSink : public base_sink { - public: - explicit CallbackSink(std::string tag = "spdlog", - LogCallback callback = nullptr, - void (*flush)() = nullptr) - : _callback{callback}, _flush{flush} {}; - - void set_callback(LogCallback callback) { _callback = callback; } - void set_flush(void (*flush)()) { _flush = flush; } - - protected: - void sink_it_(const details::log_msg& msg) override - { - spdlog::memory_buf_t formatted; - base_sink::formatter_->format(msg, formatted); - std::string msg_string = fmt::to_string(formatted); - - if (_callback) { - _callback(static_cast(msg.level), msg_string.c_str()); - } else { - std::cout << msg_string; - } - } - - void flush_() override - { - if (_flush) { - _flush(); - } else { - std::cout << std::flush; - } - } - - LogCallback _callback; - void (*_flush)(); -}; - -using callback_sink_mt = CallbackSink; -using callback_sink_st = CallbackSink; - -} // end namespace spdlog::sinks diff --git a/cpp/include/raft/core/detail/fail_container_policy.hpp b/cpp/include/raft/core/detail/fail_container_policy.hpp index cf9d0887dd..f5f1bfb377 100644 --- a/cpp/include/raft/core/detail/fail_container_policy.hpp +++ b/cpp/include/raft/core/detail/fail_container_policy.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/include/raft/core/detail/logger.hpp b/cpp/include/raft/core/detail/logger.hpp deleted file mode 100644 index f3f52b46ae..0000000000 --- a/cpp/include/raft/core/detail/logger.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#ifndef RAFT_HIDE_DEPRECATION_WARNINGS -#pragma message(__FILE__ \ - " is deprecated and will be removed in future releases." \ - " Please use the version instead.") -#endif - -#include diff --git a/cpp/include/raft/core/detail/mdspan_util.cuh b/cpp/include/raft/core/detail/mdspan_util.cuh index ded95c2f31..d3438bc07d 100644 --- a/cpp/include/raft/core/detail/mdspan_util.cuh +++ b/cpp/include/raft/core/detail/mdspan_util.cuh @@ -67,4 +67,4 @@ MDSPAN_INLINE_FUNCTION auto popc(uint64_t v) -> int32_t #endif // compiler } -} // end namespace raft::detail \ No newline at end of file +} // end namespace raft::detail diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp index 41da605ff0..4ed67d5fc5 100644 --- a/cpp/include/raft/core/device_coo_matrix.hpp +++ b/cpp/include/raft/core/device_coo_matrix.hpp @@ -395,4 +395,4 @@ auto make_device_coordinate_structure_view(raft::device_span rows, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp index 1d23c8912d..b0dbfa000d 100644 --- a/cpp/include/raft/core/device_csr_matrix.hpp +++ b/cpp/include/raft/core/device_csr_matrix.hpp @@ -422,4 +422,4 @@ auto make_device_compressed_structure_view(raft::device_span indptr, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/device_span.hpp b/cpp/include/raft/core/device_span.hpp index 
d3350b5e3a..abf72b6b2e 100644 --- a/cpp/include/raft/core/device_span.hpp +++ b/cpp/include/raft/core/device_span.hpp @@ -34,4 +34,4 @@ using device_span = span; /** * @} */ -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp index 7a216dc8a2..e0f95d2a77 100644 --- a/cpp/include/raft/core/host_coo_matrix.hpp +++ b/cpp/include/raft/core/host_coo_matrix.hpp @@ -393,4 +393,4 @@ auto make_host_coordinate_structure_view(raft::host_span rows, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp index e3cea3cd27..8a29d957f6 100644 --- a/cpp/include/raft/core/host_csr_matrix.hpp +++ b/cpp/include/raft/core/host_csr_matrix.hpp @@ -423,4 +423,4 @@ auto make_host_compressed_structure_view(raft::host_span indptr, /** @} */ -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/core/host_mdarray.hpp b/cpp/include/raft/core/host_mdarray.hpp index 3020cde32d..229619999d 100644 --- a/cpp/include/raft/core/host_mdarray.hpp +++ b/cpp/include/raft/core/host_mdarray.hpp @@ -253,4 +253,4 @@ auto make_host_vector(IndexType n) return make_host_mdarray(make_extents(n)); } -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/host_span.hpp b/cpp/include/raft/core/host_span.hpp index 36978dfca4..d31f8b4c30 100644 --- a/cpp/include/raft/core/host_span.hpp +++ b/cpp/include/raft/core/host_span.hpp @@ -35,4 +35,4 @@ using host_span = span; * @} */ -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/include/raft/core/logger-ext.hpp b/cpp/include/raft/core/logger-ext.hpp deleted file mode 100644 index 73fe463aba..0000000000 --- a/cpp/include/raft/core/logger-ext.hpp +++ /dev/null @@ -1,152 +0,0 @@ -/* 
- * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // RAFT_INLINE_CONDITIONAL - -#include // std::unique_ptr -#include // std::string -#include // std::unordered_map - -namespace raft { - -static const std::string RAFT_NAME = "raft"; -static const std::string default_log_pattern("[%L] [%H:%M:%S.%f] %v"); - -namespace detail { -RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...); -} -/** - * @brief The main Logging class for raft library. - * - * This class acts as a thin wrapper over the underlying `spdlog` interface. The - * design is done in this way in order to avoid us having to also ship `spdlog` - * header files in our installation. - * - * @todo This currently only supports logging to stdout. Need to add support in - * future to add custom loggers as well [Issue #2046] - */ -class logger { - public: - // @todo setting the logger once per process with - logger(std::string const& name_ = ""); - /** - * @brief Singleton method to get the underlying logger object - * - * @return the singleton logger object - */ - static logger& get(std::string const& name = ""); - - /** - * @brief Set the logging level. - * - * Only messages with level equal or above this will be printed - * - * @param[in] level logging level - * - * @note The log level will actually be set only if the input is within the - * range [RAFT_LEVEL_TRACE, RAFT_LEVEL_OFF]. 
If it is not, then it'll - * be ignored. See documentation of decisiontree for how this gets used - */ - void set_level(int level); - - /** - * @brief Set the logging pattern - * - * @param[in] pattern the pattern to be set. Refer this link - * https://github.com/gabime/spdlog/wiki/3.-Custom-formatting - * to know the right syntax of this pattern - */ - void set_pattern(const std::string& pattern); - - /** - * @brief Register a callback function to be run in place of usual log call - * - * @param[in] callback the function to be run on all logged messages - */ - void set_callback(void (*callback)(int lvl, const char* msg)); - - /** - * @brief Register a flush function compatible with the registered callback - * - * @param[in] flush the function to use when flushing logs - */ - void set_flush(void (*flush)()); - - /** - * @brief Tells whether messages will be logged for the given log level - * - * @param[in] level log level to be checked for - * @return true if messages will be logged for this level, else false - */ - bool should_log_for(int level) const; - /** - * @brief Query for the current log level - * - * @return the current log level - */ - int get_level() const; - - /** - * @brief Get the current logging pattern - * @return the pattern - */ - std::string get_pattern() const; - - /** - * @brief Main logging method - * - * @param[in] level logging level of this message - * @param[in] fmt C-like format string, followed by respective params - */ - void log(int level, const char* fmt, ...); - - /** - * @brief Flush logs by calling flush on underlying logger - */ - void flush(); - - ~logger(); - - private: - logger(); - // pimpl pattern: - // https://learn.microsoft.com/en-us/cpp/cpp/pimpl-for-compile-time-encapsulation-modern-cpp?view=msvc-170 - class impl; - std::unique_ptr pimpl; - static inline std::unordered_map> log_map; -}; // class logger - -/** - * @brief An object used for scoped log level setting - * - * Instances of `raft::log_level_setter` will set 
RAFT logging to the level - * indicated on construction and will revert to the previous set level on - * destruction. - */ -struct log_level_setter { - explicit log_level_setter(int level) - { - prev_level_ = logger::get(RAFT_NAME).get_level(); - logger::get(RAFT_NAME).set_level(level); - } - ~log_level_setter() { logger::get(RAFT_NAME).set_level(prev_level_); } - - private: - int prev_level_; -}; // class log_level_setter - -}; // namespace raft diff --git a/cpp/include/raft/core/logger-inl.hpp b/cpp/include/raft/core/logger-inl.hpp deleted file mode 100644 index ea5f4ea26e..0000000000 --- a/cpp/include/raft/core/logger-inl.hpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "logger-macros.hpp" - -#include - -#include -#include -#include -#include -#include -#include -// The logger-ext.hpp file contains the class declaration of the logger class. -// In this case, it is okay to include the logger-ext.hpp file because it -// contains no RAFT_EXPLICIT template instantiations. 
-#include "logger-ext.hpp" - -#define SPDLOG_HEADER_ONLY -#include -#include // RAFT_INLINE_CONDITIONAL - -#include // NOLINT -#include // NOLINT - -namespace raft { - -namespace detail { - -inline std::string format(const char* fmt, va_list& vl) -{ - va_list vl_copy; - va_copy(vl_copy, vl); - int length = std::vsnprintf(nullptr, 0, fmt, vl_copy); - assert(length >= 0); - std::vector buf(length + 1); - std::vsnprintf(buf.data(), length + 1, fmt, vl); - return std::string(buf.data()); -} - -RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...) -{ - va_list vl; - va_start(vl, fmt); - std::string str = format(fmt, vl); - va_end(vl); - return str; -} - -inline int convert_level_to_spdlog(int level) -{ - level = std::max(RAFT_LEVEL_OFF, std::min(RAFT_LEVEL_TRACE, level)); - return RAFT_LEVEL_TRACE - level; -} - -} // namespace detail - -class logger::impl { // defined privately here - // ... all private data and functions: all of these - // can now change without recompiling callers ... 
- public: - std::shared_ptr sink; - std::shared_ptr spdlogger; - std::string cur_pattern; - int cur_level; - - impl(std::string const& name_ = "") - : sink{std::make_shared()}, - spdlogger{std::make_shared(name_, sink)}, - cur_pattern() - { - } -}; // class logger::impl - -RAFT_INLINE_CONDITIONAL logger::logger(std::string const& name_) : pimpl(new impl(name_)) -{ - set_pattern(default_log_pattern); - set_level(RAFT_ACTIVE_LEVEL); -} - -RAFT_INLINE_CONDITIONAL logger& logger::get(std::string const& name) -{ - if (log_map.find(name) == log_map.end()) { log_map[name] = std::make_shared(name); } - return *log_map[name]; -} - -RAFT_INLINE_CONDITIONAL void logger::set_level(int level) -{ - level = raft::detail::convert_level_to_spdlog(level); - pimpl->spdlogger->set_level(static_cast(level)); -} - -RAFT_INLINE_CONDITIONAL void logger::set_pattern(const std::string& pattern) -{ - pimpl->cur_pattern = pattern; - pimpl->spdlogger->set_pattern(pattern); -} - -RAFT_INLINE_CONDITIONAL void logger::set_callback(void (*callback)(int lvl, const char* msg)) -{ - pimpl->sink->set_callback(callback); -} - -RAFT_INLINE_CONDITIONAL void logger::set_flush(void (*flush)()) { pimpl->sink->set_flush(flush); } - -RAFT_INLINE_CONDITIONAL bool logger::should_log_for(int level) const -{ - level = raft::detail::convert_level_to_spdlog(level); - auto level_e = static_cast(level); - return pimpl->spdlogger->should_log(level_e); -} - -RAFT_INLINE_CONDITIONAL int logger::get_level() const -{ - auto level_e = pimpl->spdlogger->level(); - return RAFT_LEVEL_TRACE - static_cast(level_e); -} - -RAFT_INLINE_CONDITIONAL std::string logger::get_pattern() const { return pimpl->cur_pattern; } - -RAFT_INLINE_CONDITIONAL void logger::log(int level, const char* fmt, ...) 
-{ - level = raft::detail::convert_level_to_spdlog(level); - auto level_e = static_cast(level); - // explicit check to make sure that we only expand messages when required - if (pimpl->spdlogger->should_log(level_e)) { - va_list vl; - va_start(vl, fmt); - auto msg = raft::detail::format(fmt, vl); - va_end(vl); - pimpl->spdlogger->log(level_e, msg); - } -} - -RAFT_INLINE_CONDITIONAL void logger::flush() { pimpl->spdlogger->flush(); } - -RAFT_INLINE_CONDITIONAL logger::~logger() {} - -}; // namespace raft diff --git a/cpp/include/raft/core/logger-macros.hpp b/cpp/include/raft/core/logger-macros.hpp index 5ddb072067..e32440dcce 100644 --- a/cpp/include/raft/core/logger-macros.hpp +++ b/cpp/include/raft/core/logger-macros.hpp @@ -15,92 +15,17 @@ */ #pragma once -/** - * @defgroup logging levels used in raft - * - * @note exactly match the corresponding ones (but reverse in terms of value) - * in spdlog for wrapping purposes - * - * @{ - */ -#define RAFT_LEVEL_TRACE 6 -#define RAFT_LEVEL_DEBUG 5 -#define RAFT_LEVEL_INFO 4 -#define RAFT_LEVEL_WARN 3 -#define RAFT_LEVEL_ERROR 2 -#define RAFT_LEVEL_CRITICAL 1 -#define RAFT_LEVEL_OFF 0 -/** @} */ - -#if !defined(RAFT_ACTIVE_LEVEL) -#define RAFT_ACTIVE_LEVEL RAFT_LEVEL_INFO -#endif - -/** - * @defgroup loggerMacros Helper macros for dealing with logging - * @{ - */ -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE) -#define RAFT_LOG_TRACE(fmt, ...) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - ss << raft::detail::format(fmt, ##__VA_ARGS__); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ - } while (0) -#else -#define RAFT_LOG_TRACE(fmt, ...) 
void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE) -#define RAFT_LOG_TRACE_VEC(ptr, len) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - print_vector(#ptr, ptr, len, ss); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ +#include + +#if (RAFT_LOG_ACTIVE_LEVEL <= RAFT_LOG_LEVEL_TRACE) +#define RAFT_LOG_TRACE_VEC(ptr, len) \ + do { \ + std::stringstream ss; \ + ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ + print_vector(#ptr, ptr, len, ss); \ + raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \ + RAFT_LOGGER_CALL(raft::default_logger(), raft::level_enum::trace, __VA_ARGS__) \ } while (0) #else #define RAFT_LOG_TRACE_VEC(ptr, len) void(0) #endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG) -#define RAFT_LOG_DEBUG(fmt, ...) \ - do { \ - std::stringstream ss; \ - ss << raft::detail::format("%s:%d ", __FILE__, __LINE__); \ - ss << raft::detail::format(fmt, ##__VA_ARGS__); \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_DEBUG, ss.str().c_str()); \ - } while (0) -#else -#define RAFT_LOG_DEBUG(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_INFO) -#define RAFT_LOG_INFO(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_INFO, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_INFO(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_WARN) -#define RAFT_LOG_WARN(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_WARN, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_WARN(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_ERROR) -#define RAFT_LOG_ERROR(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_ERROR, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_ERROR(fmt, ...) void(0) -#endif - -#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_CRITICAL) -#define RAFT_LOG_CRITICAL(fmt, ...) \ - raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_CRITICAL, fmt, ##__VA_ARGS__) -#else -#define RAFT_LOG_CRITICAL(fmt, ...) 
void(0) -#endif -/** @} */ diff --git a/cpp/include/raft/core/logger.hpp b/cpp/include/raft/core/logger.hpp deleted file mode 100644 index e64a0db257..0000000000 --- a/cpp/include/raft/core/logger.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "logger-ext.hpp" -#include "logger-macros.hpp" - -#if !defined(RAFT_COMPILED) -#include "logger-inl.hpp" -#endif diff --git a/cpp/include/raft/core/resource/device_id.hpp b/cpp/include/raft/core/resource/device_id.hpp index 570d815780..a371f9ddde 100644 --- a/cpp/include/raft/core/resource/device_id.hpp +++ b/cpp/include/raft/core/resource/device_id.hpp @@ -73,4 +73,4 @@ inline int get_device_id(resources const& res) /** * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/device_properties.hpp b/cpp/include/raft/core/resource/device_properties.hpp index a87c29f709..7ac780ef16 100644 --- a/cpp/include/raft/core/resource/device_properties.hpp +++ b/cpp/include/raft/core/resource/device_properties.hpp @@ -75,4 +75,4 @@ inline cudaDeviceProp& get_device_properties(resources const& res) /** * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/sub_comms.hpp b/cpp/include/raft/core/resource/sub_comms.hpp index 
11d2aed1e0..b4fef75d57 100644 --- a/cpp/include/raft/core/resource/sub_comms.hpp +++ b/cpp/include/raft/core/resource/sub_comms.hpp @@ -79,4 +79,4 @@ inline void set_subcomm(resources const& res, * @} */ -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp index 55da3037a9..6e5092f50f 100644 --- a/cpp/include/raft/core/sparse_types.hpp +++ b/cpp/include/raft/core/sparse_types.hpp @@ -222,4 +222,4 @@ class sparse_matrix { /* @} */ -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h index 42de4860a0..56cce4de8b 100644 --- a/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h +++ b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h @@ -406,4 +406,4 @@ struct FusedDistanceNNGemm * #include * - * raft::raft::resources handle; + * raft::resources handle; * int n_samples = 5000; * int n_features = 50; * diff --git a/cpp/include/raft/distance/fused_distance_nn.cuh b/cpp/include/raft/distance/fused_distance_nn.cuh index 25b1ae01ea..aa20bfeaf1 100755 --- a/cpp/include/raft/distance/fused_distance_nn.cuh +++ b/cpp/include/raft/distance/fused_distance_nn.cuh @@ -15,4 +15,4 @@ */ #pragma once -#include "fused_distance_nn-inl.cuh" \ No newline at end of file +#include "fused_distance_nn-inl.cuh" diff --git a/cpp/include/raft/label/classlabels.cuh b/cpp/include/raft/label/classlabels.cuh index 93c1080ff2..c539419738 100644 --- a/cpp/include/raft/label/classlabels.cuh +++ b/cpp/include/raft/label/classlabels.cuh @@ -118,4 +118,4 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer }; // namespace label }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/label/detail/merge_labels.cuh 
b/cpp/include/raft/label/detail/merge_labels.cuh index 5513f16b9f..891bc9313a 100644 --- a/cpp/include/raft/label/detail/merge_labels.cuh +++ b/cpp/include/raft/label/detail/merge_labels.cuh @@ -155,4 +155,4 @@ void merge_labels(value_idx* labels_a, } // namespace detail }; // namespace label -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/label/merge_labels.cuh b/cpp/include/raft/label/merge_labels.cuh index 2bf2fa830b..370b6b8996 100644 --- a/cpp/include/raft/label/merge_labels.cuh +++ b/cpp/include/raft/label/merge_labels.cuh @@ -68,4 +68,4 @@ void merge_labels(value_idx* labels_a, }; // namespace label }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/cholesky_r1_update.cuh b/cpp/include/raft/linalg/cholesky_r1_update.cuh index 292140b4dc..e938626b20 100644 --- a/cpp/include/raft/linalg/cholesky_r1_update.cuh +++ b/cpp/include/raft/linalg/cholesky_r1_update.cuh @@ -139,4 +139,4 @@ void choleskyRank1Update(raft::resources const& handle, }; // namespace linalg }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index a4247e618f..b377bad101 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -163,4 +163,4 @@ void coalesced_reduction(raft::resources const& handle, }; // end namespace linalg }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/detail/add.cuh b/cpp/include/raft/linalg/detail/add.cuh index 121ac10e24..0227fea4a4 100644 --- a/cpp/include/raft/linalg/detail/add.cuh +++ b/cpp/include/raft/linalg/detail/add.cuh @@ -60,4 +60,4 @@ void addDevScalar( } // namespace detail } // namespace linalg -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/linalg/divide.cuh 
b/cpp/include/raft/linalg/divide.cuh index d23c7d60a6..2b9a7ba485 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -99,4 +99,4 @@ void divide_scalar(raft::resources const& handle, }; // end namespace linalg }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 9f03f54f9a..7245d31191 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -223,4 +223,4 @@ void eig_jacobi(raft::resources const& handle, }; // end namespace linalg }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/eltwise.cuh b/cpp/include/raft/linalg/eltwise.cuh index 2e6c1a4ab5..569845d488 100644 --- a/cpp/include/raft/linalg/eltwise.cuh +++ b/cpp/include/raft/linalg/eltwise.cuh @@ -97,4 +97,4 @@ void eltwiseDivideCheckZero( }; // end namespace linalg }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh index 31bad62930..6b33561f48 100644 --- a/cpp/include/raft/linalg/gemv.cuh +++ b/cpp/include/raft/linalg/gemv.cuh @@ -307,4 +307,4 @@ void gemv(raft::resources const& handle, }; // namespace linalg }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/linalg_types.hpp b/cpp/include/raft/linalg/linalg_types.hpp index 9c81fbc177..aa3e402988 100644 --- a/cpp/include/raft/linalg/linalg_types.hpp +++ b/cpp/include/raft/linalg/linalg_types.hpp @@ -39,4 +39,4 @@ enum class FillMode { UPPER, LOWER }; */ enum class Operation { NON_TRANSPOSE, TRANSPOSE }; -} // end namespace raft::linalg \ No newline at end of file +} // end namespace raft::linalg diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh index 21575d7806..5188e69268 100644 --- a/cpp/include/raft/linalg/lstsq.cuh +++ b/cpp/include/raft/linalg/lstsq.cuh @@ 
-248,4 +248,4 @@ void lstsq_qr(raft::resources const& handle, }; // namespace linalg }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/map_reduce.cuh b/cpp/include/raft/linalg/map_reduce.cuh index 1886c941b9..505aade1cf 100644 --- a/cpp/include/raft/linalg/map_reduce.cuh +++ b/cpp/include/raft/linalg/map_reduce.cuh @@ -115,4 +115,4 @@ void map_reduce(raft::resources const& handle, } // end namespace raft::linalg -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/linalg/map_then_reduce.cuh b/cpp/include/raft/linalg/map_then_reduce.cuh index a69ac6df36..f4ab356f1c 100644 --- a/cpp/include/raft/linalg/map_then_reduce.cuh +++ b/cpp/include/raft/linalg/map_then_reduce.cuh @@ -91,4 +91,4 @@ template /** @} */ // end of group matrix_norm -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/matrix/reverse.cuh b/cpp/include/raft/matrix/reverse.cuh index 42057bb0f5..c10fa8f5f0 100644 --- a/cpp/include/raft/matrix/reverse.cuh +++ b/cpp/include/raft/matrix/reverse.cuh @@ -69,4 +69,4 @@ void row_reverse(raft::resources const& handle, } /** @} */ // end group matrix_reverse -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index cd2d76a863..072f0c18ac 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -55,4 +55,4 @@ void scatter(raft::resources const& handle, detail::scatter(handle, inout, map, col_batch_size); } -} // namespace raft::matrix \ No newline at end of file +} // namespace raft::matrix diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index c922a0d7f4..caff6ea341 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ 
b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -228,4 +228,4 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t +#include #include #include #include diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 388dd60f14..44d55c36de 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -16,7 +16,8 @@ #pragma once -#include // RAFT_LOG_TRACE +#include +#include #include #include // raft::resources #include // is_min_close, DistanceType diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 02610f9afb..64e4a3ea7a 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -709,7 +709,8 @@ template > RAFT_KERNEL #ifdef __CUDA_ARCH__ -#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) +#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \ + (__CUDA_ARCH__) == 1200 __launch_bounds__(BLOCK_SIZE) #else __launch_bounds__(BLOCK_SIZE, 4) diff --git a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh index bade4385fb..c2f531263d 100644 --- a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh +++ b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh @@ -76,7 +76,7 @@ void epsUnexpL2SqNeighborhood(bool* adj, * #include * #include * using namespace raft::neighbors; - * raft::raft::resources handle; + * raft::resources handle; * ... 
* auto adj = raft::make_device_matrix(handle, m * n); * auto vd = raft::make_device_vector(handle, m+1); @@ -120,4 +120,4 @@ void eps_neighbors_l2sq(raft::resources const& handle, } // namespace raft::neighbors::epsilon_neighborhood -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/neighbors/ivf_flat_codepacker.hpp b/cpp/include/raft/neighbors/ivf_flat_codepacker.hpp index 5379788ab4..db03d78105 100644 --- a/cpp/include/raft/neighbors/ivf_flat_codepacker.hpp +++ b/cpp/include/raft/neighbors/ivf_flat_codepacker.hpp @@ -87,4 +87,4 @@ _RAFT_HOST_DEVICE void unpack_1( } } } -} // namespace raft::neighbors::ivf_flat::codepacker \ No newline at end of file +} // namespace raft::neighbors::ivf_flat::codepacker diff --git a/cpp/include/raft/random/detail/curand_wrappers.hpp b/cpp/include/raft/random/detail/curand_wrappers.hpp index 969d739cc1..d62e64d532 100644 --- a/cpp/include/raft/random/detail/curand_wrappers.hpp +++ b/cpp/include/raft/random/detail/curand_wrappers.hpp @@ -54,4 +54,4 @@ inline curandStatus_t curandGenerateNormal( /** @} */ }; // end namespace detail -}; // end namespace raft::random \ No newline at end of file +}; // end namespace raft::random diff --git a/cpp/include/raft/random/detail/permute.cuh b/cpp/include/raft/random/detail/permute.cuh index 37caa51ad3..b1c56afa0c 100644 --- a/cpp/include/raft/random/detail/permute.cuh +++ b/cpp/include/raft/random/detail/permute.cuh @@ -161,4 +161,4 @@ void permute(IntType* perms, } }; // end namespace detail -}; // end namespace raft::random \ No newline at end of file +}; // end namespace raft::random diff --git a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh index 9ad7c68f87..12c01fc5d7 100644 --- a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh +++ b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh @@ -54,8 +54,8 @@ DI void gen_and_update_bits(IdxT& src_id, } else { src_bit = 
dst_bit = true; } - if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (r_scale - curr_depth - 1)); } - if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (c_scale - curr_depth - 1)); } + if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (curr_depth)); } + if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (curr_depth)); } } template @@ -151,15 +151,16 @@ RAFT_KERNEL rmat_gen_kernel(IdxT* out, raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0}; auto min_scale = min(r_scale, c_scale); IdxT i = 0; - for (; i < min_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a, a + b, a + b + c, r_scale, c_scale, i, gen); - } - for (; i < r_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a + b, a + b, ProbT(1), r_scale, c_scale, i, gen); - } - for (; i < c_scale; ++i) { - gen_and_update_bits(src_id, dst_id, a + c, ProbT(1), ProbT(1), r_scale, c_scale, i, gen); + // Whether we have more rows than columns. + const bool more_rows = r_scale > c_scale; + + for (; i < max_scale; ++i) { + ProbT A = (i < min_scale) ? a : (more_rows ? a + b : a + c); + ProbT AB = (i < min_scale) ? a + b : (more_rows ? a + b : ProbT(1)); + ProbT ABC = (i < min_scale) ? a + b + c : ProbT(1); + gen_and_update_bits(src_id, dst_id, A, AB, ABC, r_scale, c_scale, i, gen); } + store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges); } diff --git a/cpp/include/raft/random/device/sample.cuh b/cpp/include/raft/random/device/sample.cuh index d0e5200185..67b98f12fe 100644 --- a/cpp/include/raft/random/device/sample.cuh +++ b/cpp/include/raft/random/device/sample.cuh @@ -27,12 +27,14 @@ namespace raft::random::device { /** * @brief warp-level random sampling of an index. + * * It selects an index with the given discrete probability - * distribution(represented by weights of each index) + * distribution(represented by weights of each index). + * Only thread 0 will contain the valid reduced result. 
+ * * @param rng random number generator, must have next_u32() function * @param weight weight of the rank/index. * @param idx index to be used as rank - * @return only the thread0 will contain valid reduced result */ template DI void warp_random_sample(rng_t& rng, T& weight, i_t& idx) diff --git a/cpp/include/raft/random/make_blobs.cuh b/cpp/include/raft/random/make_blobs.cuh index 4fd1f44f64..296b7ab283 100644 --- a/cpp/include/raft/random/make_blobs.cuh +++ b/cpp/include/raft/random/make_blobs.cuh @@ -187,4 +187,4 @@ void make_blobs( } // end namespace raft::random -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/random/rmat_rectangular_generator.cuh b/cpp/include/raft/random/rmat_rectangular_generator.cuh index 5598b25c8e..cdd89f40dd 100644 --- a/cpp/include/raft/random/rmat_rectangular_generator.cuh +++ b/cpp/include/raft/random/rmat_rectangular_generator.cuh @@ -30,8 +30,18 @@ namespace raft::random { /** * @brief Generate a bipartite RMAT graph for a rectangular adjacency matrix. * - * This is the most general of several overloads of `rmat_rectangular_gen` - * in this file, and thus has the most detailed documentation. + * This function generates a random graph represented by a (sparse) adjacency matrix. As described + * in [1], to generate connections, we recursively subdivide the adjacency matrix into four + * equal-sized partitions, and distribute edges within these partitions with unequal + * probabilities. The probabilities are described by numbers [a, b, c, d]. We choose the upper left + * partition with probability `a`. The chosen partition is again subdivided into four smaller + * partitions, and the procedure is repeated until we reach a single element (1 x 1 partition). + * + * We can prescribe a different probability distribution at each iteration. The `theta` array stores + * the probability values for each level.
+ * + * [1] "R-MAT: A Recursive Model for Graph Mining" Deepayan Chakrabarti, Yiping Zhan, Christos + * Faloutsos (2004) https://doi.org/10.1137/1.9781611972740.43 * * @tparam IdxT Type of each node index * @tparam ProbT Data type used for probability distributions (either fp32 or fp64) @@ -49,11 +59,14 @@ namespace raft::random { * @param[out] out_dst Destination node id's [on device]. `out_src` and `out_dst` * together form the struct-of-arrays representation of the same * output data as `out`. - * @param[in] theta distribution of each quadrant at each level of resolution. - * Since these are probabilities, each of the 2x2 matrices for - * each level of the RMAT must sum to one. [on device] - * [dim = max(r_scale, c_scale) x 2 x 2]. Of course, it is assumed - * that each of the group of 2 x 2 numbers all sum up to 1. + * @param[in] theta array [on device] with the distribution of each quadrant at each level of + * resolution. theta = [a0, b0, c0, d0, a1, b1, c1, d1, ...], where + * [a0, b0, c0, d0] defines the probability at the finest level (2x2). + * The last four elements in the array describe the probability in the + * coarsest level (where matrix size = [2^r_scale, 2^c_scale]). + * Since these are probabilities, the four [a_i, b_i, c_i, d_i] values for + * each level of the RMAT must sum to one. + * [dim = max(r_scale, c_scale) x 2 x 2]. * @param[in] r_scale 2^r_scale represents the number of source nodes * @param[in] c_scale 2^c_scale represents the number of destination nodes * diff --git a/cpp/include/raft/random/sample_without_replacement.cuh b/cpp/include/raft/random/sample_without_replacement.cuh index fad1d4adfa..6e3d63ab9f 100644 --- a/cpp/include/raft/random/sample_without_replacement.cuh +++ b/cpp/include/raft/random/sample_without_replacement.cuh @@ -166,4 +166,4 @@ void sample_without_replacement(Args... 
args) /** @} */ -} // end namespace raft::random \ No newline at end of file +} // end namespace raft::random diff --git a/cpp/include/raft/solver/detail/lap_kernels.cuh b/cpp/include/raft/solver/detail/lap_kernels.cuh index 383c3ab713..3c25852240 100644 --- a/cpp/include/raft/solver/detail/lap_kernels.cuh +++ b/cpp/include/raft/solver/detail/lap_kernels.cuh @@ -26,6 +26,7 @@ #include "../linear_assignment_types.hpp" +#include #include #include @@ -552,4 +553,4 @@ RAFT_KERNEL kernel_calcObjValPrimal(weight_t* d_obj_val_primal, } } -} // namespace raft::solver::detail \ No newline at end of file +} // namespace raft::solver::detail diff --git a/cpp/include/raft/solver/linear_assignment.cuh b/cpp/include/raft/solver/linear_assignment.cuh index 7ee0f5fbc3..2357c56422 100644 --- a/cpp/include/raft/solver/linear_assignment.cuh +++ b/cpp/include/raft/solver/linear_assignment.cuh @@ -331,4 +331,4 @@ class LinearAssignmentProblem { } // namespace raft::solver -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/convert/coo.cuh b/cpp/include/raft/sparse/convert/coo.cuh index b5568ef7d9..ba3efc7ff0 100644 --- a/cpp/include/raft/sparse/convert/coo.cuh +++ b/cpp/include/raft/sparse/convert/coo.cuh @@ -43,4 +43,4 @@ void csr_to_coo( }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 081192ed44..73d099a719 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -18,10 +18,12 @@ #pragma once -#include +#include +#include #include #include #include +#include #include #include @@ -129,6 +131,80 @@ void bitmap_to_csr(raft::resources const& handle, detail::bitmap_to_csr(handle, bitmap, csr); } +/** + * @brief Converts a bitset matrix to a Compressed Sparse Row (CSR) format matrix. + * + * The bitset format inherently supports only a single-row matrix (rows=1). 
If the CSR matrix + * requires multiple rows, the data from the bitset will be repeated for each row in the output. + * + * Example usage: + * + * @code{.cpp} + * #include + * #include + * #include + * + * #include + * + * using bitset_t = uint32_t; + * using index_t = int; + * using value_t = float; + * using nnz_t = index_t; + * + * raft::resources handle; + * auto stream = resource::get_cuda_stream(handle); + * index_t n_rows = 3; + * index_t n_cols = 30; + * + * nnz_t nnz_for_bitset = 4; + * nnz_t nnz_for_csr = nnz_for_bitset * n_rows; + * + * index_t bitset_size = (n_cols + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); // = 1 + * + * rmm::device_uvector bitset_d(bitset_size, stream); + * std::vector bitset_h = { + * bitset_t(0b11001010), + * }; // nnz_for_bitset = 4; + * + * raft::copy(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + * + * auto bitset_view = raft::core::bitset_view(bitset_d.data(), n_cols); + * auto csr = raft::make_device_csr_matrix(handle, n_rows, n_cols, nnz_for_csr); + * + * raft::sparse::convert::bitset_to_csr(handle, bitset_view, csr); + * resource::sync_stream(handle); + * + * // Results: + * // csr.indptr = [0, 4, 8, 12]; + * // csr.indices = [1, 3, 6, 7, + * // 1, 3, 6, 7, + * // 1, 3, 6, 7]; + * // csr.values = [1, 1, 1, 1, + * // 1, 1, 1, 1, + * // 1, 1, 1, 1]; + * @endcode + * + * @tparam bitset_t The data type of the elements in the bitset matrix. + * @tparam index_t The data type used for indexing the elements in the matrices. + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to + * raft::device_csr_matrix. + * + * @param[in] handle The RAFT handle containing the CUDA stream for operations. + * @param[in] bitset The bitset matrix view, to be converted to CSR format. + * @param[out] csr Output parameter where the resulting CSR matrix is stored. In the + * bitset, each '1' bit corresponds to a non-zero element in the CSR matrix. 
+ */ +template >> +void bitset_to_csr(raft::resources const& handle, + raft::core::bitset_view bitset, + csr_matrix_t& csr) +{ + detail::bitset_to_csr(handle, bitset, csr); +} + }; // end NAMESPACE convert }; // end NAMESPACE sparse }; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/convert/dense.cuh b/cpp/include/raft/sparse/convert/dense.cuh index a146113a86..6613049f25 100644 --- a/cpp/include/raft/sparse/convert/dense.cuh +++ b/cpp/include/raft/sparse/convert/dense.cuh @@ -64,4 +64,4 @@ void csr_to_dense(cusparseHandle_t handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh index 769d5de9be..be62f76502 100644 --- a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -41,61 +42,68 @@ namespace sparse { namespace convert { namespace detail { -// Threads per block in calc_nnz_by_rows_kernel. -static const constexpr int calc_nnz_by_rows_tpb = 32; +// Threads per block in bitmap_to_csr. 
+static const constexpr int bitmap_to_csr_tpb = 256; template -RAFT_KERNEL __launch_bounds__(calc_nnz_by_rows_tpb) calc_nnz_by_rows_kernel(const bitmap_t* bitmap, - index_t num_rows, - index_t num_cols, - index_t bitmap_num, - nnz_t* nnz_per_row) +RAFT_KERNEL __launch_bounds__(bitmap_to_csr_tpb) calc_nnz_by_rows_kernel(const bitmap_t* bitmap, + index_t num_rows, + index_t num_cols, + index_t bitmap_num, + nnz_t* sub_col_nnz, + index_t bits_per_sub_col) { - constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); - constexpr bitmap_t ONE = bitmap_t(1u); + using mutable_bitmap_t = typename std::remove_const_t; + using BlockReduce = cub::BlockReduce; + + __shared__ typename BlockReduce::TempStorage reduce_storage; + constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; - auto block = cg::this_thread_block(); - auto tile = cg::tiled_partition<32>(block); + const auto tid = threadIdx.x; + const auto row = blockIdx.x; - int lane_id = threadIdx.x & 0x1f; + const auto num_sub_cols = gridDim.y; + const auto sub_col = blockIdx.y; - for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { - index_t offset = 0; - index_t s_bit = row * num_cols; - index_t e_bit = s_bit + num_cols; - index_t l_sum = 0; + size_t s_bit = size_t(row) * num_cols + sub_col * bits_per_sub_col; + size_t e_bit = min(s_bit + bits_per_sub_col, size_t(num_cols) * (row + 1)); - int s_gap = 0; - int e_gap = 0; + nnz_t l_sum = 0; + nnz_t g_sum = 0; - while (offset < num_cols) { - index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; - std::remove_const_t l_bitmap = 0; + index_t s_offset = s_bit % BITS_PER_BITMAP; + size_t bitmap_idx = s_bit / BITS_PER_BITMAP; - if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + if (tid == 0 && s_offset != 0) { + mutable_bitmap_t l_bitmap = bitmap[bitmap_idx]; - offset += BITS_PER_BITMAP * warpSize; + l_bitmap >>= s_offset; - s_gap = s_bit - bitmap_idx * BITS_PER_BITMAP; - if (s_gap > 0) { - l_bitmap >>= s_gap; - l_bitmap <<= s_gap; 
- offset -= s_gap; - } + size_t remaining_bits = min(size_t(BITS_PER_BITMAP - s_offset), e_bit - s_bit); - e_gap = (bitmap_idx + 1) * BITS_PER_BITMAP - e_bit; - if (e_gap > 0) { - l_bitmap <<= e_gap; - l_bitmap >>= e_gap; - } - l_sum += static_cast(raft::detail::popc(l_bitmap)); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); } + l_sum += static_cast(raft::detail::popc(l_bitmap)); + } + if (s_offset != 0) { s_bit += (BITS_PER_BITMAP - s_offset); } - l_sum = cg::reduce(tile, l_sum, cg::plus()); + for (size_t bit_idx = s_bit; bit_idx < e_bit; bit_idx += BITS_PER_BITMAP * blockDim.x) { + mutable_bitmap_t l_bitmap = 0; + bitmap_idx = bit_idx / BITS_PER_BITMAP + tid; - if (lane_id == 0) { *(nnz_per_row + row) += static_cast(l_sum); } + index_t remaining_bits = min(BITS_PER_BITMAP, index_t(e_bit - bitmap_idx * BITS_PER_BITMAP)); + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + l_sum += static_cast(raft::detail::popc(l_bitmap)); } + g_sum = BlockReduce(reduce_storage).Reduce(l_sum, cub::Sum()); + stg(g_sum, sub_col_nnz + sub_col + row * num_sub_cols, tid == 0); } template @@ -103,144 +111,164 @@ void calc_nnz_by_rows(raft::resources const& handle, const bitmap_t* bitmap, index_t num_rows, index_t num_cols, - nnz_t* nnz_per_row) + nnz_t* sub_col_nnz, + size_t& sub_nnz_size, + index_t& bits_per_sub_col) { - auto stream = resource::get_cuda_stream(handle); - const index_t total = num_rows * num_cols; - const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); - - int dev_id, sm_count, blocks_per_sm; + if (sub_nnz_size == 0) { + bits_per_sub_col = bitmap_to_csr_tpb * sizeof(index_t) * 8 * 8; + auto grid_dim_y = (num_cols + bits_per_sub_col - 1) / bits_per_sub_col; + sub_nnz_size = num_rows * ((num_cols + bits_per_sub_col - 1) / bits_per_sub_col); + return; + } 
+ auto stream = resource::get_cuda_stream(handle); + const size_t total = num_rows * num_cols; + const size_t bitmap_num = + (total + index_t(sizeof(bitmap_t) * 8) - 1) / index_t(sizeof(bitmap_t) * 8); - cudaGetDevice(&dev_id); - cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); - cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &blocks_per_sm, calc_nnz_by_rows_kernel, calc_nnz_by_rows_tpb, 0); + auto block_x = num_rows; + auto block_y = sub_nnz_size / num_rows; + dim3 grid(block_x, block_y, 1); - index_t max_active_blocks = sm_count * blocks_per_sm; - auto grid = std::min(max_active_blocks, raft::ceildiv(bitmap_num, index_t(calc_nnz_by_rows_tpb))); - auto block = calc_nnz_by_rows_tpb; + auto block = bitmap_to_csr_tpb; - calc_nnz_by_rows_kernel - <<>>(bitmap, num_rows, num_cols, bitmap_num, nnz_per_row); + calc_nnz_by_rows_kernel<<>>( + bitmap, num_rows, num_cols, bitmap_num, sub_col_nnz, bits_per_sub_col); RAFT_CUDA_TRY(cudaPeekAtLastError()); } -/* - Execute the exclusive_scan within one warp with no inter-warp communication. - This function calculates the exclusive prefix sum of `value` across threads within the same warp. - Each thread in the warp will end up with the sum of all the values of the threads with lower IDs - in the same warp, with the first thread always getting a sum of 0. -*/ -template -RAFT_DEVICE_INLINE_FUNCTION value_t warp_exclusive_scan(value_t value) -{ - int lane_id = threadIdx.x & 0x1f; - value_t shifted_value = __shfl_up_sync(0xffffffff, value, 1, warpSize); - if (lane_id == 0) shifted_value = 0; - - value_t sum = shifted_value; - - for (int i = 1; i < warpSize; i *= 2) { - value_t n = __shfl_up_sync(0xffffffff, sum, i, warpSize); - if (lane_id >= i) { sum += n; } - } - return sum; -} - -// Threads per block in fill_indices_by_rows_kernel. 
-static const constexpr int fill_indices_by_rows_tpb = 32; - template -RAFT_KERNEL __launch_bounds__(fill_indices_by_rows_tpb) +RAFT_KERNEL __launch_bounds__(bitmap_to_csr_tpb) fill_indices_by_rows_kernel(const bitmap_t* bitmap, - const index_t* indptr, - index_t num_rows, - index_t num_cols, + index_t* indptr, + size_t num_rows, + size_t num_cols, nnz_t nnz, - index_t bitmap_num, - index_t* indices) + index_t* indices, + nnz_t* sub_col_nnz, + index_t bits_per_sub_col) { - constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); constexpr bitmap_t ONE = bitmap_t(1u); constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; - int lane_id = threadIdx.x & 0x1f; + using mutable_bitmap_t = typename std::remove_const_t; + using BlockScan = cub::BlockScan; + + __shared__ typename BlockScan::TempStorage scan_storage; + + const auto tid = threadIdx.x; + const auto row = blockIdx.x; + + const auto num_sub_cols = gridDim.y; + const auto sub_col = blockIdx.y; // Ensure the HBM allocated for CSR values is sufficient to handle all non-zero bitmap bits. // An assert will trigger if the allocated HBM is insufficient when `NDEBUG` isn't defined. // Note: Assertion is active only if `NDEBUG` is undefined. 
if constexpr (check_nnz) { - if (lane_id == 0) { assert(nnz < indptr[num_rows]); } + if (tid == 0) { assert(nnz < sub_col_nnz[num_rows * num_sub_cols]); } } + size_t s_bit = size_t(row) * num_cols + sub_col * bits_per_sub_col; + size_t e_bit = min(s_bit + bits_per_sub_col, size_t(num_cols) * (row + 1)); + + size_t l_sum = 0; + __shared__ size_t g_sum; + + index_t s_offset = s_bit % BITS_PER_BITMAP; + size_t bitmap_idx = s_bit / BITS_PER_BITMAP; + + if (tid == 0 && row == 0 && sub_col == 0) { indptr[0] = 0; } + if (tid == 0 && sub_col == 0) { indptr[row + 1] = sub_col_nnz[(row + 1) * num_sub_cols]; } + + size_t g_nnz = sub_col_nnz[sub_col + row * num_sub_cols]; + index_t* sub_cols_indices_addr = indices + g_nnz; + + bool guard[BITS_PER_BITMAP]; + + index_t g_bits = sub_col * bits_per_sub_col + tid * BITS_PER_BITMAP; + + if (tid == 0 && s_offset != 0) { + mutable_bitmap_t l_bitmap = bitmap[bitmap_idx]; + l_bitmap >>= s_offset; + + size_t remaining_bits = min(size_t(BITS_PER_BITMAP - s_offset), e_bit - s_bit); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + +#pragma unroll + for (int i = 0; i < BITS_PER_BITMAP; i++) { + guard[i] = l_bitmap & (ONE << i); + } #pragma unroll - for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { - index_t g_sum = 0; - index_t s_bit = row * num_cols; - index_t e_bit = s_bit + num_cols; - index_t indptr_row = indptr[row]; + for (int i = 0; i < BITS_PER_BITMAP; i++) { + stg(index_t(i + g_bits), sub_cols_indices_addr + l_sum, guard[i]); + l_sum += guard[i]; + } + } + + if (tid == 0) { g_sum = l_sum; } + __syncthreads(); + + if (s_offset != 0) { + s_bit += (BITS_PER_BITMAP - s_offset); + g_bits += (BITS_PER_BITMAP - s_offset); + } + + for (size_t bit_idx = s_bit; bit_idx < e_bit; bit_idx += BITS_PER_BITMAP * blockDim.x) { + mutable_bitmap_t l_bitmap = 0; + bitmap_idx = bit_idx / BITS_PER_BITMAP + tid; + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = 
bitmap[bitmap_idx]; } + + index_t remaining_bits = min(BITS_PER_BITMAP, index_t(e_bit - bitmap_idx * BITS_PER_BITMAP)); + if (remaining_bits < BITS_PER_BITMAP) { + l_bitmap &= ((mutable_bitmap_t(1) << remaining_bits) - 1); + } + + int l_bits = raft::detail::popc(l_bitmap); + int l_sum_32b = 0; + BlockScan(scan_storage).InclusiveSum(l_bits, l_sum_32b); + l_sum = l_sum_32b + g_sum - l_bits; + __syncthreads(); #pragma unroll - for (index_t offset = 0; offset < num_cols; offset += BITS_PER_BITMAP * warpSize) { - index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; - std::remove_const_t l_bitmap = 0; - index_t l_offset = offset + lane_id * BITS_PER_BITMAP - (s_bit % BITS_PER_BITMAP); - - if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } - - if (s_bit > bitmap_idx * BITS_PER_BITMAP) { - l_bitmap >>= (s_bit - bitmap_idx * BITS_PER_BITMAP); - l_bitmap <<= (s_bit - bitmap_idx * BITS_PER_BITMAP); - } - - if ((bitmap_idx + 1) * BITS_PER_BITMAP > e_bit) { - l_bitmap <<= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); - l_bitmap >>= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); - } - - index_t l_sum = - g_sum + warp_exclusive_scan(static_cast(raft::detail::popc(l_bitmap))); - - for (int i = 0; i < BITS_PER_BITMAP; i++) { - if (l_bitmap & (ONE << i)) { - indices[indptr_row + l_sum] = l_offset + i; - l_sum++; - } - } - g_sum = __shfl_sync(0xffffffff, l_sum, warpSize - 1); + for (int i = 0; i < BITS_PER_BITMAP; i++) { + guard[i] = l_bitmap & (ONE << i); } +#pragma unroll + for (int i = 0; i < BITS_PER_BITMAP; i++) { + stg(index_t(i + g_bits), sub_cols_indices_addr + l_sum, guard[i]); + l_sum += guard[i]; + } + + if (threadIdx.x == (bitmap_to_csr_tpb - 1)) { g_sum += (l_sum_32b); } + g_bits += BITS_PER_BITMAP * blockDim.x; } } template void fill_indices_by_rows(raft::resources const& handle, const bitmap_t* bitmap, - const index_t* indptr, + index_t* indptr, index_t num_rows, index_t num_cols, nnz_t nnz, - index_t* indices) + index_t* indices, 
+ nnz_t* sub_col_nnz, + index_t bits_per_sub_col, + size_t sub_nnz_size) { - auto stream = resource::get_cuda_stream(handle); - const index_t total = num_rows * num_cols; - const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); - - int dev_id, sm_count, blocks_per_sm; - - cudaGetDevice(&dev_id); - cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); - cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &blocks_per_sm, - fill_indices_by_rows_kernel, - fill_indices_by_rows_tpb, - 0); - - index_t max_active_blocks = sm_count * blocks_per_sm; - auto grid = std::min(max_active_blocks, num_rows); - auto block = fill_indices_by_rows_tpb; - - fill_indices_by_rows_kernel - <<>>(bitmap, indptr, num_rows, num_cols, nnz, bitmap_num, indices); + auto stream = resource::get_cuda_stream(handle); + auto block_x = num_rows; + auto block_y = sub_nnz_size / num_rows; + dim3 grid(block_x, block_y, 1); + + auto block = bitmap_to_csr_tpb; + + fill_indices_by_rows_kernel<<>>( + bitmap, indptr, num_rows, num_cols, nnz, indices, sub_col_nnz, bits_per_sub_col); RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -252,12 +280,9 @@ void bitmap_to_csr(raft::resources const& handle, raft::core::bitmap_view bitmap, csr_matrix_t& csr) { + using nnz_t = typename csr_matrix_t::nnz_type; auto csr_view = csr.structure_view(); - if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0 || csr_view.get_nnz() == 0) { - return; - } - RAFT_EXPECTS(bitmap.get_n_rows() == csr_view.get_n_rows(), "Number of rows in bitmap must be equal to " "number of rows in csr"); @@ -266,6 +291,8 @@ void bitmap_to_csr(raft::resources const& handle, "Number of columns in bitmap must be equal to " "number of columns in csr"); + if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0) { return; } + auto thrust_policy = resource::get_thrust_policy(handle); auto stream = resource::get_cuda_stream(handle); @@ -274,25 +301,52 @@ void bitmap_to_csr(raft::resources const& handle, 
RAFT_CUDA_TRY(cudaMemsetAsync(indptr, 0, (csr_view.get_n_rows() + 1) * sizeof(index_t), stream)); - calc_nnz_by_rows(handle, bitmap.data(), csr_view.get_n_rows(), csr_view.get_n_cols(), indptr); - thrust::exclusive_scan(thrust_policy, indptr, indptr + csr_view.get_n_rows() + 1, indptr); + size_t sub_nnz_size = 0; + index_t bits_per_sub_col = 0; + + // Get buffer size and number of bits per each sub-columns + calc_nnz_by_rows(handle, + bitmap.data(), + csr_view.get_n_rows(), + csr_view.get_n_cols(), + static_cast(nullptr), + sub_nnz_size, + bits_per_sub_col); + + rmm::device_async_resource_ref device_memory = resource::get_workspace_resource(handle); + rmm::device_uvector sub_nnz(sub_nnz_size + 1, stream, device_memory); + + calc_nnz_by_rows(handle, + bitmap.data(), + csr_view.get_n_rows(), + csr_view.get_n_cols(), + sub_nnz.data(), + sub_nnz_size, + bits_per_sub_col); + + thrust::exclusive_scan( + thrust_policy, sub_nnz.data(), sub_nnz.data() + sub_nnz_size + 1, sub_nnz.data()); if constexpr (is_device_csr_sparsity_owning_v) { - index_t nnz = 0; + nnz_t nnz = 0; RAFT_CUDA_TRY(cudaMemcpyAsync( - &nnz, indptr + csr_view.get_n_rows(), sizeof(index_t), cudaMemcpyDeviceToHost, stream)); + &nnz, sub_nnz.data() + sub_nnz_size, sizeof(nnz_t), cudaMemcpyDeviceToHost, stream)); resource::sync_stream(handle); csr.initialize_sparsity(nnz); + if (nnz == 0) return; } + constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; - fill_indices_by_rows( - handle, - bitmap.data(), - indptr, - csr_view.get_n_rows(), - csr_view.get_n_cols(), - csr_view.get_nnz(), - indices); + fill_indices_by_rows(handle, + bitmap.data(), + indptr, + csr_view.get_n_rows(), + csr_view.get_n_cols(), + csr_view.get_nnz(), + indices, + sub_nnz.data(), + bits_per_sub_col, + sub_nnz_size); thrust::fill_n(thrust_policy, csr.get_elements().data(), diff --git a/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh new file mode 100644 index 
0000000000..b3b341d793 --- /dev/null +++ b/cpp/include/raft/sparse/convert/detail/bitset_to_csr.cuh @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // detail::popc +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace raft { +namespace sparse { +namespace convert { +namespace detail { + +template +RAFT_KERNEL repeat_csr_kernel(const index_t* indptr, + const index_t* indices, + index_t* repeated_indptr, + index_t* repeated_indices, + nnz_t nnz, + index_t repeat_count) +{ + int global_id = blockIdx.x * blockDim.x + threadIdx.x; + bool guard = global_id < nnz; + index_t* repeated_indices_addr = repeated_indices + global_id; + + for (index_t i = global_id; i < repeat_count; i += gridDim.x * blockDim.x) { + repeated_indptr[i] = (i + 2) * nnz; + } + + __syncthreads(); + + index_t item; + item = (global_id < nnz) ? 
indices[global_id] : -1; + + __syncthreads(); + + for (index_t row = 0; row < repeat_count; ++row) { + index_t start_offset = row * nnz; + if (guard) { repeated_indices_addr[start_offset] = item; } + } +} + +template +void gpu_repeat_csr(raft::resources const& handle, + const index_t* d_indptr, + const index_t* d_indices, + nnz_t nnz, + index_t repeat_count, + index_t* d_repeated_indptr, + index_t* d_repeated_indices) +{ + if (nnz == 0) return; + + auto stream = resource::get_cuda_stream(handle); + index_t repeat_csr_tpb = 256; + index_t grid = (nnz + repeat_csr_tpb - 1) / (repeat_csr_tpb); + + repeat_csr_kernel<<>>( + d_indptr, d_indices, d_repeated_indptr, d_repeated_indices, nnz, repeat_count); +} + +template >> +void bitset_to_csr(raft::resources const& handle, + raft::core::bitset_view bitset, + csr_matrix_t& csr) +{ + using row_t = typename csr_matrix_t::row_type; + using nnz_t = typename csr_matrix_t::nnz_type; + + auto csr_view = csr.structure_view(); + + RAFT_EXPECTS(bitset.size() == csr_view.get_n_cols(), + "Number of size in bitset must be equal to " + "number of columns in csr"); + if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0) { return; } + + auto thrust_policy = resource::get_thrust_policy(handle); + auto stream = resource::get_cuda_stream(handle); + + index_t* indptr = csr_view.get_indptr().data(); + index_t* indices = csr_view.get_indices().data(); + + RAFT_CUDA_TRY(cudaMemsetAsync(indptr, 0, (csr_view.get_n_rows() + 1) * sizeof(index_t), stream)); + + size_t sub_nnz_size = 0; + index_t bits_per_sub_col = 0; + + // Get buffer size and number of bits per each sub-columns + calc_nnz_by_rows(handle, + bitset.data(), + row_t(1), + csr_view.get_n_cols(), + static_cast(nullptr), + sub_nnz_size, + bits_per_sub_col); + + rmm::device_async_resource_ref device_memory = resource::get_workspace_resource(handle); + rmm::device_uvector sub_nnz(sub_nnz_size + 1, stream, device_memory); + + calc_nnz_by_rows(handle, + bitset.data(), + row_t(1), + 
csr_view.get_n_cols(), + sub_nnz.data(), + sub_nnz_size, + bits_per_sub_col); + + thrust::exclusive_scan( + thrust_policy, sub_nnz.data(), sub_nnz.data() + sub_nnz_size + 1, sub_nnz.data()); + + nnz_t bitset_nnz = 0; + if constexpr (is_device_csr_sparsity_owning_v) { + RAFT_CUDA_TRY(cudaMemcpyAsync( + &bitset_nnz, sub_nnz.data() + sub_nnz_size, sizeof(nnz_t), cudaMemcpyDeviceToHost, stream)); + resource::sync_stream(handle); + csr.initialize_sparsity(bitset_nnz * csr_view.get_n_rows()); + if (bitset_nnz == 0) return; + } else { + bitset_nnz = csr_view.get_nnz() / csr_view.get_n_rows(); + } + + constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; + fill_indices_by_rows(handle, + bitset.data(), + indptr, + 1, + csr_view.get_n_cols(), + csr_view.get_nnz(), + indices, + sub_nnz.data(), + bits_per_sub_col, + sub_nnz_size); + if (csr_view.get_n_rows() > 1) { + gpu_repeat_csr(handle, + indptr, + indices, + bitset_nnz, + csr_view.get_n_rows() - 1, + indptr + 2, + indices + bitset_nnz); + } + + thrust::fill_n(thrust_policy, + csr.get_elements().data(), + csr_view.get_nnz(), + typename csr_matrix_t::element_type(1)); +} + +}; // end NAMESPACE detail +}; // end NAMESPACE convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh index 0a498bb1ca..469dac3c86 100644 --- a/cpp/include/raft/sparse/convert/detail/coo.cuh +++ b/cpp/include/raft/sparse/convert/detail/coo.cuh @@ -76,4 +76,4 @@ void csr_to_coo( }; // end NAMESPACE detail }; // end NAMESPACE convert }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index e60e494d34..ec3d0ec1c3 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -141,4 +141,4 @@ void 
csr_to_dense(cusparseHandle_t handle, }; // namespace detail }; // end NAMESPACE convert }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index 91ba363168..9a38c11a07 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -182,7 +182,7 @@ class COO { * @param n_rows: number of rows in the dense matrix * @param n_cols: number of columns in the dense matrix */ - void setSize(int n_rows, int n_cols) + void setSize(Index_Type n_rows, Index_Type n_cols) { this->n_rows = n_rows; this->n_cols = n_cols; @@ -192,7 +192,7 @@ class COO { * @brief Set the number of rows and cols for a square dense matrix * @param n: number of rows and cols */ - void setSize(int n) + void setSize(Index_Type n) { this->n_rows = n; this->n_cols = n; @@ -204,7 +204,10 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(int nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); } + void allocate(Index_Type nnz, bool init, cudaStream_t stream) + { + this->allocate(nnz, 0, init, stream); + } /** * @brief Allocate the underlying arrays @@ -213,7 +216,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(int nnz, int size, bool init, cudaStream_t stream) + void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream) { this->allocate(nnz, size, size, init, stream); } @@ -226,7 +229,8 @@ class COO { * @param init: should values be initialized to 0? 
* @param stream: stream to use for init */ - void allocate(int nnz, int n_rows, int n_cols, bool init, cudaStream_t stream) + void allocate( + Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { this->n_rows = n_rows; this->n_cols = n_cols; diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index e7d81f51aa..d5262581a3 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -20,4 +20,4 @@ #pragma once -#include \ No newline at end of file +#include diff --git a/cpp/include/raft/sparse/distance/detail/common.hpp b/cpp/include/raft/sparse/distance/detail/common.hpp index 0f463dac80..19fe9c1786 100644 --- a/cpp/include/raft/sparse/distance/detail/common.hpp +++ b/cpp/include/raft/sparse/distance/detail/common.hpp @@ -56,4 +56,4 @@ class distances_t { }; // namespace detail }; // namespace distance }; // namespace sparse -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh index 38aa106d78..59cfcfa186 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/coo_mask_row_iterators.cuh @@ -229,4 +229,4 @@ class chunked_mask_row_it : public mask_row_it { } // namespace detail } // namespace distance } // namespace sparse -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh index 5a1c152bd0..4a075cf530 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh +++ 
b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/dense_smem_strategy.cuh @@ -116,4 +116,4 @@ class dense_smem_strategy : public coo_spmv_strategy { } // namespace detail } // namespace distance } // namespace sparse -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh index ead44f0c51..5bcd1ff005 100644 --- a/cpp/include/raft/sparse/distance/distance.cuh +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -221,4 +221,4 @@ void pairwise_distance(raft::resources const& handle, }; // namespace sparse }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh index def305afb2..a97b935f58 100644 --- a/cpp/include/raft/sparse/linalg/add.cuh +++ b/cpp/include/raft/sparse/linalg/add.cuh @@ -96,4 +96,4 @@ void csr_add_finalize(const int* a_ind, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 57c9b986b4..8ac97259da 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -120,4 +120,4 @@ void coo_degree_nz(COO* in, int* results, cudaStream_t stream) }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh b/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh index 276960628d..bfffa413b2 100644 --- a/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh +++ b/cpp/include/raft/sparse/linalg/detail/masked_matmul.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -41,7 +42,7 @@ template & A, raft::device_matrix_view& B, - raft::core::bitmap_view& mask, + raft::core::bitmap_view& mask, 
raft::device_csr_matrix_view& C, std::optional> alpha, std::optional> beta) @@ -100,6 +101,69 @@ void masked_matmul(raft::resources const& handle, } } +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view& A, + raft::device_matrix_view& B, + raft::core::bitset_view& mask, + raft::device_csr_matrix_view& C, + std::optional> alpha, + std::optional> beta) +{ + index_t m = A.extent(0); + index_t n = B.extent(0); + index_t dim = A.extent(1); + + auto compressed_C_view = C.structure_view(); + + RAFT_EXPECTS(A.extent(1) == B.extent(1), "The dim of A must be equal to the dim of B."); + RAFT_EXPECTS(A.extent(0) == compressed_C_view.get_n_rows(), + "Number of rows in C must match the number of rows in A."); + RAFT_EXPECTS(B.extent(0) == compressed_C_view.get_n_cols(), + "Number of columns in C must match the number of columns in B."); + + auto stream = raft::resource::get_cuda_stream(handle); + + auto C_matrix = raft::make_device_csr_matrix(handle, compressed_C_view); + + // fill C + raft::sparse::convert::bitset_to_csr(handle, mask, C_matrix); + + if (m > 10 || alpha.has_value() || beta.has_value()) { + auto C_view = raft::make_device_csr_matrix_view( + C.get_elements().data(), compressed_C_view); + + // create B col_major view + auto B_col_major = raft::make_device_matrix_view( + B.data_handle(), dim, n); + + output_t default_alpha = static_cast(1.0f); + output_t default_beta = static_cast(0.0f); + + if (!alpha.has_value()) { alpha = raft::make_host_scalar_view(&default_alpha); } + if (!beta.has_value()) { beta = raft::make_host_scalar_view(&default_beta); } + + raft::sparse::linalg::sddmm(handle, + A, + B_col_major, + C_view, + raft::linalg::Operation::NON_TRANSPOSE, + raft::linalg::Operation::NON_TRANSPOSE, + *alpha, + *beta); + } else { + raft::sparse::distance::detail::faster_dot_on_csr(handle, + C.get_elements().data(), + compressed_C_view.get_nnz(), + compressed_C_view.get_indptr().data(), + compressed_C_view.get_indices().data(), + 
A.data_handle(), + B.data_handle(), + compressed_C_view.get_n_rows(), + dim); + } +} + } // namespace detail } // namespace linalg } // namespace sparse diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3702111f83..2619048388 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -232,4 +232,4 @@ void rowNormCsrCaller(const IdxType* ia, }; // end NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index 3a646b9a6e..579ee88d38 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -107,4 +107,4 @@ void csr_transpose(cusparseHandle_t handle, }; // end NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/masked_matmul.cuh b/cpp/include/raft/sparse/linalg/masked_matmul.cuh new file mode 100644 index 0000000000..c33a1afd43 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/masked_matmul.cuh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain A copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @defgroup masked_matmul Masked Matrix Multiplication + * @{ + */ + +/** + * @brief Performs a masked multiplication of dense matrices A and B, followed by an element-wise + * multiplication with the sparsity pattern defined by the mask, resulting in the computation + * C = alpha * ((A * B) ∘ spy(mask)) + beta * C. + * + * This function multiplies two dense matrices A and B, and then applies an element-wise + * multiplication using the sparsity pattern provided by the mask. The result is scaled by alpha + * and added to beta times the original matrix C. + * + * @tparam value_t Data type of elements in the input matrices (e.g., half, float, double) + * @tparam output_t Data type of elements in the output matrices (e.g., float, double) + * @tparam index_t Type used for matrix indices + * @tparam nnz_t Type used for the number of non-zero entries in CSR format + * @tparam bitmap_t Type of the bitmap used for the mask + * + * @param[in] handle RAFT handle for resource management + * @param[in] A Input dense matrix (device_matrix_view) with shape [m, k] + * @param[in] B Input dense matrix (device_matrix_view) with shape [n, k] + * @param[in] mask Bitmap view representing the sparsity pattern (bitmap_view) with logical shape + * [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is + * included (1) or masked out (0). 
+ * @param[inout] C Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, + * n] + * @param[in] alpha Optional scalar multiplier for the product of A and B (default: 1.0 if + * std::nullopt) + * @param[in] beta Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt) + */ +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view A, + raft::device_matrix_view B, + raft::core::bitmap_view mask, + raft::device_csr_matrix_view C, + std::optional> alpha = std::nullopt, + std::optional> beta = std::nullopt) +{ + detail::masked_matmul(handle, A, B, mask, C, alpha, beta); +} + +/** + * @brief Computes a sparse matrix product with a masked sparsity pattern and scaling. + * + * This function computes the result of: + * C = alpha * ((A * B) ∘ spy(mask)) + beta * C + * where: + * - A and B are dense input matrices. + * - "mask" defines the sparsity pattern for element-wise multiplication. + * - The result is scaled by alpha and added to beta times the original C. + * + * **Special behavior of the mask**: + * - The `bitset` mask represents a single row of data, with its bits indicating whether + * each corresponding element in (A * B) is included (1) or masked out (0). + * - If the output CSR matrix `C` has multiple rows, the `bitset` is logically repeated + * across all rows of `C`. For example, if `C` has `n_rows` rows, the same `bitset` + * pattern is applied to all rows. + * + * @tparam value_t Data type of input matrix elements (e.g., half, float, double). + * @tparam output_t Data type of output matrix elements (e.g., float, double). + * @tparam index_t Type for matrix indices. + * @tparam nnz_t Type for non-zero entries in CSR format. + * @tparam bitset_t Type for the bitset mask. + * + * @param[in] handle RAFT handle for managing resources. + * @param[in] A Dense input matrix [m, k] (row-major). + * @param[in] B Dense input matrix [n, k] (row-major). 
+ * @param[in] mask Bitmap view representing a single row [1, n], where each bit + * indicates if the corresponding element in (A * B) is included (1) + * or masked out (0). The pattern is repeated for all rows of `C`. + * @param[inout] C Output sparse matrix in CSR format [m, n]. + * @param[in] alpha Scalar multiplier for (A * B) (default: 1.0 if std::nullopt). + * @param[in] beta Scalar multiplier for the initial C (default: 0 if std::nullopt). + */ +template +void masked_matmul(raft::resources const& handle, + raft::device_matrix_view A, + raft::device_matrix_view B, + raft::core::bitset_view mask, + raft::device_csr_matrix_view C, + std::optional> alpha = std::nullopt, + std::optional> beta = std::nullopt) +{ + detail::masked_matmul(handle, A, B, mask, C, alpha, beta); +} + +/** @} */ // end of masked_matmul + +} // end namespace linalg +} // end namespace sparse +} // end namespace raft diff --git a/cpp/include/raft/sparse/linalg/masked_matmul.hpp b/cpp/include/raft/sparse/linalg/masked_matmul.hpp index 6cf6e834b9..32322b90f6 100644 --- a/cpp/include/raft/sparse/linalg/masked_matmul.hpp +++ b/cpp/include/raft/sparse/linalg/masked_matmul.hpp @@ -13,60 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#pragma once - -#include - -namespace raft { -namespace sparse { -namespace linalg { - /** - * @defgroup masked_matmul Masked Matrix Multiplication - * @{ + * This file is deprecated and will be removed in future release. + * Please use the cuh version instead. */ /** - * @brief Performs a masked multiplication of dense matrices A and B, followed by an element-wise - * multiplication with the sparsity pattern defined by the mask, resulting in the computation - * C = alpha * ((A * B) ∘ spy(mask)) + beta * C. - * - * This function multiplies two dense matrices A and B, and then applies an element-wise - * multiplication using the sparsity pattern provided by the mask. 
The result is scaled by alpha - * and added to beta times the original matrix C. - * - * @tparam value_t Data type of elements in the input matrices (e.g., half, float, double) - * @tparam output_t Data type of elements in the output matrices (e.g., float, double) - * @tparam index_t Type used for matrix indices - * @tparam nnz_t Type used for the number of non-zero entries in CSR format - * @tparam bitmap_t Type of the bitmap used for the mask - * - * @param[in] handle RAFT handle for resource management - * @param[in] A Input dense matrix (device_matrix_view) with shape [m, k] - * @param[in] B Input dense matrix (device_matrix_view) with shape [n, k] - * @param[in] mask Bitmap view representing the sparsity pattern (bitmap_view) with logical shape - * [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is - * included (1) or masked out (0). - * @param[inout] C Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, - * n] - * @param[in] alpha Optional scalar multiplier for the product of A and B (default: 1.0 if - * std::nullopt) - * @param[in] beta Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt) + * DISCLAIMER: this file is deprecated: use masked_matmul.cuh instead */ -template -void masked_matmul(raft::resources const& handle, - raft::device_matrix_view A, - raft::device_matrix_view B, - raft::core::bitmap_view mask, - raft::device_csr_matrix_view C, - std::optional> alpha = std::nullopt, - std::optional> beta = std::nullopt) -{ - detail::masked_matmul(handle, A, B, mask, C, alpha, beta); -} -/** @} */ // end of masked_matmul +#pragma once + +#ifndef RAFT_HIDE_DEPRECATION_WARNINGS +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") +#endif -} // end namespace linalg -} // end namespace sparse -} // end namespace raft +#include diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 43dd182fe5..7adf245abc 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -104,4 +104,4 @@ void rowNormCsr(raft::resources const& handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh index 4c0595bf91..276a64c125 100644 --- a/cpp/include/raft/sparse/linalg/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/spectral.cuh @@ -40,4 +40,4 @@ void fit_embedding(raft::resources const& handle, }; // namespace sparse }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh index 1de8d5b426..8ee53cd3ae 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -165,4 +165,4 @@ void symmetrize(raft::resources const& handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/transpose.cuh b/cpp/include/raft/sparse/linalg/transpose.cuh index 4333060ad9..304cbf4936 100644 --- a/cpp/include/raft/sparse/linalg/transpose.cuh +++ b/cpp/include/raft/sparse/linalg/transpose.cuh @@ -68,4 +68,4 @@ void csr_transpose(raft::resources const& handle, }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh index c94c6254c3..ed4aa4c98f 100644 --- 
a/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh +++ b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh @@ -96,4 +96,4 @@ void cross_component_nn( metric); } -}; // end namespace raft::sparse::neighbors \ No newline at end of file +}; // end namespace raft::sparse::neighbors diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh index c64c05ae4e..4b329325ca 100644 --- a/cpp/include/raft/sparse/op/filter.cuh +++ b/cpp/include/raft/sparse/op/filter.cuh @@ -91,4 +91,4 @@ void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/reduce.cuh b/cpp/include/raft/sparse/op/reduce.cuh index 52f1d3b239..b03192f111 100644 --- a/cpp/include/raft/sparse/op/reduce.cuh +++ b/cpp/include/raft/sparse/op/reduce.cuh @@ -84,4 +84,4 @@ void max_duplicates(raft::resources const& handle, }; // END namespace sparse }; // END namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/row_op.cuh b/cpp/include/raft/sparse/op/row_op.cuh index a799093226..b8d5a49d9f 100644 --- a/cpp/include/raft/sparse/op/row_op.cuh +++ b/cpp/include/raft/sparse/op/row_op.cuh @@ -45,4 +45,4 @@ void csr_row_op(const Index_* row_ind, Index_ n_rows, Index_ nnz, Lambda op, cud }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/op/slice.cuh b/cpp/include/raft/sparse/op/slice.cuh index 2da6dad4fc..e8a456d23e 100644 --- a/cpp/include/raft/sparse/op/slice.cuh +++ b/cpp/include/raft/sparse/op/slice.cuh @@ -78,4 +78,4 @@ void csr_row_slice_populate(value_idx start_offset, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 02a77a0d99..ddfa01731a 
100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -624,7 +624,7 @@ static int lanczosRestart(raft::resources const& handle, value_type_t* shifts_host; // Orthonormal matrix for similarity transform - value_type_t* V_dev = work_dev + n * iter; + value_type_t* V_dev = work_dev + (size_t)n * (size_t)iter; // ------------------------------------------------------- // Implementation @@ -641,7 +641,7 @@ static int lanczosRestart(raft::resources const& handle, // std::cout < 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); @@ -1395,10 +1398,10 @@ int computeLargestEigenvectors( unsigned long long seed = 123456) { // Matrix dimension - index_type_t n = A.nrows_; + size_t n = A.nrows_; // Check that parameters are valid - RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index fed31e6a9c..4c45a28cc6 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -230,4 +230,4 @@ int computeLargestEigenvectors( } // namespace raft::sparse::solver -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/cluster_solvers.cuh b/cpp/include/raft/spectral/cluster_solvers.cuh index b693ac4af3..c273808cf8 100644 --- 
a/cpp/include/raft/spectral/cluster_solvers.cuh +++ b/cpp/include/raft/spectral/cluster_solvers.cuh @@ -97,4 +97,4 @@ struct kmeans_solver_t { } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh index 40b0324548..139df1d27f 100644 --- a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh +++ b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh @@ -87,4 +87,4 @@ struct kmeans_solver_deprecated_t { } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 1fe078bd32..db8a5dc9ef 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -39,14 +39,14 @@ // ========================================================= // Get index of matrix entry -#define IDX(i, j, lda) ((i) + (j) * (lda)) +#define IDX(i, j, lda) ((size_t)(i) + (j) * (lda)) namespace raft { namespace spectral { namespace matrix { namespace detail { -using size_type = int; // for now; TODO: move it in appropriate header +using size_type = size_t; // for now; TODO: move it in appropriate header // Apply diagonal matrix to vector: // @@ -326,7 +326,7 @@ struct laplacian_matrix_t : sparse_matrix_t { raft_handle, row_offsets, col_indices, values, nrows, nnz), diagonal_(raft_handle, nrows) { - vector_t ones{raft_handle, nrows}; + vector_t ones{raft_handle, (size_t)nrows}; ones.fill(1.0); sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); } @@ -341,7 +341,7 @@ struct laplacian_matrix_t : sparse_matrix_t { csr_m.nnz_), diagonal_(raft_handle, csr_m.nrows_) { - vector_t ones{raft_handle, csr_m.nrows_}; + vector_t ones{raft_handle, (size_t)csr_m.nrows_}; ones.fill(1.0); sparse_matrix_t::mv(1, ones.raw(), 0, 
diagonal_.raw()); } diff --git a/cpp/include/raft/spectral/modularity_maximization.cuh b/cpp/include/raft/spectral/modularity_maximization.cuh index ab1398a2a1..6514f7ef21 100644 --- a/cpp/include/raft/spectral/modularity_maximization.cuh +++ b/cpp/include/raft/spectral/modularity_maximization.cuh @@ -83,4 +83,4 @@ void analyzeModularity(raft::resources const& handle, } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/spectral/partition.cuh b/cpp/include/raft/spectral/partition.cuh index f7ea456ac5..a2ac328aa1 100644 --- a/cpp/include/raft/spectral/partition.cuh +++ b/cpp/include/raft/spectral/partition.cuh @@ -92,4 +92,4 @@ void analyzePartition(raft::resources const& handle, } // namespace spectral } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 6625d38a7a..0b352e185b 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -75,4 +75,4 @@ float accuracy(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 1f97cd5f76..6822e069a2 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -86,4 +86,4 @@ double adjusted_rand_index(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index b669e0de32..f4667b37dc 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -88,4 +88,4 @@ double completeness_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif 
\ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 16f0998435..03fa0d4924 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -214,4 +214,4 @@ void contingency_matrix(Args... args) }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index ad5d233c0e..096ec4bc1c 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -119,4 +119,4 @@ void cov(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/detail/mean.cuh b/cpp/include/raft/stats/detail/mean.cuh index ee39c87a68..1262d538c8 100644 --- a/cpp/include/raft/stats/detail/mean.cuh +++ b/cpp/include/raft/stats/detail/mean.cuh @@ -27,7 +27,25 @@ namespace stats { namespace detail { template -void mean( +void mean(Type* mu, const Type* data, IdxType D, IdxType N, bool rowMajor, cudaStream_t stream) +{ + Type ratio = Type(1) / Type(N); + raft::linalg::reduce(mu, + data, + D, + N, + Type(0), + rowMajor, + false, + stream, + false, + raft::identity_op(), + raft::add_op(), + raft::mul_const_op(ratio)); +} + +template +[[deprecated]] void mean( Type* mu, const Type* data, IdxType D, IdxType N, bool sample, bool rowMajor, cudaStream_t stream) { Type ratio = Type(1) / ((sample) ? 
Type(N - 1) : Type(N)); @@ -47,4 +65,4 @@ void mean( } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/scores.cuh b/cpp/include/raft/stats/detail/scores.cuh index 947df6848a..66951f52ab 100644 --- a/cpp/include/raft/stats/detail/scores.cuh +++ b/cpp/include/raft/stats/detail/scores.cuh @@ -59,7 +59,7 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) { rmm::device_scalar y_bar(stream); - raft::stats::mean(y_bar.data(), y, 1, n, false, false, stream); + raft::stats::mean(y_bar.data(), y, 1, n, false, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); rmm::device_uvector sse_arr(n, stream); diff --git a/cpp/include/raft/stats/detail/stddev.cuh b/cpp/include/raft/stats/detail/stddev.cuh index 4c861b49fb..c758584ec9 100644 --- a/cpp/include/raft/stats/detail/stddev.cuh +++ b/cpp/include/raft/stats/detail/stddev.cuh @@ -120,4 +120,4 @@ void vars(Type* var, } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/sum.cuh b/cpp/include/raft/stats/detail/sum.cuh index 39bd2c3b6c..4f5438b133 100644 --- a/cpp/include/raft/stats/detail/sum.cuh +++ b/cpp/include/raft/stats/detail/sum.cuh @@ -34,4 +34,4 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c } // namespace detail } // namespace stats -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index ada0995f7d..9b96ed5949 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -72,4 +72,4 @@ void weightedMean(Type* mu, } }; // end namespace detail }; // end namespace stats -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git 
a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index ded7c8178b..444cc04bca 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -131,4 +131,4 @@ value_t cluster_dispersion( } // end namespace stats } // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index fe432569ee..a0c6ae5bdb 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -83,4 +83,4 @@ double entropy(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 311cd599f8..3095d2c724 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -91,4 +91,4 @@ double homogeneity_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 43d39cfd6c..b76b945400 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,6 +38,27 @@ namespace stats { * @param data: the input matrix * @param D: number of columns of data * @param N: number of rows of data + * @param rowMajor: whether the input data is row or col major + * @param stream: cuda stream + */ +template +void mean(Type* mu, const Type* data, IdxType D, IdxType N, bool rowMajor, cudaStream_t stream) +{ + detail::mean(mu, data, D, N, rowMajor, stream); +} + +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * Note: This call is deprecated, please use `mean` call without `sample` parameter. + * + * @tparam Type: the data type + * @tparam IdxType Integer type used to for addressing + * @param mu: the output mean vector + * @param data: the input matrix + * @param D: number of columns of data + * @param N: number of rows of data * @param sample: whether to evaluate sample mean or not. In other words, * whether * to normalize the output using N-1 or N, for true or false, respectively @@ -45,7 +66,7 @@ namespace stats { * @param stream: cuda stream */ template -void mean( +[[deprecated("'sample' parameter deprecated")]] void mean( Type* mu, const Type* data, IdxType D, IdxType N, bool sample, bool rowMajor, cudaStream_t stream) { detail::mean(mu, data, D, N, sample, rowMajor, stream); @@ -67,14 +88,47 @@ void mean( * @param[in] handle the raft handle * @param[in] data: the input matrix * @param[out] mu: the output mean vector - * @param[in] sample: whether to evaluate sample mean or not. 
In other words, whether - * to normalize the output using N-1 or N, for true or false, respectively */ template void mean(raft::resources const& handle, raft::device_matrix_view data, - raft::device_vector_view mu, - bool sample) + raft::device_vector_view mu) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch between data and mu"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::mean(mu.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + resource::get_cuda_stream(handle)); +} + +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * Note: This call is deprecated, please use `mean` call without `sample` parameter. + * + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data: the input matrix + * @param[out] mu: the output mean vector + * @param[in] sample: whether to evaluate sample mean or not. 
In other words, whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +[[deprecated("'sample' parameter deprecated")]] void mean( + raft::resources const& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + bool sample) { static_assert( std::is_same_v || std::is_same_v, @@ -96,4 +150,4 @@ void mean(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 83f9a8a941..fb9da4dd39 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -163,4 +163,4 @@ void mean_add(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index d2c410dab1..930a6f8b9e 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -141,4 +141,4 @@ void minmax(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 5a334e9280..c895a911e9 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -89,4 +89,4 @@ double mutual_info_score(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index c98b4bc93a..4ff9f491d8 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -90,4 +90,4 @@ value_t r2_score(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git 
a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index a21a0c0dc5..1230d615eb 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -75,4 +75,4 @@ double rand_index(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh index 718170f716..74763de2fc 100644 --- a/cpp/include/raft/stats/regression_metrics.cuh +++ b/cpp/include/raft/stats/regression_metrics.cuh @@ -104,4 +104,4 @@ void regression_metrics(raft::resources const& handle, } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 23eef84604..15d86969af 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -223,4 +223,4 @@ value_t silhouette_score_batched( }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 0a67bd2325..62668b3ddd 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -185,4 +185,4 @@ void vars(raft::resources const& handle, }; // namespace stats }; // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 2c3ed1b83e..6c18a21988 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -88,4 +88,4 @@ void sum(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index 3f4464f4d3..2435cb4ef9 100644 --- 
a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -98,4 +98,4 @@ double trustworthiness_score( } // namespace stats } // namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index 041adb5e38..1df3eab460 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -95,4 +95,4 @@ double v_measure(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index da22f0163c..a3e38f7168 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -189,4 +189,4 @@ void col_weighted_mean(raft::resources const& handle, }; // end namespace stats }; // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml b/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml index a5411082af..4357c207a1 100644 --- a/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml +++ b/cpp/include/raft/thirdparty/mdspan/.github/workflows/cmake.yml @@ -37,27 +37,27 @@ jobs: - name: Create Build Environment run: cmake -E make_directory ${{github.workspace}}/mdspan-build - + - name: Check Out uses: actions/checkout@v2 with: path: ${{github.workspace}}/mdspan-src - + - name: Configure CMake shell: bash working-directory: ${{github.workspace}}/mdspan-build run: CXX=${{ matrix.compiler_prefix}}/${{ matrix.compiler_driver }} cmake $GITHUB_WORKSPACE/mdspan-src -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/mdspan-install -DMDSPAN_ENABLE_TESTS=ON -DMDSPAN_ENABLE_EXAMPLES=ON - + - name: Build shell: bash working-directory: ${{github.workspace}}/mdspan-build run: make -j - + - name: Test 
working-directory: ${{github.workspace}}/mdspan-build shell: bash run: ctest - + - name: Install shell: bash working-directory: ${{github.workspace}}/mdspan-build diff --git a/cpp/include/raft/thirdparty/mdspan/LICENSE b/cpp/include/raft/thirdparty/mdspan/LICENSE index c68a8a2a9f..db92c208da 100644 --- a/cpp/include/raft/thirdparty/mdspan/LICENSE +++ b/cpp/include/raft/thirdparty/mdspan/LICENSE @@ -1,14 +1,14 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Kokkos is licensed under 3-clause BSD terms of use: -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -37,6 +37,6 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER diff --git a/cpp/include/raft/thirdparty/mdspan/README.md b/cpp/include/raft/thirdparty/mdspan/README.md index a062777261..15af4dd4a9 100644 --- a/cpp/include/raft/thirdparty/mdspan/README.md +++ b/cpp/include/raft/thirdparty/mdspan/README.md @@ -70,4 +70,3 @@ Acknowledgements ================ This work was undertaken as part of the [Kokkos project](https://github.com/kokkos/kokkos) at Sandia National Laboratories. Sandia National Laboratories is a multimission laboratory managed and operated by National Technology & Engineering Solutions of Sandia, LLC, a wholly owned subsidiary of Honeywell International Inc., for the U. S. Department of Energy's National Nuclear Security Administration under contract DE-NA0003525. 
- diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt index 30391b3d70..3d5cbb955a 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/cuda/CMakeLists.txt @@ -2,4 +2,4 @@ mdspan_add_cuda_benchmark(sum_3d_cuda) target_include_directories(sum_3d_cuda PUBLIC $ -) \ No newline at end of file +) diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt index 566c47c9ab..ccab58bfa1 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/CMakeLists.txt @@ -4,4 +4,4 @@ if(OpenMP_CXX_FOUND) target_include_directories(sum_3d_openmp PUBLIC $ ) -endif() \ No newline at end of file +endif() diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp index 9ab6a0ddf4..ef75349925 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/openmp/sum_3d_openmp.cpp @@ -174,4 +174,3 @@ BENCHMARK_CAPTURE( //================================================================================ BENCHMARK_MAIN(); - diff --git a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp index f106e2f5ff..4cbfe029c7 100644 --- a/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp +++ b/cpp/include/raft/thirdparty/mdspan/benchmarks/sum/sum_submdspan_right.cpp @@ -223,4 +223,3 @@ BENCHMARK_CAPTURE( //================================================================================ BENCHMARK_MAIN(); - diff --git 
a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp index ea2bad164c..ef45c9d18f 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_compressed_pair_layout.cpp @@ -169,4 +169,3 @@ test, CP>, 4 * sizeof(int*), non_empty>(); // end compressed pair layout: 2 nested pairs, 4 leaf elements }}}1 //============================================================================== } - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp index 00126691aa..64d71d650c 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_extents_ctors.cpp @@ -176,4 +176,3 @@ MDSPAN_STATIC_TEST( stdex::extents >::value ); - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp index e293734444..fc30fa25e5 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_layout_convertible.cpp @@ -117,5 +117,3 @@ MDSPAN_STATIC_TEST( MDSPAN_STATIC_TEST( !std::is_constructible>::value ); - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp index fa1136b9d6..c64fcdbabd 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_mdspan_convertible.cpp @@ -68,4 +68,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 
//============================================================================== - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp index 9f7c6c052d..c44b02bf76 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_no_unique_address.cpp @@ -109,5 +109,3 @@ MDSPAN_STATIC_TEST( // end layouts }}}1 //============================================================================== - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp index d8edf31ab2..6e41433d6a 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_standard_layout.cpp @@ -216,6 +216,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 //============================================================================== - - - diff --git a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp index 73ab426afa..f6457234d7 100644 --- a/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp +++ b/cpp/include/raft/thirdparty/mdspan/compilation_tests/ctest_trivially_copyable.cpp @@ -212,6 +212,3 @@ MDSPAN_STATIC_TEST( // end mdspan }}}1 //============================================================================== - - - diff --git a/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp b/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp index b8740d5227..ba481c3144 100644 --- a/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp +++ 
b/cpp/include/raft/thirdparty/mdspan/examples/tiled_layout/simple_tiled_layout.cpp @@ -207,4 +207,3 @@ int main() { std::cout << "Success! SimpleTiledLayout2D works as expected." << std::endl; } } - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp index 67356785c0..02e386e3aa 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/aligned_accessor.hpp @@ -42,7 +42,7 @@ */ -// NOTE: This code is prematurely taken from an example based on +// NOTE: This code is prematurely taken from an example based on // https://github.com/kokkos/mdspan/pull/176 #pragma once diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp index 6be71b432c..3b4d69d63e 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/extents.hpp @@ -531,7 +531,7 @@ struct __extents_to_partially_static_sizes; template struct __extents_to_partially_static_sizes<::std::experimental::extents> { using type = detail::__partially_static_sizes< - typename ::std::experimental::extents::index_type, size_t, + typename ::std::experimental::extents::index_type, size_t, ExtentsPack...>; }; diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp index ed1478dc8b..92a291e915 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_left.hpp @@ -237,4 +237,3 @@ class 
layout_left::mapping { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp index cd9c9c19bf..c761146874 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_padded.hpp @@ -45,7 +45,7 @@ // NOTE: This code is prematurely taken from https://github.com/kokkos/mdspan/pull/180 // and matches requirements described in https://github.com/ORNL/cpp-proposals-pub/pull/296 // Some parts (as submdspan integration) are missing -// EDIT: the meaning of the template argument 'padding_stride' was adjusted from a +// EDIT: the meaning of the template argument 'padding_stride' was adjusted from a // fixed stride to a padding alignment, allowing dimensions > padding_stride to be padded // to multiples of 'padding_stride' @@ -140,7 +140,7 @@ namespace details { // layout_padded_left implementation namespace details { - + // The *_helper functions work around not having C++20 // templated lambdas: []{} . 
diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp index a9b64ca36a..d4b71efae1 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_right.hpp @@ -237,4 +237,3 @@ class layout_right::mapping { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp index 904dd40a75..90b1a46288 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp @@ -74,13 +74,13 @@ struct __no_unique_address_emulation< // If the type isn't trivially destructible, its destructor // won't be called at the right time, so don't use this // specialization - _MDSPAN_TRAIT(is_trivially_destructible, _T)>> : + _MDSPAN_TRAIT(is_trivially_destructible, _T)>> : #ifdef _MDSPAN_COMPILER_MSVC // MSVC doesn't allow you to access public static member functions of a type // when you *happen* to privately inherit from that type. protected #else - // But we still want this to be private if possible so that we don't accidentally + // But we still want this to be private if possible so that we don't accidentally // access members of _T directly rather than calling __ref() first, which wouldn't // work if _T happens to be stateful and thus we're using the unspecialized definition // of __no_unique_address_emulation above. 
diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp index 7de72e6537..64845190ae 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/__p0009_bits/type_list.hpp @@ -114,4 +114,3 @@ struct __type_at<3, __type_list<_T0, _T1, _T2, _T3, _Ts...>> { } // end namespace experimental } // end namespace std - diff --git a/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray b/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray index fa710a59b6..60e06dd68e 100644 --- a/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray +++ b/cpp/include/raft/thirdparty/mdspan/include/experimental/mdarray @@ -45,4 +45,3 @@ #include "mdspan" #include "__p1684_bits/mdarray.hpp" - diff --git a/cpp/include/raft/thirdparty/mdspan/make_single_header.py b/cpp/include/raft/thirdparty/mdspan/make_single_header.py index 1b562c7176..98ab3526db 100755 --- a/cpp/include/raft/thirdparty/mdspan/make_single_header.py +++ b/cpp/include/raft/thirdparty/mdspan/make_single_header.py @@ -49,4 +49,3 @@ def process_file(file_path, out_lines=[], front_matter_lines=[], back_matter_lin "#define _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_\n"], ["#endif // _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_\n"], [abspath(sys.argv[1])])) - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt b/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt index d92834beb7..a30ce2c198 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt +++ b/cpp/include/raft/thirdparty/mdspan/tests/CMakeLists.txt @@ -57,4 +57,3 @@ mdspan_add_test(test_layout_ctors) mdspan_add_test(test_layout_stride) mdspan_add_test(test_submdspan) mdspan_add_test(test_mdarray_ctors) - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp 
b/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp index f09b799684..e91896c1c4 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_exhaustive_layouts.cpp @@ -424,4 +424,3 @@ TYPED_TEST(TestLayoutConversion, implicit_conversion) { ASSERT_EQ(map1.stride(r), map2.stride(r)); } } - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp index 3a3e1c2696..12008f05cf 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_layout_stride.cpp @@ -164,4 +164,3 @@ TEST(TestLayoutStrideCTAD, test_ctad) { */ } #endif - diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp index 781a12a697..3dcb61d454 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_mdarray_ctors.cpp @@ -740,7 +740,7 @@ TEST(TestMdarrayCTAD, layout_stride) { ASSERT_EQ(m0.stride(1), 128); ASSERT_FALSE(m0.is_exhaustive()); - /* + /* stdex::mdarray m1{d.data(), stdex::layout_stride::mapping{stdex::extents{16, 32}, stdex::extents{1, 128}}}; ASSERT_EQ(m1.data(), d.data()); ASSERT_EQ(m1.rank(), 2); diff --git a/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp b/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp index 81d3fdb983..14ae51a259 100644 --- a/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp +++ b/cpp/include/raft/thirdparty/mdspan/tests/test_mdspan_ctors.cpp @@ -346,7 +346,7 @@ TEST(TestMdspanCTAD, layout_stride) { ASSERT_EQ(m0.stride(1), 128); ASSERT_FALSE(m0.is_exhaustive()); - /* + /* stdex::mdspan m1{d.data(), stdex::layout_stride::mapping{stdex::extents{16, 32}, stdex::extents{1, 128}}}; ASSERT_EQ(m1.data(), d.data()); ASSERT_EQ(m1.rank(), 
2); diff --git a/cpp/include/raft/util/detail/popc.cuh b/cpp/include/raft/util/detail/popc.cuh index f335be6fd0..9638a261a5 100644 --- a/cpp/include/raft/util/detail/popc.cuh +++ b/cpp/include/raft/util/detail/popc.cuh @@ -73,4 +73,4 @@ void popc(const raft::resources& res, }); } -} // end namespace raft::detail \ No newline at end of file +} // end namespace raft::detail diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index 2c954ec99a..c1b668fed6 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -739,4 +739,46 @@ DI void block_copy(raft::device_span dst, const raft::device_span src) /** @} */ +/** + * @defgroup GlobalStores Global Store Operations + * @{ + * @brief Perform conditional stores to global memory. + * + * These functions store data to a specified global memory address, + * controlled by a guard flag to enable conditional execution. + * + * @param[in] reg The data to store in global memory. + * The type of `reg` determines the size of the store. + * @param[in] addr The global memory address where the data will be stored. + * @param[in] guard A flag to conditionally enable the store operation. 
+ * If `true`, the store is performed; otherwise, it is skipped + */ +DI void stg(const int& reg, void* addr, bool guard) +{ + asm volatile( + "{\n" + ".reg .pred p;\n" + "setp.ne.b32 p, %2, 0;\n" + "@p st.global.b32 [%0], %1;\n" + "}\n" + : + : "l"(addr), "r"(reg), "r"((int)guard) + : "memory"); +} + +DI void stg(const int64_t& reg, void* addr, bool guard) +{ + asm volatile( + "{\n" + ".reg .pred p;\n" + "setp.ne.b32 p, %2, 0;\n" + "@p st.global.b64 [%0], %1;\n" + "}\n" + : + : "l"(addr), "l"(reg), "r"((int)guard) + : "memory"); +} + +/** @} */ + } // namespace raft diff --git a/cpp/include/raft/util/input_validation.hpp b/cpp/include/raft/util/input_validation.hpp index 17bb53f22b..119fd9d2e2 100644 --- a/cpp/include/raft/util/input_validation.hpp +++ b/cpp/include/raft/util/input_validation.hpp @@ -129,4 +129,4 @@ constexpr bool is_scalar_view(mdspan m) return false; } -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/cpp/include/raft/util/integer_utils.hpp b/cpp/include/raft/util/integer_utils.hpp index 5224d5ac4c..7ea5a3d212 100644 --- a/cpp/include/raft/util/integer_utils.hpp +++ b/cpp/include/raft/util/integer_utils.hpp @@ -25,6 +25,7 @@ #include +#include #include #include #include diff --git a/cpp/include/raft/util/itertools.hpp b/cpp/include/raft/util/itertools.hpp index 493ac9befe..a31d9f79df 100644 --- a/cpp/include/raft/util/itertools.hpp +++ b/cpp/include/raft/util/itertools.hpp @@ -36,7 +36,7 @@ namespace raft::util::itertools { * fields of the structure (if the structure has more fields, some might be initialized * with their default value). * @param lists One or more initializer lists. - * @return std::vector A vector of structures containing the cartesian product. + * @return `std::vector` A vector of structures containing the cartesian product. */ template std::vector product(std::initializer_list... 
lists) diff --git a/cpp/include/raft/util/warp_primitives.cuh b/cpp/include/raft/util/warp_primitives.cuh index 953c137cdf..2a7c4e9127 100644 --- a/cpp/include/raft/util/warp_primitives.cuh +++ b/cpp/include/raft/util/warp_primitives.cuh @@ -256,4 +256,4 @@ DI std::enable_if_t, T> shfl_xor(T val, return output; } -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/scripts/run-clang-compile.py b/cpp/scripts/run-clang-compile.py index 123f0e4075..8ed9aa00f0 100644 --- a/cpp/scripts/run-clang-compile.py +++ b/cpp/scripts/run-clang-compile.py @@ -253,12 +253,12 @@ def run_clang_command(clang_cmd, cwd): class LockContext(object): def __init__(self, lock=None) -> None: self._lock = lock - + def __enter__(self): if self._lock: self._lock.acquire() return self - + def __exit__(self, _, __, ___): if self._lock: self._lock.release() diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 3d8bbcec4a..cad08ca551 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -296,12 +296,12 @@ def run_clang_tidy_command(tidy_cmd, cwd): class LockContext(object): def __init__(self, lock=None) -> None: self._lock = lock - + def __enter__(self): if self._lock: self._lock.acquire() return self - + def __exit__(self, _, __, ___): if self._lock: self._lock.release() diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh index db5a8b5804..e08481fbd6 100755 --- a/cpp/scripts/run-cmake-format.sh +++ b/cpp/scripts/run-cmake-format.sh @@ -17,7 +17,7 @@ # and exits gracefully if the file is not found. If a user wishes to specify a # config file at a nonstandard location, they may do so by setting the # environment variable RAPIDS_CMAKE_FORMAT_FILE. -# +# # This script can be invoked directly anywhere within the project repository. # Alternatively, it may be invoked as a pre-commit hook via # `pre-commit run (cmake-format)|(cmake-lint)`. 
diff --git a/cpp/src/core/logger.cpp b/cpp/src/core/logger.cpp deleted file mode 100644 index 8f81cf2926..0000000000 --- a/cpp/src/core/logger.cpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu deleted file mode 100644 index b44d87d1bd..0000000000 --- a/cpp/test/stats/mean_center.cu +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../linalg/matrix_vector_op.cuh" -#include "../test_utils.cuh" - -#include -#include -#include -#include -#include - -#include - -namespace raft { -namespace stats { - -template -struct MeanCenterInputs { - T tolerance, mean; - IdxType rows, cols; - bool sample, rowMajor, bcastAlongRows; - unsigned long long int seed; -}; - -template -::std::ostream& operator<<(::std::ostream& os, const MeanCenterInputs& dims) -{ - return os; -} - -template -class MeanCenterTest : public ::testing::TestWithParam> { - public: - MeanCenterTest() - : params(::testing::TestWithParam>::GetParam()), - stream(resource::get_cuda_stream(handle)), - rows(params.rows), - cols(params.cols), - out(rows * cols, stream), - out_ref(rows * cols, stream), - data(rows * cols, stream), - meanVec(params.bcastAlongRows ? cols : rows, stream) - { - } - - protected: - void SetUp() override - { - raft::random::RngState r(params.seed); - auto len = rows * cols; - auto meanVecSize = params.bcastAlongRows ? cols : rows; - normal(handle, r, data.data(), len, params.mean, (T)1.0); - raft::stats::mean( - meanVec.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream); - if (params.rowMajor) { - using layout = raft::row_major; - mean_center(handle, - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), - raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows); - } else { - using layout = raft::col_major; - mean_center(handle, - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), - raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows); - } - raft::linalg::naiveMatVec(out_ref.data(), - data.data(), - meanVec.data(), - cols, - rows, - params.rowMajor, - params.bcastAlongRows, - (T)-1.0, - stream); - resource::sync_stream(handle, stream); - } - - protected: - raft::resources handle; - cudaStream_t stream; 
- - MeanCenterInputs params; - int rows, cols; - rmm::device_uvector data, meanVec, out, out_ref; -}; - -const std::vector> inputsf_i32 = { - {0.05f, 1.f, 1024, 32, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, false, 1234ULL}}; -typedef MeanCenterTest MeanCenterTestF_i32; -TEST_P(MeanCenterTestF_i32, Result) -{ - ASSERT_TRUE(devArrMatch( - out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i32, ::testing::ValuesIn(inputsf_i32)); - -const std::vector> inputsf_i64 = { - {0.05f, 1.f, 1024, 32, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, 
false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, true, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, true, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, false, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, false, false, 1234ULL}, - {0.05f, 1.f, 1024, 32, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 64, true, true, false, 1234ULL}, - {0.05f, 1.f, 1024, 128, true, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 32, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 64, false, true, false, 1234ULL}, - {0.05f, -1.f, 1024, 128, false, true, false, 1234ULL}}; -typedef MeanCenterTest MeanCenterTestF_i64; -TEST_P(MeanCenterTestF_i64, Result) -{ - ASSERT_TRUE(devArrMatch( - out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i64, ::testing::ValuesIn(inputsf_i64)); - -const std::vector> inputsd_i32 = { - {0.05, 1.0, 1024, 32, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, 
true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, false, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, false, 1234ULL}}; -typedef MeanCenterTest MeanCenterTestD_i32; -TEST_P(MeanCenterTestD_i32, Result) -{ - ASSERT_TRUE(devArrMatch( - out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestD_i32, ::testing::ValuesIn(inputsd_i32)); - -const std::vector> inputsd_i64 = { - {0.05, 1.0, 1024, 32, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, false, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, true, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, true, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, true, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, true, 1234ULL}, - {0.05, 1.0, 1024, 32, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, false, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, false, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, false, false, 1234ULL}, - {0.05, -1.0, 
1024, 64, false, false, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, false, false, 1234ULL}, - {0.05, 1.0, 1024, 32, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 64, true, true, false, 1234ULL}, - {0.05, 1.0, 1024, 128, true, true, false, 1234ULL}, - {0.05, -1.0, 1024, 32, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 64, false, true, false, 1234ULL}, - {0.05, -1.0, 1024, 128, false, true, false, 1234ULL}}; -typedef MeanCenterTest MeanCenterTestD_i64; -TEST_P(MeanCenterTestD_i64, Result) -{ - ASSERT_TRUE(devArrMatch( - out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestD_i64, ::testing::ValuesIn(inputsd_i64)); - -} // end namespace stats -} // end namespace raft diff --git a/cpp/test/CMakeLists.txt b/cpp/tests/CMakeLists.txt similarity index 97% rename from cpp/test/CMakeLists.txt rename to cpp/tests/CMakeLists.txt index 621ee6c160..9f96b93e7a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -55,6 +55,7 @@ function(ConfigureTest) ${RAFT_CTK_MATH_DEPENDENCIES} $ $ + raft_test_logger ) set_target_properties( ${TEST_NAME} @@ -76,7 +77,7 @@ function(ConfigureTest) target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_DISABLE_CUDA") endif() - target_include_directories(${TEST_NAME} PUBLIC "$") + target_include_directories(${TEST_NAME} PUBLIC "$") rapids_test_add( NAME ${TEST_NAME} @@ -87,6 +88,10 @@ function(ConfigureTest) ) endfunction() +# Create an object library for the logger so that we don't have to recompile it. 
+add_library(raft_test_logger OBJECT) +target_link_libraries(raft_test_logger PRIVATE raft_logger_impl) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## diff --git a/cpp/test/core/bitmap.cu b/cpp/tests/core/bitmap.cu similarity index 100% rename from cpp/test/core/bitmap.cu rename to cpp/tests/core/bitmap.cu diff --git a/cpp/test/core/bitset.cu b/cpp/tests/core/bitset.cu similarity index 72% rename from cpp/test/core/bitset.cu rename to cpp/tests/core/bitset.cu index ac601274c1..f094f60ded 100644 --- a/cpp/test/core/bitset.cu +++ b/cpp/tests/core/bitset.cu @@ -24,6 +24,8 @@ #include #include +#include +#include #include namespace raft::core { @@ -73,6 +75,40 @@ void test_cpu_bitset(const std::vector& bitset, } } +template +void test_cpu_bitset_nbits(const bitset_t* bitset, + const std::vector& queries, + std::vector& result, + unsigned original_nbits_) +{ + constexpr size_t nbits = sizeof(bitset_t) * 8; + if (original_nbits_ == nbits) { + for (size_t i = 0; i < queries.size(); i++) { + result[i] = + uint8_t((bitset[queries[i] / nbits] & (bitset_t{1} << (queries[i] % nbits))) != 0); + } + } + for (size_t i = 0; i < queries.size(); i++) { + const index_t sample_index = queries[i]; + const index_t original_bit_index = sample_index / original_nbits_; + const index_t original_bit_offset = sample_index % original_nbits_; + index_t new_bit_index = original_bit_index * original_nbits_ / nbits; + index_t new_bit_offset = 0; + if (original_nbits_ > nbits) { + new_bit_index += original_bit_offset / nbits; + new_bit_offset = original_bit_offset % nbits; + } else { + index_t ratio = nbits / original_nbits_; + new_bit_offset += (original_bit_index % ratio) * original_nbits_; + new_bit_offset += original_bit_offset % nbits; + } + 
const bitset_t bit_element = bitset[new_bit_index]; + const bool is_bit_set = (bit_element & (bitset_t{1} << new_bit_offset)) != 0; + + result[i] = uint8_t(is_bit_set); + } +} + template void flip_cpu_bitset(std::vector& bitset) { @@ -168,11 +204,12 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); - auto query_device = raft::make_device_vector(res, spec.query_len); - auto result_device = raft::make_device_vector(res, spec.query_len); - auto query_cpu = std::vector(spec.query_len); - auto result_cpu = std::vector(spec.query_len); - auto result_ref = std::vector(spec.query_len); + auto query_device = raft::make_device_vector(res, spec.query_len); + auto result_device = raft::make_device_vector(res, spec.query_len); + auto query_cpu = std::vector(spec.query_len); + auto result_cpu = std::vector(spec.query_len); + auto result_ref_nbits = std::vector(spec.query_len); + auto result_ref = std::vector(spec.query_len); // Create queries and verify the test results raft::random::uniformInt(res, rng, query_device.view(), index_t(0), index_t(spec.bitset_len)); @@ -194,6 +231,57 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); + // Reinterpret the bitset as uint8_t, uint32 then uint64_t + { + // Test CPU logic + test_cpu_bitset(bitset_ref, query_cpu, result_ref); + uint8_t* bitset_cpu_uint8 = (uint8_t*)std::malloc(sizeof(bitset_t) * bitset_ref.size()); + std::memcpy(bitset_cpu_uint8, bitset_ref.data(), sizeof(bitset_t) * bitset_ref.size()); + test_cpu_bitset_nbits(bitset_cpu_uint8, query_cpu, result_ref_nbits, sizeof(bitset_t) * 8); + ASSERT_TRUE(hostVecMatch(result_ref, result_ref_nbits, raft::Compare())); + std::free(bitset_cpu_uint8); + + // Test GPU uint8_t, uint32_t, uint64_t + auto my_bitset_view_uint8_t = raft::core::bitset_view( + 
reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint8_t] __device__(index_t query) { + return my_bitset_view_uint8_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + + auto my_bitset_view_uint32_t = raft::core::bitset_view( + reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint32_t] __device__(index_t query) { + return my_bitset_view_uint32_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + + auto my_bitset_view_uint64_t = raft::core::bitset_view( + reinterpret_cast(my_bitset.data()), my_bitset.size(), sizeof(bitset_t) * 8); + raft::linalg::map( + res, + result_device.view(), + [my_bitset_view_uint64_t] __device__(index_t query) { + return my_bitset_view_uint64_t.test(query); + }, + raft::make_const_mdspan(query_device.view())); + update_host(result_cpu.data(), result_device.data_handle(), result_device.extent(0), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(hostVecMatch(result_ref, result_cpu, Compare())); + } + // test sparsity, repeat and eval_n_elements { auto my_bitset_view = my_bitset.view(); diff --git a/cpp/test/core/device_resources_manager.cpp b/cpp/tests/core/device_resources_manager.cpp similarity index 98% rename from cpp/test/core/device_resources_manager.cpp rename to cpp/tests/core/device_resources_manager.cpp index c63d5896e5..007b57378f 100644 --- a/cpp/test/core/device_resources_manager.cpp +++ 
b/cpp/tests/core/device_resources_manager.cpp @@ -89,7 +89,7 @@ TEST(DeviceResourcesManager, ObeysSetters) // Suppress the many warnings from testing use of setters after initial // get_device_resources call - auto scoped_log_level = log_level_setter{RAFT_LEVEL_ERROR}; + auto scoped_log_level = log_level_setter{level_enum::error}; omp_set_dynamic(0); #pragma omp parallel for num_threads(5) diff --git a/cpp/test/core/device_setter.cpp b/cpp/tests/core/device_setter.cpp similarity index 100% rename from cpp/test/core/device_setter.cpp rename to cpp/tests/core/device_setter.cpp diff --git a/cpp/test/core/handle.cpp b/cpp/tests/core/handle.cpp similarity index 100% rename from cpp/test/core/handle.cpp rename to cpp/tests/core/handle.cpp diff --git a/cpp/test/core/interruptible.cu b/cpp/tests/core/interruptible.cu similarity index 100% rename from cpp/test/core/interruptible.cu rename to cpp/tests/core/interruptible.cu diff --git a/cpp/test/core/logger.cpp b/cpp/tests/core/logger.cpp similarity index 52% rename from cpp/test/core/logger.cpp rename to cpp/tests/core/logger.cpp index 7f31beed71..10adb71dda 100644 --- a/cpp/test/core/logger.cpp +++ b/cpp/tests/core/logger.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -// We set RAFT_ACTIVE_LEVEL to a value that would enable testing trace and debug logs +// We set RAFT_LOG_ACTIVE_LEVEL to a value that would enable testing trace and debug logs // (otherwise trace and debug logs are desabled by default). 
-#undef RAFT_ACTIVE_LEVEL -#define RAFT_ACTIVE_LEVEL 6 +#undef RAFT_LOG_ACTIVE_LEVEL +#define RAFT_LOG_ACTIVE_LEVEL RAFT_LOG_LEVEL_TRACE #include @@ -34,15 +34,15 @@ TEST(logger, Test) RAFT_LOG_WARN("This is a warning message"); RAFT_LOG_INFO("This is an info message"); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_WARN); - ASSERT_EQ(RAFT_LEVEL_WARN, logger::get(RAFT_NAME).get_level()); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_INFO); - ASSERT_EQ(RAFT_LEVEL_INFO, logger::get(RAFT_NAME).get_level()); + default_logger().set_level(raft::level_enum::warn); + ASSERT_EQ(raft::level_enum::warn, default_logger().level()); + default_logger().set_level(raft::level_enum::info); + ASSERT_EQ(raft::level_enum::info, default_logger().level()); - ASSERT_FALSE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_TRACE)); - ASSERT_FALSE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_DEBUG)); - ASSERT_TRUE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_INFO)); - ASSERT_TRUE(logger::get(RAFT_NAME).should_log_for(RAFT_LEVEL_WARN)); + ASSERT_FALSE(default_logger().should_log(raft::level_enum::trace)); + ASSERT_FALSE(default_logger().should_log(raft::level_enum::debug)); + ASSERT_TRUE(default_logger().should_log(raft::level_enum::info)); + ASSERT_TRUE(default_logger().should_log(raft::level_enum::warn)); } std::string logged = ""; @@ -57,60 +57,61 @@ class loggerTest : public ::testing::Test { { flushCount = 0; logged = ""; - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_TRACE); + default_logger().set_level(raft::level_enum::trace); } void TearDown() override { - logger::get(RAFT_NAME).set_callback(nullptr); - logger::get(RAFT_NAME).set_flush(nullptr); - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_INFO); + default_logger().sinks().pop_back(); + default_logger().set_level(raft::level_enum::info); } }; -// The logging macros depend on `RAFT_ACTIVE_LEVEL` as well as the logger verbosity; -// The verbosity is set to `RAFT_LEVEL_TRACE`, but `RAFT_ACTIVE_LEVEL` is set outside of here. 
-auto check_if_logged(const std::string& msg, int log_level_def) -> bool +// The logging macros depend on `RAFT_LOG_ACTIVE_LEVEL` as well as the logger verbosity; +// The verbosity is set to `RAFT_LOG_LEVEL_TRACE`, but `RAFT_LOG_ACTIVE_LEVEL` is set outside of +// here. +auto check_if_logged(const std::string& msg, raft::level_enum log_level_def) -> bool { bool actually_logged = logged.find(msg) != std::string::npos; - bool should_be_logged = RAFT_ACTIVE_LEVEL >= log_level_def; + bool should_be_logged = RAFT_LOG_ACTIVE_LEVEL <= static_cast(log_level_def); return actually_logged == should_be_logged; } TEST_F(loggerTest, callback) { std::string testMsg; - logger::get(RAFT_NAME).set_callback(exampleCallback); + default_logger().sinks().push_back(std::make_shared(exampleCallback)); testMsg = "This is a critical message"; RAFT_LOG_CRITICAL(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_CRITICAL)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::critical)); testMsg = "This is an error message"; RAFT_LOG_ERROR(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_ERROR)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::error)); testMsg = "This is a warning message"; RAFT_LOG_WARN(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_WARN)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::warn)); testMsg = "This is an info message"; RAFT_LOG_INFO(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_INFO)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::info)); testMsg = "This is a debug message"; RAFT_LOG_DEBUG(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_DEBUG)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::debug)); testMsg = "This is a trace message"; RAFT_LOG_TRACE(testMsg.c_str()); - ASSERT_TRUE(check_if_logged(testMsg, RAFT_LEVEL_TRACE)); + ASSERT_TRUE(check_if_logged(testMsg, raft::level_enum::trace)); } TEST_F(loggerTest, flush) { - 
logger::get(RAFT_NAME).set_flush(exampleFlush); - logger::get(RAFT_NAME).flush(); + default_logger().sinks().push_back( + std::make_shared(exampleCallback, exampleFlush)); + default_logger().flush(); ASSERT_EQ(1, flushCount); } diff --git a/cpp/test/core/math_device.cu b/cpp/tests/core/math_device.cu similarity index 100% rename from cpp/test/core/math_device.cu rename to cpp/tests/core/math_device.cu diff --git a/cpp/test/core/math_host.cpp b/cpp/tests/core/math_host.cpp similarity index 100% rename from cpp/test/core/math_host.cpp rename to cpp/tests/core/math_host.cpp diff --git a/cpp/test/core/mdarray.cu b/cpp/tests/core/mdarray.cu similarity index 100% rename from cpp/test/core/mdarray.cu rename to cpp/tests/core/mdarray.cu diff --git a/cpp/test/core/mdbuffer.cu b/cpp/tests/core/mdbuffer.cu similarity index 100% rename from cpp/test/core/mdbuffer.cu rename to cpp/tests/core/mdbuffer.cu diff --git a/cpp/test/core/mdspan_copy.cpp b/cpp/tests/core/mdspan_copy.cpp similarity index 100% rename from cpp/test/core/mdspan_copy.cpp rename to cpp/tests/core/mdspan_copy.cpp diff --git a/cpp/test/core/mdspan_copy.cu b/cpp/tests/core/mdspan_copy.cu similarity index 100% rename from cpp/test/core/mdspan_copy.cu rename to cpp/tests/core/mdspan_copy.cu diff --git a/cpp/test/core/mdspan_utils.cu b/cpp/tests/core/mdspan_utils.cu similarity index 100% rename from cpp/test/core/mdspan_utils.cu rename to cpp/tests/core/mdspan_utils.cu diff --git a/cpp/test/core/memory_type.cpp b/cpp/tests/core/memory_type.cpp similarity index 100% rename from cpp/test/core/memory_type.cpp rename to cpp/tests/core/memory_type.cpp diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/tests/core/numpy_serializer.cu similarity index 100% rename from cpp/test/core/numpy_serializer.cu rename to cpp/tests/core/numpy_serializer.cu diff --git a/cpp/test/core/nvtx.cpp b/cpp/tests/core/nvtx.cpp similarity index 100% rename from cpp/test/core/nvtx.cpp rename to cpp/tests/core/nvtx.cpp diff --git 
a/cpp/test/core/operators_device.cu b/cpp/tests/core/operators_device.cu similarity index 100% rename from cpp/test/core/operators_device.cu rename to cpp/tests/core/operators_device.cu diff --git a/cpp/test/core/operators_host.cpp b/cpp/tests/core/operators_host.cpp similarity index 100% rename from cpp/test/core/operators_host.cpp rename to cpp/tests/core/operators_host.cpp diff --git a/cpp/test/core/seive.cu b/cpp/tests/core/seive.cu similarity index 100% rename from cpp/test/core/seive.cu rename to cpp/tests/core/seive.cu diff --git a/cpp/test/core/span.cpp b/cpp/tests/core/span.cpp similarity index 100% rename from cpp/test/core/span.cpp rename to cpp/tests/core/span.cpp diff --git a/cpp/test/core/span.cu b/cpp/tests/core/span.cu similarity index 100% rename from cpp/test/core/span.cu rename to cpp/tests/core/span.cu diff --git a/cpp/test/core/sparse_matrix.cpp b/cpp/tests/core/sparse_matrix.cpp similarity index 100% rename from cpp/test/core/sparse_matrix.cpp rename to cpp/tests/core/sparse_matrix.cpp diff --git a/cpp/test/core/sparse_matrix.cu b/cpp/tests/core/sparse_matrix.cu similarity index 100% rename from cpp/test/core/sparse_matrix.cu rename to cpp/tests/core/sparse_matrix.cu diff --git a/cpp/test/core/stream_view.cpp b/cpp/tests/core/stream_view.cpp similarity index 100% rename from cpp/test/core/stream_view.cpp rename to cpp/tests/core/stream_view.cpp diff --git a/cpp/test/core/temporary_device_buffer.cu b/cpp/tests/core/temporary_device_buffer.cu similarity index 100% rename from cpp/test/core/temporary_device_buffer.cu rename to cpp/tests/core/temporary_device_buffer.cu diff --git a/cpp/test/core/test_span.hpp b/cpp/tests/core/test_span.hpp similarity index 100% rename from cpp/test/core/test_span.hpp rename to cpp/tests/core/test_span.hpp diff --git a/cpp/test/ext_headers/00_generate.py b/cpp/tests/ext_headers/00_generate.py similarity index 100% rename from cpp/test/ext_headers/00_generate.py rename to cpp/tests/ext_headers/00_generate.py diff 
--git a/cpp/test/ext_headers/raft_core_logger.cpp b/cpp/tests/ext_headers/raft_core_logger.cpp similarity index 100% rename from cpp/test/ext_headers/raft_core_logger.cpp rename to cpp/tests/ext_headers/raft_core_logger.cpp diff --git a/cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu b/cpp/tests/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu rename to cpp/tests/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu diff --git a/cpp/test/ext_headers/raft_distance_distance.cu b/cpp/tests/ext_headers/raft_distance_distance.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_distance.cu rename to cpp/tests/ext_headers/raft_distance_distance.cu diff --git a/cpp/test/ext_headers/raft_distance_fused_l2_nn.cu b/cpp/tests/ext_headers/raft_distance_fused_l2_nn.cu similarity index 100% rename from cpp/test/ext_headers/raft_distance_fused_l2_nn.cu rename to cpp/tests/ext_headers/raft_distance_fused_l2_nn.cu diff --git a/cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu b/cpp/tests/ext_headers/raft_linalg_detail_coalesced_reduction.cu similarity index 100% rename from cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu rename to cpp/tests/ext_headers/raft_linalg_detail_coalesced_reduction.cu diff --git a/cpp/test/ext_headers/raft_matrix_detail_select_k.cu b/cpp/tests/ext_headers/raft_matrix_detail_select_k.cu similarity index 100% rename from cpp/test/ext_headers/raft_matrix_detail_select_k.cu rename to cpp/tests/ext_headers/raft_matrix_detail_select_k.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ball_cover.cu b/cpp/tests/ext_headers/raft_neighbors_ball_cover.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ball_cover.cu rename to cpp/tests/ext_headers/raft_neighbors_ball_cover.cu diff --git a/cpp/test/ext_headers/raft_neighbors_brute_force.cu 
b/cpp/tests/ext_headers/raft_neighbors_brute_force.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_brute_force.cu rename to cpp/tests/ext_headers/raft_neighbors_brute_force.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_search.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_flat_search.cu diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu b/cpp/tests/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu rename to cpp/tests/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_flat.cu b/cpp/tests/ext_headers/raft_neighbors_ivf_flat.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ivf_flat.cu rename to cpp/tests/ext_headers/raft_neighbors_ivf_flat.cu diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_pq.cu b/cpp/tests/ext_headers/raft_neighbors_ivf_pq.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_ivf_pq.cu rename to cpp/tests/ext_headers/raft_neighbors_ivf_pq.cu diff --git a/cpp/test/ext_headers/raft_neighbors_refine.cu b/cpp/tests/ext_headers/raft_neighbors_refine.cu similarity index 100% rename from cpp/test/ext_headers/raft_neighbors_refine.cu rename to cpp/tests/ext_headers/raft_neighbors_refine.cu diff --git 
a/cpp/test/ext_headers/raft_sparse_matrix_detail_select_k.cu b/cpp/tests/ext_headers/raft_sparse_matrix_detail_select_k.cu similarity index 100% rename from cpp/test/ext_headers/raft_sparse_matrix_detail_select_k.cu rename to cpp/tests/ext_headers/raft_sparse_matrix_detail_select_k.cu diff --git a/cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu b/cpp/tests/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu similarity index 100% rename from cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu rename to cpp/tests/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu diff --git a/cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu b/cpp/tests/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu similarity index 100% rename from cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu rename to cpp/tests/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu diff --git a/cpp/test/label/label.cu b/cpp/tests/label/label.cu similarity index 98% rename from cpp/test/label/label.cu rename to cpp/tests/label/label.cu index 4c3479182f..34a336de59 100644 --- a/cpp/test/label/label.cu +++ b/cpp/tests/label/label.cu @@ -59,8 +59,8 @@ TEST_F(MakeMonotonicTest, Result) ASSERT_TRUE(devArrMatch(actual.data(), expected.data(), m, raft::Compare(), stream)); - delete data_h; - delete expected_h; + delete[] data_h; + delete[] expected_h; } TEST(labelTest, Classlabels) diff --git a/cpp/test/label/merge_labels.cu b/cpp/tests/label/merge_labels.cu similarity index 100% rename from cpp/test/label/merge_labels.cu rename to cpp/tests/label/merge_labels.cu diff --git a/cpp/test/lap/lap.cu b/cpp/tests/lap/lap.cu similarity index 100% rename from cpp/test/lap/lap.cu rename to cpp/tests/lap/lap.cu diff --git a/cpp/test/linalg/add.cu b/cpp/tests/linalg/add.cu similarity index 100% rename from cpp/test/linalg/add.cu rename to cpp/tests/linalg/add.cu diff --git a/cpp/test/linalg/add.cuh b/cpp/tests/linalg/add.cuh similarity index 100% 
rename from cpp/test/linalg/add.cuh rename to cpp/tests/linalg/add.cuh diff --git a/cpp/test/linalg/axpy.cu b/cpp/tests/linalg/axpy.cu similarity index 100% rename from cpp/test/linalg/axpy.cu rename to cpp/tests/linalg/axpy.cu diff --git a/cpp/test/linalg/binary_op.cu b/cpp/tests/linalg/binary_op.cu similarity index 100% rename from cpp/test/linalg/binary_op.cu rename to cpp/tests/linalg/binary_op.cu diff --git a/cpp/test/linalg/binary_op.cuh b/cpp/tests/linalg/binary_op.cuh similarity index 100% rename from cpp/test/linalg/binary_op.cuh rename to cpp/tests/linalg/binary_op.cuh diff --git a/cpp/test/linalg/cholesky_r1.cu b/cpp/tests/linalg/cholesky_r1.cu similarity index 99% rename from cpp/test/linalg/cholesky_r1.cu rename to cpp/tests/linalg/cholesky_r1.cu index f87e07402f..e506c89a79 100644 --- a/cpp/test/linalg/cholesky_r1.cu +++ b/cpp/tests/linalg/cholesky_r1.cu @@ -170,4 +170,4 @@ TYPED_TEST(CholeskyR1Test, update) { this->testR1Update(); } TYPED_TEST(CholeskyR1Test, throwError) { this->testR1Error(); } }; // namespace linalg -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/test/linalg/coalesced_reduction.cu b/cpp/tests/linalg/coalesced_reduction.cu similarity index 100% rename from cpp/test/linalg/coalesced_reduction.cu rename to cpp/tests/linalg/coalesced_reduction.cu diff --git a/cpp/test/linalg/divide.cu b/cpp/tests/linalg/divide.cu similarity index 100% rename from cpp/test/linalg/divide.cu rename to cpp/tests/linalg/divide.cu diff --git a/cpp/test/linalg/dot.cu b/cpp/tests/linalg/dot.cu similarity index 100% rename from cpp/test/linalg/dot.cu rename to cpp/tests/linalg/dot.cu diff --git a/cpp/test/linalg/eig.cu b/cpp/tests/linalg/eig.cu similarity index 100% rename from cpp/test/linalg/eig.cu rename to cpp/tests/linalg/eig.cu diff --git a/cpp/test/linalg/eig_sel.cu b/cpp/tests/linalg/eig_sel.cu similarity index 100% rename from cpp/test/linalg/eig_sel.cu rename to cpp/tests/linalg/eig_sel.cu diff --git 
a/cpp/test/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu similarity index 100% rename from cpp/test/linalg/eigen_solvers.cu rename to cpp/tests/linalg/eigen_solvers.cu diff --git a/cpp/test/linalg/eltwise.cu b/cpp/tests/linalg/eltwise.cu similarity index 100% rename from cpp/test/linalg/eltwise.cu rename to cpp/tests/linalg/eltwise.cu diff --git a/cpp/test/linalg/gemm_layout.cu b/cpp/tests/linalg/gemm_layout.cu similarity index 100% rename from cpp/test/linalg/gemm_layout.cu rename to cpp/tests/linalg/gemm_layout.cu diff --git a/cpp/test/linalg/gemv.cu b/cpp/tests/linalg/gemv.cu similarity index 100% rename from cpp/test/linalg/gemv.cu rename to cpp/tests/linalg/gemv.cu diff --git a/cpp/test/linalg/map.cu b/cpp/tests/linalg/map.cu similarity index 100% rename from cpp/test/linalg/map.cu rename to cpp/tests/linalg/map.cu diff --git a/cpp/test/linalg/map_then_reduce.cu b/cpp/tests/linalg/map_then_reduce.cu similarity index 100% rename from cpp/test/linalg/map_then_reduce.cu rename to cpp/tests/linalg/map_then_reduce.cu diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/tests/linalg/matrix_vector.cu similarity index 100% rename from cpp/test/linalg/matrix_vector.cu rename to cpp/tests/linalg/matrix_vector.cu diff --git a/cpp/test/linalg/matrix_vector_op.cu b/cpp/tests/linalg/matrix_vector_op.cu similarity index 100% rename from cpp/test/linalg/matrix_vector_op.cu rename to cpp/tests/linalg/matrix_vector_op.cu diff --git a/cpp/test/linalg/matrix_vector_op.cuh b/cpp/tests/linalg/matrix_vector_op.cuh similarity index 100% rename from cpp/test/linalg/matrix_vector_op.cuh rename to cpp/tests/linalg/matrix_vector_op.cuh diff --git a/cpp/test/linalg/mean_squared_error.cu b/cpp/tests/linalg/mean_squared_error.cu similarity index 100% rename from cpp/test/linalg/mean_squared_error.cu rename to cpp/tests/linalg/mean_squared_error.cu diff --git a/cpp/test/linalg/multiply.cu b/cpp/tests/linalg/multiply.cu similarity index 100% rename from 
cpp/test/linalg/multiply.cu rename to cpp/tests/linalg/multiply.cu diff --git a/cpp/test/linalg/norm.cu b/cpp/tests/linalg/norm.cu similarity index 100% rename from cpp/test/linalg/norm.cu rename to cpp/tests/linalg/norm.cu diff --git a/cpp/test/linalg/normalize.cu b/cpp/tests/linalg/normalize.cu similarity index 100% rename from cpp/test/linalg/normalize.cu rename to cpp/tests/linalg/normalize.cu diff --git a/cpp/test/linalg/power.cu b/cpp/tests/linalg/power.cu similarity index 100% rename from cpp/test/linalg/power.cu rename to cpp/tests/linalg/power.cu diff --git a/cpp/test/linalg/randomized_svd.cu b/cpp/tests/linalg/randomized_svd.cu similarity index 100% rename from cpp/test/linalg/randomized_svd.cu rename to cpp/tests/linalg/randomized_svd.cu diff --git a/cpp/test/linalg/reduce.cu b/cpp/tests/linalg/reduce.cu similarity index 100% rename from cpp/test/linalg/reduce.cu rename to cpp/tests/linalg/reduce.cu diff --git a/cpp/test/linalg/reduce.cuh b/cpp/tests/linalg/reduce.cuh similarity index 100% rename from cpp/test/linalg/reduce.cuh rename to cpp/tests/linalg/reduce.cuh diff --git a/cpp/test/linalg/reduce_cols_by_key.cu b/cpp/tests/linalg/reduce_cols_by_key.cu similarity index 100% rename from cpp/test/linalg/reduce_cols_by_key.cu rename to cpp/tests/linalg/reduce_cols_by_key.cu diff --git a/cpp/test/linalg/reduce_rows_by_key.cu b/cpp/tests/linalg/reduce_rows_by_key.cu similarity index 100% rename from cpp/test/linalg/reduce_rows_by_key.cu rename to cpp/tests/linalg/reduce_rows_by_key.cu diff --git a/cpp/test/linalg/rsvd.cu b/cpp/tests/linalg/rsvd.cu similarity index 100% rename from cpp/test/linalg/rsvd.cu rename to cpp/tests/linalg/rsvd.cu diff --git a/cpp/test/linalg/sqrt.cu b/cpp/tests/linalg/sqrt.cu similarity index 100% rename from cpp/test/linalg/sqrt.cu rename to cpp/tests/linalg/sqrt.cu diff --git a/cpp/test/linalg/strided_reduction.cu b/cpp/tests/linalg/strided_reduction.cu similarity index 100% rename from cpp/test/linalg/strided_reduction.cu 
rename to cpp/tests/linalg/strided_reduction.cu diff --git a/cpp/test/linalg/subtract.cu b/cpp/tests/linalg/subtract.cu similarity index 100% rename from cpp/test/linalg/subtract.cu rename to cpp/tests/linalg/subtract.cu diff --git a/cpp/test/linalg/svd.cu b/cpp/tests/linalg/svd.cu similarity index 100% rename from cpp/test/linalg/svd.cu rename to cpp/tests/linalg/svd.cu diff --git a/cpp/test/linalg/ternary_op.cu b/cpp/tests/linalg/ternary_op.cu similarity index 100% rename from cpp/test/linalg/ternary_op.cu rename to cpp/tests/linalg/ternary_op.cu diff --git a/cpp/test/linalg/transpose.cu b/cpp/tests/linalg/transpose.cu similarity index 100% rename from cpp/test/linalg/transpose.cu rename to cpp/tests/linalg/transpose.cu diff --git a/cpp/test/linalg/unary_op.cu b/cpp/tests/linalg/unary_op.cu similarity index 100% rename from cpp/test/linalg/unary_op.cu rename to cpp/tests/linalg/unary_op.cu diff --git a/cpp/test/linalg/unary_op.cuh b/cpp/tests/linalg/unary_op.cuh similarity index 100% rename from cpp/test/linalg/unary_op.cuh rename to cpp/tests/linalg/unary_op.cuh diff --git a/cpp/test/matrix/argmax.cu b/cpp/tests/matrix/argmax.cu similarity index 99% rename from cpp/test/matrix/argmax.cu rename to cpp/tests/matrix/argmax.cu index cb3fd4a3fb..c0cf85cd38 100644 --- a/cpp/test/matrix/argmax.cu +++ b/cpp/tests/matrix/argmax.cu @@ -110,4 +110,4 @@ INSTANTIATE_TEST_SUITE_P(ArgMaxTest, ArgMaxTestF, ::testing::ValuesIn(inputsf)); INSTANTIATE_TEST_SUITE_P(ArgMaxTest, ArgMaxTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/test/matrix/argmin.cu b/cpp/tests/matrix/argmin.cu similarity index 99% rename from cpp/test/matrix/argmin.cu rename to cpp/tests/matrix/argmin.cu index 060b4a78db..f0cacacf3a 100644 --- a/cpp/test/matrix/argmin.cu +++ b/cpp/tests/matrix/argmin.cu @@ -110,4 +110,4 @@ INSTANTIATE_TEST_SUITE_P(ArgMinTest, ArgMinTestF, ::testing::ValuesIn(inputsf)); 
INSTANTIATE_TEST_SUITE_P(ArgMinTest, ArgMinTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/test/matrix/columnSort.cu b/cpp/tests/matrix/columnSort.cu similarity index 100% rename from cpp/test/matrix/columnSort.cu rename to cpp/tests/matrix/columnSort.cu diff --git a/cpp/test/matrix/diagonal.cu b/cpp/tests/matrix/diagonal.cu similarity index 99% rename from cpp/test/matrix/diagonal.cu rename to cpp/tests/matrix/diagonal.cu index c6e1f1a0d2..0a1f2af825 100644 --- a/cpp/test/matrix/diagonal.cu +++ b/cpp/tests/matrix/diagonal.cu @@ -116,4 +116,4 @@ INSTANTIATE_TEST_SUITE_P(DiagonalTest, DiagonalTestF, ::testing::ValuesIn(inputs INSTANTIATE_TEST_SUITE_P(DiagonalTest, DiagonalTestD, ::testing::ValuesIn(inputsd)); } // namespace matrix -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/test/matrix/eye.cu b/cpp/tests/matrix/eye.cu similarity index 100% rename from cpp/test/matrix/eye.cu rename to cpp/tests/matrix/eye.cu diff --git a/cpp/test/matrix/gather.cu b/cpp/tests/matrix/gather.cu similarity index 99% rename from cpp/test/matrix/gather.cu rename to cpp/tests/matrix/gather.cu index 4c13d0c1e9..f62805b2b8 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/tests/matrix/gather.cu @@ -246,4 +246,4 @@ GATHER_TEST((GatherTest), GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inplace_inputs_i64); -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/test/matrix/linewise_op.cu b/cpp/tests/matrix/linewise_op.cu similarity index 100% rename from cpp/test/matrix/linewise_op.cu rename to cpp/tests/matrix/linewise_op.cu diff --git a/cpp/test/matrix/math.cu b/cpp/tests/matrix/math.cu similarity index 100% rename from cpp/test/matrix/math.cu rename to cpp/tests/matrix/math.cu diff --git a/cpp/test/matrix/matrix.cu b/cpp/tests/matrix/matrix.cu similarity index 100% rename from cpp/test/matrix/matrix.cu 
rename to cpp/tests/matrix/matrix.cu diff --git a/cpp/test/matrix/norm.cu b/cpp/tests/matrix/norm.cu similarity index 100% rename from cpp/test/matrix/norm.cu rename to cpp/tests/matrix/norm.cu diff --git a/cpp/test/matrix/reverse.cu b/cpp/tests/matrix/reverse.cu similarity index 100% rename from cpp/test/matrix/reverse.cu rename to cpp/tests/matrix/reverse.cu diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/tests/matrix/sample_rows.cu similarity index 100% rename from cpp/test/matrix/sample_rows.cu rename to cpp/tests/matrix/sample_rows.cu diff --git a/cpp/test/matrix/scatter.cu b/cpp/tests/matrix/scatter.cu similarity index 99% rename from cpp/test/matrix/scatter.cu rename to cpp/tests/matrix/scatter.cu index 7f478c7b93..f539b9759a 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/tests/matrix/scatter.cu @@ -140,4 +140,4 @@ const std::vector> inputs_i64 = SCATTER_TEST((ScatterTest), ScatterTestFI32, inputs_i32); SCATTER_TEST((ScatterTest), ScatterTestFI64, inputs_i64); -} // end namespace raft \ No newline at end of file +} // end namespace raft diff --git a/cpp/test/matrix/select_k.cu b/cpp/tests/matrix/select_k.cu similarity index 100% rename from cpp/test/matrix/select_k.cu rename to cpp/tests/matrix/select_k.cu diff --git a/cpp/test/matrix/select_k.cuh b/cpp/tests/matrix/select_k.cuh similarity index 100% rename from cpp/test/matrix/select_k.cuh rename to cpp/tests/matrix/select_k.cuh diff --git a/cpp/test/matrix/select_large_k.cu b/cpp/tests/matrix/select_large_k.cu similarity index 100% rename from cpp/test/matrix/select_large_k.cu rename to cpp/tests/matrix/select_large_k.cu diff --git a/cpp/test/matrix/slice.cu b/cpp/tests/matrix/slice.cu similarity index 100% rename from cpp/test/matrix/slice.cu rename to cpp/tests/matrix/slice.cu diff --git a/cpp/test/matrix/triangular.cu b/cpp/tests/matrix/triangular.cu similarity index 100% rename from cpp/test/matrix/triangular.cu rename to cpp/tests/matrix/triangular.cu diff --git a/cpp/test/mr/device/buffer.cpp 
b/cpp/tests/mr/device/buffer.cpp similarity index 99% rename from cpp/test/mr/device/buffer.cpp rename to cpp/tests/mr/device/buffer.cpp index d14aa09b7a..3d5652a591 100644 --- a/cpp/test/mr/device/buffer.cpp +++ b/cpp/tests/mr/device/buffer.cpp @@ -92,4 +92,4 @@ TEST(Raft, DeviceBufferZeroResize) } // namespace device } // namespace mr -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/tests/mr/host/buffer.cpp similarity index 99% rename from cpp/test/mr/host/buffer.cpp rename to cpp/tests/mr/host/buffer.cpp index 5688ff6376..792160eb89 100644 --- a/cpp/test/mr/host/buffer.cpp +++ b/cpp/tests/mr/host/buffer.cpp @@ -69,4 +69,4 @@ TEST(Raft, DeviceToHostBuffer) } // namespace host } // namespace mr -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/tests/neighbors/ball_cover.cu similarity index 100% rename from cpp/test/neighbors/ball_cover.cu rename to cpp/tests/neighbors/ball_cover.cu diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/tests/neighbors/epsilon_neighborhood.cu similarity index 100% rename from cpp/test/neighbors/epsilon_neighborhood.cu rename to cpp/tests/neighbors/epsilon_neighborhood.cu diff --git a/cpp/test/neighbors/haversine.cu b/cpp/tests/neighbors/haversine.cu similarity index 100% rename from cpp/test/neighbors/haversine.cu rename to cpp/tests/neighbors/haversine.cu diff --git a/cpp/test/neighbors/knn_utils.cuh b/cpp/tests/neighbors/knn_utils.cuh similarity index 100% rename from cpp/test/neighbors/knn_utils.cuh rename to cpp/tests/neighbors/knn_utils.cuh diff --git a/cpp/test/neighbors/spatial_data.h b/cpp/tests/neighbors/spatial_data.h similarity index 98% rename from cpp/test/neighbors/spatial_data.h rename to cpp/tests/neighbors/spatial_data.h index d71b47cf1e..b4352f706d 100644 --- a/cpp/test/neighbors/spatial_data.h +++ b/cpp/tests/neighbors/spatial_data.h @@ -35,4 +35,4 @@ std::vector 
spatial_data = { 31.968599, -99.901813, 39.32098, -111.093731, 37.431573, -78.656894, 44.558803, -72.577841, 47.751074, -120.740139, 43.78444, -88.787868, 38.597626, -80.454903, 43.075968, -107.290284}; }; // namespace spatial -}; // namespace raft \ No newline at end of file +}; // namespace raft diff --git a/cpp/test/random/excess_sampling.cu b/cpp/tests/random/excess_sampling.cu similarity index 100% rename from cpp/test/random/excess_sampling.cu rename to cpp/tests/random/excess_sampling.cu diff --git a/cpp/test/random/make_blobs.cu b/cpp/tests/random/make_blobs.cu similarity index 100% rename from cpp/test/random/make_blobs.cu rename to cpp/tests/random/make_blobs.cu diff --git a/cpp/test/random/make_regression.cu b/cpp/tests/random/make_regression.cu similarity index 100% rename from cpp/test/random/make_regression.cu rename to cpp/tests/random/make_regression.cu diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/tests/random/multi_variable_gaussian.cu similarity index 100% rename from cpp/test/random/multi_variable_gaussian.cu rename to cpp/tests/random/multi_variable_gaussian.cu diff --git a/cpp/test/random/permute.cu b/cpp/tests/random/permute.cu similarity index 100% rename from cpp/test/random/permute.cu rename to cpp/tests/random/permute.cu diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/tests/random/rmat_rectangular_generator.cu similarity index 79% rename from cpp/test/random/rmat_rectangular_generator.cu rename to cpp/tests/random/rmat_rectangular_generator.cu index 8d668f7a8a..10c00051b6 100644 --- a/cpp/test/random/rmat_rectangular_generator.cu +++ b/cpp/tests/random/rmat_rectangular_generator.cu @@ -155,10 +155,10 @@ RAFT_KERNEL compute_hist( size_t idx = (threadIdx.x + blockIdx.x * blockDim.x) * 2; if (idx + 1 < len) { auto src = out[idx], dst = out[idx + 1]; - for (size_t j = 0; j < max_scale; ++j) { - bool src_bit = j < r_scale ? src & (1 << (r_scale - j - 1)) : 0; - bool dst_bit = j < c_scale ? 
dst & (1 << (c_scale - j - 1)) : 0; - auto idx = j * 4 + src_bit * 2 + dst_bit; + for (size_t bit_pos = 0; bit_pos < max_scale; ++bit_pos) { + bool src_bit = bit_pos < r_scale ? src & (1 << bit_pos) : 0; + bool dst_bit = bit_pos < c_scale ? dst & (1 << bit_pos) : 0; + auto idx = bit_pos * 4 + src_bit * 2 + dst_bit; atomicAdd(hist + idx, 1); } } @@ -393,11 +393,101 @@ const std::vector inputs = { {18, 16, 200000, false, 456789ULL, TOLERANCE}, {18, 16, 200000, true, 456789ULL, TOLERANCE}}; +struct RmatForcedOutputs { + size_t r_scale; + size_t c_scale; + size_t r_node_id; + size_t c_node_id; +}; + +class RmatGenForceTest : public ::testing::TestWithParam { + public: + RmatGenForceTest() + : handle{}, + stream{resource::get_cuda_stream(handle)}, + params{::testing::TestWithParam::GetParam()}, + out{2, stream}, + out_src{1, stream}, + out_dst{1, stream}, + theta{0, stream}, + h_theta{}, + state{0, GeneratorType::GenPC}, + max_scale(std::max(params.r_scale, params.c_scale)) + { + theta.resize(4 * max_scale, stream); + h_theta.resize(theta.size(), 0.f); + for (size_t bit_pos = 0; bit_pos < max_scale; ++bit_pos) { + size_t row_bit = ((params.r_node_id & (1 << bit_pos)) != 0); + size_t col_bit = ((params.c_node_id & (1 << bit_pos)) != 0); + + // now force theta for bit -- 2x2 matrix row major + h_theta[4 * bit_pos + row_bit * 2 + col_bit] = 1.f; + } + + raft::update_device(theta.data(), h_theta.data(), max_scale * 4, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + } + + protected: + void SetUp() override + { + rmat_rectangular_gen(out.data(), + out_src.data(), + out_dst.data(), + theta.data(), + params.r_scale, + params.c_scale, + size_t(1), + stream, + state); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + } + + void validate() + { + std::vector h_out(2, size_t(0)); + raft::update_host(h_out.data(), out.data(), 2, stream); + RAFT_CUDA_TRY(cudaGetLastError()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + std::vector h_out_expect; + 
h_out_expect.push_back(params.r_node_id); + h_out_expect.push_back(params.c_node_id); + + ASSERT_TRUE(hostVecMatch(h_out_expect, h_out, raft::Compare())); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + RmatForcedOutputs params; + size_t max_scale; + std::vector h_theta; + rmm::device_uvector out, out_src, out_dst; + rmm::device_uvector theta; + RngState state; +}; + +const std::vector forcedInputs = {{16, 16, 12425, 1233}, + {16, 16, 12, 424}, + {5, 5, 15, 15}, + {5, 6, 15, 15}, + {5, 15, 15, 15}, + {6, 5, 15, 15}, + {15, 5, 15, 15}, + {32, 16, 1253163, 60000}, + {16, 16, 12, 0}, + {16, 16, 0, 1255}}; + TEST_P(RmatGenTest, Result) { validate(); } INSTANTIATE_TEST_SUITE_P(RmatGenTests, RmatGenTest, ::testing::ValuesIn(inputs)); TEST_P(RmatGenMdspanTest, Result) { validate(); } INSTANTIATE_TEST_SUITE_P(RmatGenMdspanTests, RmatGenMdspanTest, ::testing::ValuesIn(inputs)); +TEST_P(RmatGenForceTest, Result) { validate(); } +INSTANTIATE_TEST_SUITE_P(RmatGenForceTests, RmatGenForceTest, ::testing::ValuesIn(forcedInputs)); + } // namespace random } // namespace raft diff --git a/cpp/test/random/rng.cu b/cpp/tests/random/rng.cu similarity index 99% rename from cpp/test/random/rng.cu rename to cpp/tests/random/rng.cu index a37f150d4c..172f94ae50 100644 --- a/cpp/test/random/rng.cu +++ b/cpp/tests/random/rng.cu @@ -407,8 +407,7 @@ TEST(Rng, MeanError) RngState r(seed, rtype); normal(handle, r, data.data(), len, 3.3f, 0.23f); // uniform(r, data, len, -1.0, 2.0); - raft::stats::mean( - mean_result.data(), data.data(), num_samples, num_experiments, false, false, stream); + raft::stats::mean(mean_result.data(), data.data(), num_samples, num_experiments, false, stream); raft::stats::stddev(std_result.data(), data.data(), mean_result.data(), diff --git a/cpp/test/random/rng_discrete.cu b/cpp/tests/random/rng_discrete.cu similarity index 100% rename from cpp/test/random/rng_discrete.cu rename to cpp/tests/random/rng_discrete.cu diff --git 
a/cpp/test/random/rng_int.cu b/cpp/tests/random/rng_int.cu similarity index 100% rename from cpp/test/random/rng_int.cu rename to cpp/tests/random/rng_int.cu diff --git a/cpp/test/random/rng_pcg_host_api.cu b/cpp/tests/random/rng_pcg_host_api.cu similarity index 100% rename from cpp/test/random/rng_pcg_host_api.cu rename to cpp/tests/random/rng_pcg_host_api.cu diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/tests/random/sample_without_replacement.cu similarity index 100% rename from cpp/test/random/sample_without_replacement.cu rename to cpp/tests/random/sample_without_replacement.cu diff --git a/cpp/test/sparse/add.cu b/cpp/tests/sparse/add.cu similarity index 100% rename from cpp/test/sparse/add.cu rename to cpp/tests/sparse/add.cu diff --git a/cpp/test/sparse/convert_coo.cu b/cpp/tests/sparse/convert_coo.cu similarity index 100% rename from cpp/test/sparse/convert_coo.cu rename to cpp/tests/sparse/convert_coo.cu diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/tests/sparse/convert_csr.cu similarity index 57% rename from cpp/test/sparse/convert_csr.cu rename to cpp/tests/sparse/convert_csr.cu index 1cd49b0bbd..d74296a267 100644 --- a/cpp/test/sparse/convert_csr.cu +++ b/cpp/tests/sparse/convert_csr.cu @@ -17,6 +17,7 @@ #include "../test_utils.cuh" #include +#include #include #include #include @@ -249,7 +250,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam& bitmap) { index_t total = static_cast(m * n); - index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t num_ones = static_cast((total * 1.0f) * (1.0f - sparsity)); index_t res = num_ones; for (auto& item : bitmap) { @@ -257,7 +258,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam dis(0, total - 1); while (num_ones > 0) { @@ -318,8 +319,8 @@ class BitmapToCSRTest : public ::testing::TestWithParam cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); - std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); + 
std::vector cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); + std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); std::sort(cols1.begin(), cols1.end()); std::sort(cols2.begin(), cols2.end()); @@ -370,7 +371,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam(handle, params.n_rows, params.n_cols, nnz); auto csr_view = csr.structure_view(); - convert::bitmap_to_csr(handle, bitmap, csr); + bitmap.to_csr(handle, csr); raft::copy(indptr_d.data(), csr_view.get_indptr().data(), indptr_d.size(), stream); raft::copy(indices_d.data(), csr_view.get_indices().data(), indices_d.size(), stream); raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); @@ -379,7 +380,7 @@ class BitmapToCSRTest : public ::testing::TestWithParam(handle, csr_view); - convert::bitmap_to_csr(handle, bitmap, csr); + bitmap.to_csr(handle, csr); raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); } resource::sync_stream(handle); @@ -396,9 +397,13 @@ class BitmapToCSRTest : public ::testing::TestWithParam( - values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)); + EXPECT_TRUE(csr_compare(indptr_h, indices_h, indptr_expected_h, indices_expected_h)) + << " n_row: " << params.n_rows << ", n_cols: " << params.n_cols << ", nnz: " << nnz + << ", random_number: " << random_number; + EXPECT_TRUE(raft::devArrMatch( + values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)) + << " n_row: " << params.n_rows << ", n_cols: " << params.n_cols << ", nnz: " << nnz + << ", random_number: " << random_number; } protected: @@ -418,6 +423,8 @@ class BitmapToCSRTest : public ::testing::TestWithParam indptr_expected_d; rmm::device_uvector indices_expected_d; rmm::device_uvector values_expected_d; + + unsigned int random_number; }; using BitmapToCSRTestI = BitmapToCSRTest; @@ -426,8 +433,295 @@ TEST_P(BitmapToCSRTestI, Result) { Run(); } using BitmapToCSRTestL = BitmapToCSRTest; 
TEST_P(BitmapToCSRTestL, Result) { Run(); } +using BitmapToCSRTestLOnLargeSize = BitmapToCSRTest; +TEST_P(BitmapToCSRTestLOnLargeSize, Result) { Run(); } + template const std::vector> bitmaptocsr_inputs = { + {0, 0, 0.8, false}, + {10, 32, 0.6, false}, + {10, 3, 0.8, false}, + {32, 1024, 0.6, false}, + {1024, 1048576, 0.99, false}, + {1024, 1024, 0.6, false}, + {64 * 1024 + 10, 2, 0.7, false}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.7, false}, // No peeling-remainder + {17, 16, 0.7, false}, // Check peeling-remainder + {18, 16, 0.7, false}, // Check peeling-remainder + {32 + 9, 33, 0.8, false}, // Check peeling-remainder + {2, 33, 0.8, false}, // Check peeling-remainder + {0, 0, 0.8, true}, + {10, 32, 0.6, true}, + {10, 3, 0.8, true}, + {32, 1024, 0.6, true}, + {1024, 1048576, 0.99, true}, + {1024, 1024, 0.6, true}, + {64 * 1024 + 10, 2, 0.7, true}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.7, true}, // No peeling-remainder + {17, 16, 0.7, true}, // Check peeling-remainder + {18, 16, 0.7, true}, // Check peeling-remainder + {32 + 9, 33, 0.8, true}, // Check peeling-remainder + {2, 33, 0.8, true}, // Check peeling-remainder +}; + +template +const std::vector> bitmaptocsr_large_inputs = { + {100, 100000000, 0.99, true}, {100, 100000000, 0.95, false}, {100, 100000000 + 17, 0.95, false}}; + +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitmapToCSRTestI, + ::testing::ValuesIn(bitmaptocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitmapToCSRTestL, + ::testing::ValuesIn(bitmaptocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitmapToCSRTestLOnLargeSize, + ::testing::ValuesIn(bitmaptocsr_large_inputs)); + +/******************************** bitset to csr ********************************/ + +template +struct BitsetToCSRInputs { + index_t n_repeat; + index_t n_cols; + float sparsity; + bool owning; +}; + +template +class BitsetToCSRTest : public ::testing::TestWithParam> { + public: + 
BitsetToCSRTest() + : stream(resource::get_cuda_stream(handle)), + params(::testing::TestWithParam>::GetParam()), + bitset_d(0, stream), + indices_d(0, stream), + indptr_d(0, stream), + values_d(0, stream), + indptr_expected_d(0, stream), + indices_expected_d(0, stream), + values_expected_d(0, stream) + { + } + + protected: + void repeat_cpu_bitset(std::vector& input, + size_t input_bits, + size_t repeat, + std::vector& output) + { + const size_t output_bits = input_bits * repeat; + const size_t output_units = (output_bits + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + + std::memset(output.data(), 0, output_units * sizeof(bitset_t)); + + size_t output_bit_index = 0; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bitset_t) * 8); + size_t input_bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (input[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bitset_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bitset_t) * 8); + + output[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } + } + + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitset) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t res = num_ones; + + for (auto& item : bitset) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + while (num_ones > 0) { + index_t index = dis(gen); + + bitset_t& element = bitset[index / (8 * sizeof(bitset_t))]; + index_t bit_position = index % (8 * sizeof(bitset_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void cpu_convert_to_csr(std::vector& bitset, + index_t rows, + index_t cols, + std::vector& 
indices, + std::vector& indptr) + { + index_t offset_indptr = 0; + index_t offset_values = 0; + indptr[offset_indptr++] = 0; + + index_t index = 0; + bitset_t element = 0; + index_t bit_position = 0; + + for (index_t i = 0; i < rows; ++i) { + for (index_t j = 0; j < cols; ++j) { + index = i * cols + j; + element = bitset[index / (8 * sizeof(bitset_t))]; + bit_position = index % (8 * sizeof(bitset_t)); + + if (((element >> bit_position) & 1)) { + indices[offset_values] = static_cast(j); + offset_values++; + } + } + indptr[offset_indptr++] = static_cast(offset_values); + } + } + + bool csr_compare(const std::vector& row_ptrs1, + const std::vector& col_indices1, + const std::vector& row_ptrs2, + const std::vector& col_indices2) + { + if (row_ptrs1.size() != row_ptrs2.size()) { return false; } + + if (col_indices1.size() != col_indices2.size()) { return false; } + + if (!std::equal(row_ptrs1.begin(), row_ptrs1.end(), row_ptrs2.begin())) { return false; } + + for (size_t i = 0; i < row_ptrs1.size() - 1; ++i) { + size_t start_idx = row_ptrs1[i]; + size_t end_idx = row_ptrs1[i + 1]; + + std::vector cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); + std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); + + std::sort(cols1.begin(), cols1.end()); + std::sort(cols2.begin(), cols2.end()); + + if (cols1 != cols2) { return false; } + } + + return true; + } + + void SetUp() override + { + index_t element = raft::ceildiv(1 * params.n_cols, index_t(sizeof(bitset_t) * 8)); + std::vector bitset_h(element); + std::vector bitset_repeat_h(element * params.n_repeat); + + nnz = create_sparse_matrix(1, params.n_cols, params.sparsity, bitset_h); + + repeat_cpu_bitset(bitset_h, size_t(params.n_cols), size_t(params.n_repeat), bitset_repeat_h); + nnz *= params.n_repeat; + + std::vector indices_h(nnz); + std::vector indptr_h(params.n_repeat + 1); + + cpu_convert_to_csr(bitset_repeat_h, params.n_repeat, params.n_cols, indices_h, indptr_h); + 
+ bitset_d.resize(bitset_h.size(), stream); + indptr_d.resize(params.n_repeat + 1, stream); + indices_d.resize(nnz, stream); + + indptr_expected_d.resize(params.n_repeat + 1, stream); + indices_expected_d.resize(nnz, stream); + values_expected_d.resize(nnz, stream); + + thrust::fill_n(resource::get_thrust_policy(handle), values_expected_d.data(), nnz, value_t{1}); + + values_d.resize(nnz, stream); + + update_device(indices_expected_d.data(), indices_h.data(), indices_h.size(), stream); + update_device(indptr_expected_d.data(), indptr_h.data(), indptr_h.size(), stream); + update_device(bitset_d.data(), bitset_h.data(), bitset_h.size(), stream); + + resource::sync_stream(handle); + } + + void Run() + { + auto bitset = raft::core::bitset_view(bitset_d.data(), params.n_cols); + + if (params.owning) { + auto csr = + raft::make_device_csr_matrix(handle, params.n_repeat, params.n_cols, nnz); + auto csr_view = csr.structure_view(); + + bitset.to_csr(handle, csr); + raft::copy(indptr_d.data(), csr_view.get_indptr().data(), indptr_d.size(), stream); + raft::copy(indices_d.data(), csr_view.get_indices().data(), indices_d.size(), stream); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } else { + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_repeat, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + bitset.to_csr(handle, csr); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } + resource::sync_stream(handle); + + std::vector indices_h(indices_expected_d.size(), 0); + std::vector indices_expected_h(indices_expected_d.size(), 0); + update_host(indices_h.data(), indices_d.data(), indices_h.size(), stream); + update_host(indices_expected_h.data(), indices_expected_d.data(), indices_h.size(), stream); + + std::vector indptr_h(indptr_expected_d.size(), 0); + std::vector indptr_expected_h(indptr_expected_d.size(), 0); + 
update_host(indptr_h.data(), indptr_d.data(), indptr_h.size(), stream); + update_host(indptr_expected_h.data(), indptr_expected_d.data(), indptr_h.size(), stream); + + resource::sync_stream(handle); + + ASSERT_TRUE(csr_compare(indptr_h, indices_h, indptr_expected_h, indices_expected_h)); + ASSERT_TRUE(raft::devArrMatch( + values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + BitsetToCSRInputs params; + + rmm::device_uvector bitset_d; + + index_t nnz; + + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + rmm::device_uvector indptr_expected_d; + rmm::device_uvector indices_expected_d; + rmm::device_uvector values_expected_d; +}; + +using BitsetToCSRTestI = BitsetToCSRTest; +TEST_P(BitsetToCSRTestI, Result) { Run(); } + +using BitsetToCSRTestL = BitsetToCSRTest; +TEST_P(BitsetToCSRTestL, Result) { Run(); } + +using BitsetToCSRTestLOnLargeSize = BitsetToCSRTest; +TEST_P(BitsetToCSRTestLOnLargeSize, Result) { Run(); } + +template +const std::vector> bitsettocsr_inputs = { {0, 0, 0.2, false}, {10, 32, 0.4, false}, {10, 3, 0.2, false}, @@ -454,12 +748,19 @@ const std::vector> bitmaptocsr_inputs = { {2, 33, 0.2, true}, // Check peeling-remainder }; +template +const std::vector> bitsettocsr_large_inputs = { + {100, 100000000, 0.01, true}, {100, 100000000, 0.05, false}, {100, 100000000 + 17, 0.05, false}}; + INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, - BitmapToCSRTestI, - ::testing::ValuesIn(bitmaptocsr_inputs)); + BitsetToCSRTestI, + ::testing::ValuesIn(bitsettocsr_inputs)); INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, - BitmapToCSRTestL, - ::testing::ValuesIn(bitmaptocsr_inputs)); + BitsetToCSRTestL, + ::testing::ValuesIn(bitsettocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitsetToCSRTestLOnLargeSize, + ::testing::ValuesIn(bitsettocsr_large_inputs)); } // namespace sparse } // namespace raft diff --git 
a/cpp/test/sparse/csr_row_slice.cu b/cpp/tests/sparse/csr_row_slice.cu similarity index 100% rename from cpp/test/sparse/csr_row_slice.cu rename to cpp/tests/sparse/csr_row_slice.cu diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/tests/sparse/csr_to_dense.cu similarity index 100% rename from cpp/test/sparse/csr_to_dense.cu rename to cpp/tests/sparse/csr_to_dense.cu diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/tests/sparse/csr_transpose.cu similarity index 100% rename from cpp/test/sparse/csr_transpose.cu rename to cpp/tests/sparse/csr_transpose.cu diff --git a/cpp/test/sparse/degree.cu b/cpp/tests/sparse/degree.cu similarity index 100% rename from cpp/test/sparse/degree.cu rename to cpp/tests/sparse/degree.cu diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/tests/sparse/dist_coo_spmv.cu similarity index 100% rename from cpp/test/sparse/dist_coo_spmv.cu rename to cpp/tests/sparse/dist_coo_spmv.cu diff --git a/cpp/test/sparse/distance.cu b/cpp/tests/sparse/distance.cu similarity index 100% rename from cpp/test/sparse/distance.cu rename to cpp/tests/sparse/distance.cu diff --git a/cpp/test/sparse/filter.cu b/cpp/tests/sparse/filter.cu similarity index 100% rename from cpp/test/sparse/filter.cu rename to cpp/tests/sparse/filter.cu diff --git a/cpp/test/sparse/masked_matmul.cu b/cpp/tests/sparse/masked_matmul.cu similarity index 75% rename from cpp/test/sparse/masked_matmul.cu rename to cpp/tests/sparse/masked_matmul.cu index f883beae32..5ee1677015 100644 --- a/cpp/test/sparse/masked_matmul.cu +++ b/cpp/tests/sparse/masked_matmul.cu @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -46,6 +46,8 @@ struct MaskedMatmulInputs { unsigned long long int seed; }; +enum class BitsLayout { Bitset, Bitmap }; + template struct sum_abs_op { __host__ __device__ value_t operator()(const value_t& x, const value_t& y) const @@ -87,7 +89,8 @@ bool isCuSparseVersionGreaterThan_12_0_1() template class MaskedMatmulTest @@ -98,7 +101,7 @@ 
class MaskedMatmulTest stream(resource::get_cuda_stream(handle)), a_data_d(0, resource::get_cuda_stream(handle)), b_data_d(0, resource::get_cuda_stream(handle)), - bitmap_d(0, resource::get_cuda_stream(handle)), + bits_d(0, resource::get_cuda_stream(handle)), c_indptr_d(0, resource::get_cuda_stream(handle)), c_indices_d(0, resource::get_cuda_stream(handle)), c_data_d(0, resource::get_cuda_stream(handle)), @@ -107,14 +110,14 @@ class MaskedMatmulTest } protected: - index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bits) { index_t total = static_cast(m * n); index_t num_ones = static_cast((total * 1.0f) * sparsity); index_t res = num_ones; - for (auto& item : bitmap) { - item = static_cast(0); + for (auto& item : bits) { + item = static_cast(0); } std::random_device rd; @@ -124,8 +127,8 @@ class MaskedMatmulTest while (num_ones > 0) { index_t index = dis(gen); - bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; - index_t bit_position = index % (8 * sizeof(bitmap_t)); + bits_t& element = bits[index / (8 * sizeof(bits_t))]; + index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { element |= (static_cast(1) << bit_position); @@ -135,7 +138,27 @@ class MaskedMatmulTest return res; } - void cpu_convert_to_csr(std::vector& bitmap, + void repeat_cpu_bitset_inplace(std::vector& inout, size_t input_bits, size_t repeat) + { + size_t output_bit_index = input_bits; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bits_t) * 8); + size_t input_bit_offset = i % (sizeof(bits_t) * 8); + bool bit = (inout[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bits_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bits_t) * 8); + + inout[output_unit_index] |= (static_cast(bit) << 
output_bit_offset); + + ++output_bit_index; + } + } + } + + void cpu_convert_to_csr(std::vector& bits, index_t rows, index_t cols, std::vector& indices, @@ -146,14 +169,14 @@ class MaskedMatmulTest indptr[offset_indptr++] = 0; index_t index = 0; - bitmap_t element = 0; + bits_t element = 0; index_t bit_position = 0; for (index_t i = 0; i < rows; ++i) { for (index_t j = 0; j < cols; ++j) { index = i * cols + j; - element = bitmap[index / (8 * sizeof(bitmap_t))]; - bit_position = index % (8 * sizeof(bitmap_t)); + element = bits[index / (8 * sizeof(bits_t))]; + bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1)) { indices[offset_values] = static_cast(j); @@ -201,15 +224,17 @@ class MaskedMatmulTest index_t b_size = params.k * params.n; index_t c_size = params.m * params.n; - index_t element = raft::ceildiv(params.m * params.n, index_t(sizeof(bitmap_t) * 8)); - std::vector bitmap_h(element); + index_t element = raft::ceildiv(params.m * params.n, index_t(sizeof(bits_t) * 8)); + std::vector bits_h(element); + + std::memset(bits_h.data(), 0, bits_h.size() * sizeof(bits_t)); std::vector a_data_h(a_size); std::vector b_data_h(b_size); a_data_d.resize(a_size, stream); b_data_d.resize(b_size, stream); - bitmap_d.resize(bitmap_h.size(), stream); + bits_d.resize(bits_h.size(), stream); auto blobs_a_b = raft::make_device_matrix(handle, 1, a_size + b_size); auto labels = raft::make_device_vector(handle, 1); @@ -262,18 +287,27 @@ class MaskedMatmulTest resource::sync_stream(handle); - index_t c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bitmap_h); + index_t c_true_nnz = 0; + if constexpr (bits_layout == BitsLayout::Bitmap) { + c_true_nnz = create_sparse_matrix(params.m, params.n, params.sparsity, bits_h); + } else if constexpr (bits_layout == BitsLayout::Bitset) { + c_true_nnz = create_sparse_matrix(1, params.n, params.sparsity, bits_h); + repeat_cpu_bitset_inplace(bits_h, params.n, params.m - 1); + c_true_nnz *= params.m; + 
} else { + GTEST_SKIP() << "Unsupported BitsLayout!"; + } std::vector c_indptr_h(params.m + 1); std::vector c_indices_h(c_true_nnz); std::vector c_data_h(c_true_nnz); - cpu_convert_to_csr(bitmap_h, params.m, params.n, c_indices_h, c_indptr_h); + cpu_convert_to_csr(bits_h, params.m, params.n, c_indices_h, c_indptr_h); c_data_d.resize(c_data_h.size(), stream); update_device(c_data_d.data(), c_data_h.data(), c_data_h.size(), stream); - update_device(bitmap_d.data(), bitmap_h.data(), bitmap_h.size(), stream); + update_device(bits_d.data(), bits_h.data(), bits_h.size(), stream); resource::sync_stream(handle); cpu_sddmm(a_data_h, b_data_h, c_data_h, c_indices_h, c_indptr_h, true, true); @@ -304,9 +338,6 @@ class MaskedMatmulTest auto B = raft::make_device_matrix_view(b_data_d.data(), params.n, params.k); - auto mask = - raft::core::bitmap_view(bitmap_d.data(), params.m, params.n); - auto c_structure = raft::make_device_compressed_structure_view( c_indptr_d.data(), c_indices_d.data(), @@ -316,7 +347,15 @@ class MaskedMatmulTest auto C = raft::make_device_csr_matrix_view(c_data_d.data(), c_structure); - raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + if constexpr (bits_layout == BitsLayout::Bitmap) { + auto mask = raft::core::bitmap_view(bits_d.data(), params.m, params.n); + raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + } else if constexpr (bits_layout == BitsLayout::Bitset) { + auto mask = raft::core::bitset_view(bits_d.data(), params.n); + raft::sparse::linalg::masked_matmul(handle, A, B, mask, C); + } else { + GTEST_SKIP() << "Unsupported BitsLayout!"; + } resource::sync_stream(handle); @@ -344,7 +383,7 @@ class MaskedMatmulTest rmm::device_uvector a_data_d; rmm::device_uvector b_data_d; - rmm::device_uvector bitmap_d; + rmm::device_uvector bits_d; rmm::device_uvector c_indptr_d; rmm::device_uvector c_indices_d; @@ -353,14 +392,23 @@ class MaskedMatmulTest rmm::device_uvector c_expected_data_d; }; -using MaskedMatmulTestF = 
MaskedMatmulTest; -TEST_P(MaskedMatmulTestF, Result) { Run(); } +using MaskedMatmulOnBitmapTestF = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestF, Result) { Run(); } + +using MaskedMatmulOnBitmapTestD = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestD, Result) { Run(); } -using MaskedMatmulTestD = MaskedMatmulTest; -TEST_P(MaskedMatmulTestD, Result) { Run(); } +using MaskedMatmulOnBitmapTestH = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitmapTestH, Result) { Run(); } -using MaskedMatmulTestH = MaskedMatmulTest; -TEST_P(MaskedMatmulTestH, Result) { Run(); } +using MaskedMatmulOnBitsetTestF = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestF, Result) { Run(); } + +using MaskedMatmulOnBitsetTestD = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestD, Result) { Run(); } + +using MaskedMatmulOnBitsetTestH = MaskedMatmulTest; +TEST_P(MaskedMatmulOnBitsetTestH, Result) { Run(); } const std::vector> sddmm_inputs_f = { {0.001f, 2, 255, 1023, 0.19, 1234ULL}, @@ -419,11 +467,29 @@ const std::vector> sddmm_inputs_h = { {0.0003f, 31, 1025, 1025, 0.19, 1234ULL}, {0.001f, 1024, 1024, 1024, 0.1, 1234ULL}}; -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestF, ::testing::ValuesIn(sddmm_inputs_f)); +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestF, + ::testing::ValuesIn(sddmm_inputs_f)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestD, + ::testing::ValuesIn(sddmm_inputs_d)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitmapTestH, + ::testing::ValuesIn(sddmm_inputs_h)); + +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestF, + ::testing::ValuesIn(sddmm_inputs_f)); -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestD, ::testing::ValuesIn(sddmm_inputs_d)); +INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestD, + ::testing::ValuesIn(sddmm_inputs_d)); -INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, MaskedMatmulTestH, ::testing::ValuesIn(sddmm_inputs_h)); 
+INSTANTIATE_TEST_CASE_P(MaskedMatmulTest, + MaskedMatmulOnBitsetTestH, + ::testing::ValuesIn(sddmm_inputs_h)); } // namespace sparse } // namespace raft diff --git a/cpp/test/sparse/mst.cu b/cpp/tests/sparse/mst.cu similarity index 100% rename from cpp/test/sparse/mst.cu rename to cpp/tests/sparse/mst.cu diff --git a/cpp/test/sparse/norm.cu b/cpp/tests/sparse/norm.cu similarity index 100% rename from cpp/test/sparse/norm.cu rename to cpp/tests/sparse/norm.cu diff --git a/cpp/test/sparse/normalize.cu b/cpp/tests/sparse/normalize.cu similarity index 100% rename from cpp/test/sparse/normalize.cu rename to cpp/tests/sparse/normalize.cu diff --git a/cpp/test/sparse/reduce.cu b/cpp/tests/sparse/reduce.cu similarity index 100% rename from cpp/test/sparse/reduce.cu rename to cpp/tests/sparse/reduce.cu diff --git a/cpp/test/sparse/row_op.cu b/cpp/tests/sparse/row_op.cu similarity index 100% rename from cpp/test/sparse/row_op.cu rename to cpp/tests/sparse/row_op.cu diff --git a/cpp/test/sparse/sddmm.cu b/cpp/tests/sparse/sddmm.cu similarity index 100% rename from cpp/test/sparse/sddmm.cu rename to cpp/tests/sparse/sddmm.cu diff --git a/cpp/test/sparse/select_k_csr.cu b/cpp/tests/sparse/select_k_csr.cu similarity index 100% rename from cpp/test/sparse/select_k_csr.cu rename to cpp/tests/sparse/select_k_csr.cu diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu similarity index 100% rename from cpp/test/sparse/solver/lanczos.cu rename to cpp/tests/sparse/solver/lanczos.cu diff --git a/cpp/test/sparse/sort.cu b/cpp/tests/sparse/sort.cu similarity index 100% rename from cpp/test/sparse/sort.cu rename to cpp/tests/sparse/sort.cu diff --git a/cpp/test/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu similarity index 100% rename from cpp/test/sparse/spectral_matrix.cu rename to cpp/tests/sparse/spectral_matrix.cu diff --git a/cpp/test/sparse/spgemmi.cu b/cpp/tests/sparse/spgemmi.cu similarity index 100% rename from 
cpp/test/sparse/spgemmi.cu rename to cpp/tests/sparse/spgemmi.cu diff --git a/cpp/test/sparse/spmm.cu b/cpp/tests/sparse/spmm.cu similarity index 100% rename from cpp/test/sparse/spmm.cu rename to cpp/tests/sparse/spmm.cu diff --git a/cpp/test/sparse/symmetrize.cu b/cpp/tests/sparse/symmetrize.cu similarity index 100% rename from cpp/test/sparse/symmetrize.cu rename to cpp/tests/sparse/symmetrize.cu diff --git a/cpp/test/stats/accuracy.cu b/cpp/tests/stats/accuracy.cu similarity index 100% rename from cpp/test/stats/accuracy.cu rename to cpp/tests/stats/accuracy.cu diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/tests/stats/adjusted_rand_index.cu similarity index 100% rename from cpp/test/stats/adjusted_rand_index.cu rename to cpp/tests/stats/adjusted_rand_index.cu diff --git a/cpp/test/stats/completeness_score.cu b/cpp/tests/stats/completeness_score.cu similarity index 100% rename from cpp/test/stats/completeness_score.cu rename to cpp/tests/stats/completeness_score.cu diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/tests/stats/contingencyMatrix.cu similarity index 100% rename from cpp/test/stats/contingencyMatrix.cu rename to cpp/tests/stats/contingencyMatrix.cu diff --git a/cpp/test/stats/cov.cu b/cpp/tests/stats/cov.cu similarity index 99% rename from cpp/test/stats/cov.cu rename to cpp/tests/stats/cov.cu index 602f356b9f..3f2a3dcebf 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/tests/stats/cov.cu @@ -72,7 +72,7 @@ class CovTest : public ::testing::TestWithParam> { cov_act.resize(cols * cols, stream); normal(handle, r, data.data(), len, params.mean, var); - raft::stats::mean(mean_act.data(), data.data(), cols, rows, false, params.rowMajor, stream); + raft::stats::mean(mean_act.data(), data.data(), cols, rows, params.rowMajor, stream); if (params.rowMajor) { using layout = raft::row_major; cov(handle, @@ -102,7 +102,7 @@ class CovTest : public ::testing::TestWithParam> { raft::update_device(data_cm.data(), data_h, 6, stream); 
raft::update_device(cov_cm_ref.data(), cov_cm_ref_h, 4, stream); - raft::stats::mean(mean_cm.data(), data_cm.data(), 2, 3, false, false, stream); + raft::stats::mean(mean_cm.data(), data_cm.data(), 2, 3, false, stream); cov(handle, cov_cm.data(), data_cm.data(), mean_cm.data(), 2, 3, true, false, true, stream); } diff --git a/cpp/test/stats/dispersion.cu b/cpp/tests/stats/dispersion.cu similarity index 100% rename from cpp/test/stats/dispersion.cu rename to cpp/tests/stats/dispersion.cu diff --git a/cpp/test/stats/entropy.cu b/cpp/tests/stats/entropy.cu similarity index 100% rename from cpp/test/stats/entropy.cu rename to cpp/tests/stats/entropy.cu diff --git a/cpp/test/stats/histogram.cu b/cpp/tests/stats/histogram.cu similarity index 100% rename from cpp/test/stats/histogram.cu rename to cpp/tests/stats/histogram.cu diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/tests/stats/homogeneity_score.cu similarity index 100% rename from cpp/test/stats/homogeneity_score.cu rename to cpp/tests/stats/homogeneity_score.cu diff --git a/cpp/test/stats/information_criterion.cu b/cpp/tests/stats/information_criterion.cu similarity index 100% rename from cpp/test/stats/information_criterion.cu rename to cpp/tests/stats/information_criterion.cu diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/tests/stats/kl_divergence.cu similarity index 100% rename from cpp/test/stats/kl_divergence.cu rename to cpp/tests/stats/kl_divergence.cu diff --git a/cpp/test/stats/mean.cu b/cpp/tests/stats/mean.cu similarity index 50% rename from cpp/test/stats/mean.cu rename to cpp/tests/stats/mean.cu index c5fe83d95b..e72d4eaf74 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/tests/stats/mean.cu @@ -33,7 +33,7 @@ template struct MeanInputs { T tolerance, mean; int rows, cols; - bool sample, rowMajor; + bool rowMajor; unsigned long long int seed; T stddev = (T)1.0; }; @@ -42,7 +42,7 @@ template ::std::ostream& operator<<(::std::ostream& os, const MeanInputs& dims) { return os << "{ " << 
dims.tolerance << ", " << dims.rows << ", " << dims.cols << ", " - << dims.sample << ", " << dims.rowMajor << ", " << dims.stddev << "}" << std::endl; + << ", " << dims.rowMajor << ", " << dims.stddev << "}" << std::endl; } template @@ -74,14 +74,12 @@ class MeanTest : public ::testing::TestWithParam> { using layout = raft::row_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample); + raft::make_device_vector_view(mean_act.data(), cols)); } else { using layout = raft::col_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample); + raft::make_device_vector_view(mean_act.data(), cols)); } } @@ -98,72 +96,51 @@ class MeanTest : public ::testing::TestWithParam> { // measured mean (of a normal distribution) will fall outside of an epsilon of // 0.15 only 4/10000 times. (epsilon of 0.1 will fail 30/100 times) const std::vector> inputsf = { - {0.15f, 1.f, 1024, 32, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, false, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, false, 1234ULL}, - {0.15f, 1.f, 1024, 32, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, true, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, true, 1234ULL}, - {0.15f, -1.f, 1030, 1, false, false, 1234ULL}, - {0.15f, -1.f, 1030, 60, true, false, 1234ULL}, - {2.0f, -1.f, 31, 120, false, false, 1234ULL}, - {2.0f, -1.f, 1, 130, false, false, 1234ULL}, - 
{0.15f, -1.f, 1030, 1, false, true, 1234ULL}, - {0.15f, -1.f, 1030, 60, true, true, 1234ULL}, - {2.0f, -1.f, 31, 120, false, true, 1234ULL}, - {2.0f, -1.f, 1, 130, false, true, 1234ULL}, - {2.0f, -1.f, 1, 1, false, false, 1234ULL}, - {2.0f, -1.f, 1, 1, false, true, 1234ULL}, - {2.0f, -1.f, 7, 23, false, false, 1234ULL}, - {2.0f, -1.f, 7, 23, false, true, 1234ULL}, - {2.0f, -1.f, 17, 5, false, false, 1234ULL}, - {2.0f, -1.f, 17, 5, false, true, 1234ULL}, - {0.0001f, 0.1f, 1 << 27, 2, false, false, 1234ULL, 0.0001f}, - {0.0001f, 0.1f, 1 << 27, 2, false, true, 1234ULL, 0.0001f}}; - -const std::vector> inputsd = { - {0.15, 1.0, 1024, 32, true, false, 1234ULL}, - {0.15, 1.0, 1024, 64, true, false, 1234ULL}, - {0.15, 1.0, 1024, 128, true, false, 1234ULL}, - {0.15, 1.0, 1024, 256, true, false, 1234ULL}, - {0.15, -1.0, 1024, 32, false, false, 1234ULL}, - {0.15, -1.0, 1024, 64, false, false, 1234ULL}, - {0.15, -1.0, 1024, 128, false, false, 1234ULL}, - {0.15, -1.0, 1024, 256, false, false, 1234ULL}, - {0.15, 1.0, 1024, 32, true, true, 1234ULL}, - {0.15, 1.0, 1024, 64, true, true, 1234ULL}, - {0.15, 1.0, 1024, 128, true, true, 1234ULL}, - {0.15, 1.0, 1024, 256, true, true, 1234ULL}, - {0.15, -1.0, 1024, 32, false, true, 1234ULL}, - {0.15, -1.0, 1024, 64, false, true, 1234ULL}, - {0.15, -1.0, 1024, 128, false, true, 1234ULL}, - {0.15, -1.0, 1024, 256, false, true, 1234ULL}, - {0.15, -1.0, 1030, 1, false, false, 1234ULL}, - {0.15, -1.0, 1030, 60, true, false, 1234ULL}, - {2.0, -1.0, 31, 120, false, false, 1234ULL}, - {2.0, -1.0, 1, 130, false, false, 1234ULL}, - {0.15, -1.0, 1030, 1, false, true, 1234ULL}, - {0.15, -1.0, 1030, 60, true, true, 1234ULL}, - {2.0, -1.0, 31, 120, false, true, 1234ULL}, - {2.0, -1.0, 1, 130, false, true, 1234ULL}, - {2.0, -1.0, 1, 1, false, false, 1234ULL}, - {2.0, -1.0, 1, 1, false, true, 1234ULL}, - {2.0, -1.0, 7, 23, false, false, 1234ULL}, - {2.0, -1.0, 7, 23, false, true, 1234ULL}, - {2.0, -1.0, 17, 5, false, false, 1234ULL}, - {2.0, -1.0, 17, 
5, false, true, 1234ULL}, - {1e-8, 1e-1, 1 << 27, 2, false, false, 1234ULL, 0.0001}, - {1e-8, 1e-1, 1 << 27, 2, false, true, 1234ULL, 0.0001}}; + {0.15f, -1.f, 1024, 32, false, 1234ULL}, + {0.15f, -1.f, 1024, 64, false, 1234ULL}, + {0.15f, -1.f, 1024, 128, false, 1234ULL}, + {0.15f, -1.f, 1024, 256, false, 1234ULL}, + {0.15f, -1.f, 1024, 32, true, 1234ULL}, + {0.15f, -1.f, 1024, 64, true, 1234ULL}, + {0.15f, -1.f, 1024, 128, true, 1234ULL}, + {0.15f, -1.f, 1024, 256, true, 1234ULL}, + {0.15f, -1.f, 1030, 1, false, 1234ULL}, + {2.0f, -1.f, 31, 120, false, 1234ULL}, + {2.0f, -1.f, 1, 130, false, 1234ULL}, + {0.15f, -1.f, 1030, 1, true, 1234ULL}, + {2.0f, -1.f, 31, 120, true, 1234ULL}, + {2.0f, -1.f, 1, 130, true, 1234ULL}, + {2.0f, -1.f, 1, 1, false, 1234ULL}, + {2.0f, -1.f, 1, 1, true, 1234ULL}, + {2.0f, -1.f, 7, 23, false, 1234ULL}, + {2.0f, -1.f, 7, 23, true, 1234ULL}, + {2.0f, -1.f, 17, 5, false, 1234ULL}, + {2.0f, -1.f, 17, 5, true, 1234ULL}, + {0.0001f, 0.1f, 1 << 27, 2, false, 1234ULL, 0.0001f}, + {0.0001f, 0.1f, 1 << 27, 2, true, 1234ULL, 0.0001f}}; + +const std::vector> inputsd = {{0.15, -1.0, 1024, 32, false, 1234ULL}, + {0.15, -1.0, 1024, 64, false, 1234ULL}, + {0.15, -1.0, 1024, 128, false, 1234ULL}, + {0.15, -1.0, 1024, 256, false, 1234ULL}, + {0.15, -1.0, 1024, 32, true, 1234ULL}, + {0.15, -1.0, 1024, 64, true, 1234ULL}, + {0.15, -1.0, 1024, 128, true, 1234ULL}, + {0.15, -1.0, 1024, 256, true, 1234ULL}, + {0.15, -1.0, 1030, 1, false, 1234ULL}, + {2.0, -1.0, 31, 120, false, 1234ULL}, + {2.0, -1.0, 1, 130, false, 1234ULL}, + {0.15, -1.0, 1030, 1, true, 1234ULL}, + {2.0, -1.0, 31, 120, true, 1234ULL}, + {2.0, -1.0, 1, 130, true, 1234ULL}, + {2.0, -1.0, 1, 1, false, 1234ULL}, + {2.0, -1.0, 1, 1, true, 1234ULL}, + {2.0, -1.0, 7, 23, false, 1234ULL}, + {2.0, -1.0, 7, 23, true, 1234ULL}, + {2.0, -1.0, 17, 5, false, 1234ULL}, + {2.0, -1.0, 17, 5, true, 1234ULL}, + {1e-8, 1e-1, 1 << 27, 2, false, 1234ULL, 0.0001}, + {1e-8, 1e-1, 1 << 27, 2, true, 1234ULL, 
0.0001}}; typedef MeanTest MeanTestF; TEST_P(MeanTestF, Result) diff --git a/cpp/tests/stats/mean_center.cu b/cpp/tests/stats/mean_center.cu new file mode 100644 index 0000000000..48bf50056c --- /dev/null +++ b/cpp/tests/stats/mean_center.cu @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../linalg/matrix_vector_op.cuh" +#include "../test_utils.cuh" + +#include +#include +#include +#include +#include + +#include + +namespace raft { +namespace stats { + +template +struct MeanCenterInputs { + T tolerance, mean; + IdxType rows, cols; + bool rowMajor, bcastAlongRows; + unsigned long long int seed; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const MeanCenterInputs& dims) +{ + return os; +} + +template +class MeanCenterTest : public ::testing::TestWithParam> { + public: + MeanCenterTest() + : params(::testing::TestWithParam>::GetParam()), + stream(resource::get_cuda_stream(handle)), + rows(params.rows), + cols(params.cols), + out(rows * cols, stream), + out_ref(rows * cols, stream), + data(rows * cols, stream), + meanVec(params.bcastAlongRows ? cols : rows, stream) + { + } + + protected: + void SetUp() override + { + raft::random::RngState r(params.seed); + auto len = rows * cols; + auto meanVecSize = params.bcastAlongRows ? 
cols : rows; + normal(handle, r, data.data(), len, params.mean, (T)1.0); + raft::stats::mean(meanVec.data(), data.data(), cols, rows, params.rowMajor, stream); + if (params.rowMajor) { + using layout = raft::row_major; + mean_center(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); + } else { + using layout = raft::col_major; + mean_center(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); + } + raft::linalg::naiveMatVec(out_ref.data(), + data.data(), + meanVec.data(), + cols, + rows, + params.rowMajor, + params.bcastAlongRows, + (T)-1.0, + stream); + resource::sync_stream(handle, stream); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + MeanCenterInputs params; + int rows, cols; + rmm::device_uvector data, meanVec, out, out_ref; +}; + +const std::vector> inputsf_i32 = { + {0.05f, -1.f, 1024, 32, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, false, 1234ULL}}; +typedef MeanCenterTest MeanCenterTestF_i32; +TEST_P(MeanCenterTestF_i32, Result) +{ + ASSERT_TRUE(devArrMatch( + out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i32, 
::testing::ValuesIn(inputsf_i32)); + +const std::vector> inputsf_i64 = { + {0.05f, -1.f, 1024, 32, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, true, 1234ULL}, + {0.05f, -1.f, 1024, 32, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, false, false, 1234ULL}, + {0.05f, -1.f, 1024, 32, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 64, true, false, 1234ULL}, + {0.05f, -1.f, 1024, 128, true, false, 1234ULL}}; +typedef MeanCenterTest MeanCenterTestF_i64; +TEST_P(MeanCenterTestF_i64, Result) +{ + ASSERT_TRUE(devArrMatch( + out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestF_i64, ::testing::ValuesIn(inputsf_i64)); + +const std::vector> inputsd_i32 = { + {0.05, -1.0, 1024, 32, false, true, 1234ULL}, + {0.05, -1.0, 1024, 64, false, true, 1234ULL}, + {0.05, -1.0, 1024, 128, false, true, 1234ULL}, + {0.05, -1.0, 1024, 32, true, true, 1234ULL}, + {0.05, -1.0, 1024, 64, true, true, 1234ULL}, + {0.05, -1.0, 1024, 128, true, true, 1234ULL}, + {0.05, -1.0, 1024, 32, false, false, 1234ULL}, + {0.05, -1.0, 1024, 64, false, false, 1234ULL}, + {0.05, -1.0, 1024, 128, false, false, 1234ULL}, + {0.05, -1.0, 1024, 32, true, false, 1234ULL}, + {0.05, -1.0, 1024, 64, true, false, 1234ULL}, + {0.05, -1.0, 1024, 128, true, false, 1234ULL}}; +typedef MeanCenterTest MeanCenterTestD_i32; +TEST_P(MeanCenterTestD_i32, Result) +{ + ASSERT_TRUE(devArrMatch( + out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestD_i32, ::testing::ValuesIn(inputsd_i32)); + +const std::vector> inputsd_i64 = { + {0.05, -1.0, 1024, 32, false, true, 1234ULL}, + {0.05, -1.0, 1024, 64, false, 
true, 1234ULL}, + {0.05, -1.0, 1024, 128, false, true, 1234ULL}, + {0.05, -1.0, 1024, 32, true, true, 1234ULL}, + {0.05, -1.0, 1024, 64, true, true, 1234ULL}, + {0.05, -1.0, 1024, 128, true, true, 1234ULL}, + {0.05, -1.0, 1024, 32, false, false, 1234ULL}, + {0.05, -1.0, 1024, 64, false, false, 1234ULL}, + {0.05, -1.0, 1024, 128, false, false, 1234ULL}, + {0.05, -1.0, 1024, 32, true, false, 1234ULL}, + {0.05, -1.0, 1024, 64, true, false, 1234ULL}, + {0.05, -1.0, 1024, 128, true, false, 1234ULL}}; +typedef MeanCenterTest MeanCenterTestD_i64; +TEST_P(MeanCenterTestD_i64, Result) +{ + ASSERT_TRUE(devArrMatch( + out.data(), out_ref.data(), params.cols, raft::CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MeanCenterTests, MeanCenterTestD_i64, ::testing::ValuesIn(inputsd_i64)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/meanvar.cu b/cpp/tests/stats/meanvar.cu similarity index 100% rename from cpp/test/stats/meanvar.cu rename to cpp/tests/stats/meanvar.cu diff --git a/cpp/test/stats/minmax.cu b/cpp/tests/stats/minmax.cu similarity index 100% rename from cpp/test/stats/minmax.cu rename to cpp/tests/stats/minmax.cu diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/tests/stats/mutual_info_score.cu similarity index 100% rename from cpp/test/stats/mutual_info_score.cu rename to cpp/tests/stats/mutual_info_score.cu diff --git a/cpp/test/stats/r2_score.cu b/cpp/tests/stats/r2_score.cu similarity index 100% rename from cpp/test/stats/r2_score.cu rename to cpp/tests/stats/r2_score.cu diff --git a/cpp/test/stats/rand_index.cu b/cpp/tests/stats/rand_index.cu similarity index 100% rename from cpp/test/stats/rand_index.cu rename to cpp/tests/stats/rand_index.cu diff --git a/cpp/test/stats/regression_metrics.cu b/cpp/tests/stats/regression_metrics.cu similarity index 100% rename from cpp/test/stats/regression_metrics.cu rename to cpp/tests/stats/regression_metrics.cu diff --git a/cpp/test/stats/stddev.cu 
b/cpp/tests/stats/stddev.cu similarity index 99% rename from cpp/test/stats/stddev.cu rename to cpp/tests/stats/stddev.cu index f4c5f92f49..a9a70b1e60 100644 --- a/cpp/test/stats/stddev.cu +++ b/cpp/tests/stats/stddev.cu @@ -81,8 +81,7 @@ class StdDevTest : public ::testing::TestWithParam> { using layout_t = raft::row_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - false); + raft::make_device_vector_view(mean_act.data(), cols)); stddev(handle, raft::make_device_matrix_view(data, rows, cols), @@ -99,8 +98,7 @@ class StdDevTest : public ::testing::TestWithParam> { using layout_t = raft::col_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - false); + raft::make_device_vector_view(mean_act.data(), cols)); stddev(handle, raft::make_device_matrix_view(data, rows, cols), diff --git a/cpp/test/stats/sum.cu b/cpp/tests/stats/sum.cu similarity index 100% rename from cpp/test/stats/sum.cu rename to cpp/tests/stats/sum.cu diff --git a/cpp/test/stats/v_measure.cu b/cpp/tests/stats/v_measure.cu similarity index 100% rename from cpp/test/stats/v_measure.cu rename to cpp/tests/stats/v_measure.cu diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/tests/stats/weighted_mean.cu similarity index 99% rename from cpp/test/stats/weighted_mean.cu rename to cpp/tests/stats/weighted_mean.cu index 407f3f14ea..e125fbc71e 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/tests/stats/weighted_mean.cu @@ -340,4 +340,4 @@ TEST_P(WeightedMeanTestD, Result) INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); }; // end namespace stats -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/cpp/test/test.cpp b/cpp/tests/test.cpp similarity index 100% rename from cpp/test/test.cpp rename to cpp/tests/test.cpp diff --git a/cpp/test/test_utils.cuh 
b/cpp/tests/test_utils.cuh similarity index 99% rename from cpp/test/test_utils.cuh rename to cpp/tests/test_utils.cuh index 810a0d7985..ac4ed4d24e 100644 --- a/cpp/test/test_utils.cuh +++ b/cpp/tests/test_utils.cuh @@ -330,4 +330,4 @@ inline std::vector read_csv(std::string filename, bool skip_first_n_colum return result; } -}; // end namespace raft \ No newline at end of file +}; // end namespace raft diff --git a/cpp/test/test_utils.h b/cpp/tests/test_utils.h similarity index 100% rename from cpp/test/test_utils.h rename to cpp/tests/test_utils.h diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/tests/util/bitonic_sort.cu similarity index 100% rename from cpp/test/util/bitonic_sort.cu rename to cpp/tests/util/bitonic_sort.cu diff --git a/cpp/test/util/cudart_utils.cpp b/cpp/tests/util/cudart_utils.cpp similarity index 100% rename from cpp/test/util/cudart_utils.cpp rename to cpp/tests/util/cudart_utils.cpp diff --git a/cpp/test/util/device_atomics.cu b/cpp/tests/util/device_atomics.cu similarity index 100% rename from cpp/test/util/device_atomics.cu rename to cpp/tests/util/device_atomics.cu diff --git a/cpp/test/util/integer_utils.cpp b/cpp/tests/util/integer_utils.cpp similarity index 100% rename from cpp/test/util/integer_utils.cpp rename to cpp/tests/util/integer_utils.cpp diff --git a/cpp/test/util/integer_utils.cu b/cpp/tests/util/integer_utils.cu similarity index 100% rename from cpp/test/util/integer_utils.cu rename to cpp/tests/util/integer_utils.cu diff --git a/cpp/test/util/memory_type_dispatcher.cu b/cpp/tests/util/memory_type_dispatcher.cu similarity index 100% rename from cpp/test/util/memory_type_dispatcher.cu rename to cpp/tests/util/memory_type_dispatcher.cu diff --git a/cpp/test/util/popc.cu b/cpp/tests/util/popc.cu similarity index 100% rename from cpp/test/util/popc.cu rename to cpp/tests/util/popc.cu diff --git a/cpp/test/util/pow2_utils.cu b/cpp/tests/util/pow2_utils.cu similarity index 100% rename from cpp/test/util/pow2_utils.cu rename to 
cpp/tests/util/pow2_utils.cu diff --git a/cpp/test/util/reduction.cu b/cpp/tests/util/reduction.cu similarity index 100% rename from cpp/test/util/reduction.cu rename to cpp/tests/util/reduction.cu diff --git a/dependencies.yaml b/dependencies.yaml index 1772c5d539..c9befcb53a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,25 +3,26 @@ files: all: output: conda matrix: - cuda: ["11.8", "12.5"] + cuda: ["11.8", "12.8"] arch: [x86_64, aarch64] includes: - - rapids_build - - build_pylibraft + - build_common + - build_cython + - checks - cuda - cuda_version + - depends_on_cuda_python - depends_on_cupy - depends_on_distributed_ucxx + - depends_on_rmm - develop - - checks - - test_libraft - docs - - rapids_build_setuptools - rapids_build_skbuild - - run_raft_dask - run_pylibraft - - test_python_common + - run_raft_dask + - test_libraft - test_pylibraft + - test_python_common test_cpp: output: none includes: @@ -31,10 +32,10 @@ files: output: none includes: - cuda_version + - depends_on_cupy - py_version - - test_python_common - test_pylibraft - - depends_on_cupy + - test_python_common checks: output: none includes: @@ -48,6 +49,29 @@ files: - docs - py_version - test_pylibraft + py_build_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_common + - depends_on_librmm + py_run_libraft: + output: pyproject + pyproject_dir: python/libraft + extras: + table: project + includes: + - cuda_wheels py_build_pylibraft: output: pyproject pyproject_dir: python/pylibraft @@ -62,15 +86,21 @@ files: table: tool.rapids-build-backend key: requires includes: - - rapids_build - - build_pylibraft + - build_common + - build_cython + - depends_on_libraft + - depends_on_librmm + - depends_on_cuda_python + - depends_on_rmm py_run_pylibraft: 
output: pyproject pyproject_dir: python/pylibraft extras: table: project includes: - - cuda_wheels + - depends_on_libraft + - depends_on_cuda_python + - depends_on_rmm - run_pylibraft py_test_pylibraft: output: pyproject @@ -79,9 +109,9 @@ files: table: project.optional-dependencies key: test includes: - - test_python_common - - test_pylibraft - depends_on_cupy + - test_pylibraft + - test_python_common py_build_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -96,7 +126,10 @@ files: table: tool.rapids-build-backend key: requires includes: - - rapids_build + - build_common + - build_cython + - depends_on_libraft + - depends_on_librmm - depends_on_ucx_build py_run_raft_dask: output: pyproject @@ -104,8 +137,9 @@ files: extras: table: project includes: - - run_raft_dask - depends_on_distributed_ucxx + - depends_on_libraft + - run_raft_dask py_test_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -125,39 +159,53 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 + - rapids-build-backend>=0.3.0,<0.4.0.dev0 - output_types: [conda] packages: - scikit-build-core>=0.10.0 - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.10.0 - rapids_build: + build_common: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - - cython>=3.0.0,<3.1.0a0 - ninja - output_types: [conda] packages: - c-compiler - cxx-compiler + - libucxx==0.42.*,>=0.0.0a0 - nccl>=2.19 - - libucxx==0.41.*,>=0.0.0a0 + - spdlog>=1.14.1,<1.15 specific: - output_types: conda matrices: - matrix: arch: x86_64 + cuda: "11.8" packages: - gcc_linux-64=11.* - - sysroot_linux-64==2.17 + - sysroot_linux-64==2.28 - matrix: arch: aarch64 + cuda: "11.8" packages: - gcc_linux-aarch64=11.* - - sysroot_linux-aarch64==2.17 + - sysroot_linux-aarch64==2.28 + - matrix: + arch: x86_64 + cuda: "12.*" + packages: + - 
gcc_linux-64=13.* + - sysroot_linux-64==2.28 + - matrix: + arch: aarch64 + cuda: "12.*" + packages: + - gcc_linux-aarch64=13.* + - sysroot_linux-aarch64==2.28 - output_types: conda matrices: - matrix: {cuda: "12.*"} @@ -178,45 +226,11 @@ dependencies: packages: [nvcc_linux-64=11.2] - matrix: {cuda: "11.2", arch: aarch64} packages: [nvcc_linux-aarch64=11.2] - - build_pylibraft: + build_cython: common: - - output_types: [conda] - packages: - - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 - - matrix: - cuda: "11.*" - packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 - - matrix: - packages: - - &cuda_python cuda-python - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - &rmm_cu12 rmm-cu12==24.12.*,>=0.0.0a0 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - &rmm_cu11 rmm-cu11==24.12.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_unsuffixed] } + packages: + - cython>=3.0.0,<3.1.0a0 checks: common: - output_types: [conda, requirements] @@ -260,6 +274,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: conda @@ -344,11 +362,14 @@ dependencies: - nvidia-curand-cu12 - nvidia-cusolver-cu12 - nvidia-cusparse-cu12 - # CUDA 11 does not provide wheels, so use the system libraries instead - matrix: cuda: "11.*" use_cuda_wheels: "true" packages: + - nvidia-cublas-cu11 + - nvidia-curand-cu11 + - nvidia-cusolver-cu11 + - 
nvidia-cusparse-cu11 # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. for DLFW and pip devcontainers) - matrix: @@ -397,13 +418,6 @@ dependencies: - recommonmark - sphinx-copybutton - sphinx-markdown-tables - rapids_build_setuptools: - common: - - output_types: [requirements, pyproject] - packages: - - wheel - - setuptools - - *rapids_build_backend py_version: specific: - output_types: conda @@ -428,58 +442,95 @@ dependencies: - output_types: [conda, pyproject] packages: - numpy>=1.23,<3.0a0 - - output_types: [conda] + run_raft_dask: + common: + - output_types: [conda, pyproject] + packages: + - dask-cuda==25.2.*,>=0.0.0a0 + - rapids-dask-dependency==25.2.*,>=0.0.0a0 + - output_types: conda packages: - - *rmm_unsuffixed + - &pylibraft_unsuffixed pylibraft==25.2.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.42.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - &pylibraft_cu12 pylibraft-cu12==25.2.*,>=0.0.0a0 + - &ucx_py_cu12 ucx-py-cu12==0.42.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - &pylibraft_cu11 pylibraft-cu11==25.2.*,>=0.0.0a0 + - &ucx_py_cu11 ucx-py-cu11==0.42.*,>=0.0.0a0 + - {matrix: null, packages: [*pylibraft_unsuffixed, *ucx_py_unsuffixed]} + test_python_common: + common: + - output_types: [conda, requirements, pyproject] + packages: + - pytest==7.* + - pytest-cov + test_pylibraft: + common: + - output_types: [conda, requirements, pyproject] + packages: + - scikit-learn + - scipy + depends_on_cuda_python: specific: - output_types: [conda, requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - *cuda_python12 + - cuda-python>=12.6.2,<13.0a0 - matrix: cuda: "11.*" packages: - - *cuda_python11 + - cuda-python>=11.8.5,<12.0a0 - matrix: packages: - - *cuda_python + - cuda-python + depends_on_distributed_ucxx: + common: + - output_types: conda + packages: + # UCXX is not currently a hard-dependency thus only installed during tests, + # this will change in the future. 
+ - &distributed_ucxx_unsuffixed distributed-ucxx==0.42.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - *rmm_cu12 + - distributed-ucxx-cu12==0.42.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - *rmm_cu11 - - {matrix: null, packages: [*rmm_unsuffixed]} - run_raft_dask: + - distributed-ucxx-cu11==0.42.*,>=0.0.0a0 + - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} + depends_on_libraft: common: - - output_types: [conda, pyproject] - packages: - - dask-cuda==24.12.*,>=0.0.0a0 - - joblib>=0.11 - - numba>=0.57 - - rapids-dask-dependency==24.12.*,>=0.0.0a0 - - output_types: conda - packages: - - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: @@ -489,34 +540,46 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - &pylibraft_cu12 pylibraft-cu12==24.12.*,>=0.0.0a0 - - &ucx_py_cu12 ucx-py-cu12==0.41.*,>=0.0.0a0 + - libraft-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - &pylibraft_cu11 pylibraft-cu11==24.12.*,>=0.0.0a0 - - &ucx_py_cu11 ucx-py-cu11==0.41.*,>=0.0.0a0 - - {matrix: null, packages: [*pylibraft_unsuffixed, *ucx_py_unsuffixed]} - test_python_common: + - libraft-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - libraft==25.2.*,>=0.0.0a0 + depends_on_librmm: common: - - output_types: [conda, requirements, pyproject] + - output_types: conda packages: - - pytest==7.* - - pytest-cov - test_pylibraft: - common: - - output_types: [conda, requirements, pyproject] + - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0 + - output_types: requirements packages: - - scikit-learn - - scipy - depends_on_distributed_ucxx: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==25.2.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed + depends_on_rmm: common: - output_types: conda packages: - # UCXX is not currently a hard-dependency thus only installed during tests, - # this will change in the future. 
- - &distributed_ucxx_unsuffixed distributed-ucxx==0.41.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -529,13 +592,15 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu12==0.41.*,>=0.0.0a0 + - rmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu11==0.41.*,>=0.0.0a0 - - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} + - rmm-cu11==25.2.*,>=0.0.0a0 + - matrix: + packages: + - *rmm_unsuffixed depends_on_ucx_build: common: - output_types: conda diff --git a/docs/README.md b/docs/README.md index a09ccf41eb..aa5e114347 100644 --- a/docs/README.md +++ b/docs/README.md @@ -11,4 +11,4 @@ bash build.sh docs #### Once the process finishes, documentation can be found in build/html ```shell script xdg-open build/html/index.html` -``` \ No newline at end of file +``` diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css index 225cf13ba9..d1f647233a 100644 --- a/docs/source/_static/references.css +++ b/docs/source/_static/references.css @@ -20,4 +20,4 @@ dl.citation > dt.label > span::before { /* Add closing bracket */ dl.citation > dt.label > span::after { content: "]"; -} \ No newline at end of file +} diff --git a/docs/source/build.md b/docs/source/build.md index 5a0dbf7e11..237c54ce6b 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -42,7 +42,7 @@ mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-vers ```bash # for CUDA 12.0 -mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.0 +mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft cuda-version=12.8 ``` Note that the above commands will also install `libraft-headers` and `libraft`. @@ -50,7 +50,7 @@ Note that the above commands will also install `libraft-headers` and `libraft`. 
You can also install the conda packages individually using the `mamba` command above. For example, if you'd like to install RAFT's headers to use in your project: ```bash # for CUDA 12.0 -mamba install -c rapidsai -c conda-forge -c nvidia libraft-headers cuda-version=12.0 +mamba install -c rapidsai -c conda-forge -c nvidia libraft-headers cuda-version=12.8 ``` ## Installing Python through Pip @@ -99,7 +99,7 @@ In addition to the libraries included with cudatoolkit 11.8+, there are some oth Conda environment scripts are provided for installing the necessary dependencies to build both the C++ and Python libraries from source. It is preferred to use `mamba`, as it provides significant speedup over `conda`: ```bash -mamba env create --name rapids_raft -f conda/environments/all_cuda-125_arch-x86_64.yaml +mamba env create --name rapids_raft -f conda/environments/all_cuda-128_arch-x86_64.yaml mamba activate rapids_raft ``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 7a287b689f..e5e6e0871a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -208,7 +208,7 @@ def setup(app): linkcode_resolve = make_linkcode_resolve( "pylibraft", "https://github.com/rapidsai/raft" - "raft/blob/{revision}/python/pylibraft" + "/blob/{revision}/python/pylibraft/" "{package}/{path}#L{lineno}", ) diff --git a/docs/source/contributing.md b/docs/source/contributing.md index 1b4071d0a5..446e7b2a7b 100755 --- a/docs/source/contributing.md +++ b/docs/source/contributing.md @@ -89,5 +89,3 @@ implementation of the issue, ask them in the issue instead of the PR. 
## Attribution Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md - - diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index 74f706bf46..837cfa0cb0 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -16,4 +16,4 @@ C++ API cpp_api/solver.rst cpp_api/sparse.rst cpp_api/stats.rst - cpp_api/utils.rst \ No newline at end of file + cpp_api/utils.rst diff --git a/docs/source/cpp_api/core.rst b/docs/source/cpp_api/core.rst index 4122a18506..f159c85af8 100644 --- a/docs/source/cpp_api/core.rst +++ b/docs/source/cpp_api/core.rst @@ -22,4 +22,4 @@ expose in public APIs. core_operators.rst core_math.rst core_bitset.rst - core_bitmap.rst \ No newline at end of file + core_bitmap.rst diff --git a/docs/source/cpp_api/core_bitmap.rst b/docs/source/cpp_api/core_bitmap.rst index 6c1dc607bf..532da58e71 100644 --- a/docs/source/cpp_api/core_bitmap.rst +++ b/docs/source/cpp_api/core_bitmap.rst @@ -12,4 +12,4 @@ namespace *raft::core* .. doxygengroup:: bitmap :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/core_bitset.rst b/docs/source/cpp_api/core_bitset.rst index af1cff6d37..117efc5466 100644 --- a/docs/source/cpp_api/core_bitset.rst +++ b/docs/source/cpp_api/core_bitset.rst @@ -12,4 +12,4 @@ namespace *raft::core* .. doxygengroup:: bitset :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/core_kvp.rst b/docs/source/cpp_api/core_kvp.rst index 60a0da078b..5f0cfd800a 100644 --- a/docs/source/cpp_api/core_kvp.rst +++ b/docs/source/cpp_api/core_kvp.rst @@ -12,4 +12,3 @@ namespace *raft::core* .. 
doxygenstruct:: raft::KeyValuePair :project: RAFT :members: - diff --git a/docs/source/cpp_api/core_logger.rst b/docs/source/cpp_api/core_logger.rst index 60714a63ea..569f17fac3 100644 --- a/docs/source/cpp_api/core_logger.rst +++ b/docs/source/cpp_api/core_logger.rst @@ -12,4 +12,3 @@ namespace *raft::core* .. doxygenclass:: raft::logger :project: RAFT :members: - diff --git a/docs/source/cpp_api/core_nvtx.rst b/docs/source/cpp_api/core_nvtx.rst index addcbdda30..051c66da0c 100644 --- a/docs/source/cpp_api/core_nvtx.rst +++ b/docs/source/cpp_api/core_nvtx.rst @@ -13,5 +13,3 @@ namespace *raft::core* :project: RAFT :members: :content-only: - - diff --git a/docs/source/cpp_api/linalg.rst b/docs/source/cpp_api/linalg.rst index 3cd928c9db..b9da44e431 100644 --- a/docs/source/cpp_api/linalg.rst +++ b/docs/source/cpp_api/linalg.rst @@ -4,7 +4,7 @@ Linear Algebra This page provides C++ class references for the publicly-exposed elements of the `raft/linalg` (dense) linear algebra headers. In addition to providing highly optimized arithmetic and matrix/vector operations, RAFT provides a consistent user experience by providing common BLAS routines, standard linear system solvers, factorization and eigenvalue solvers. Some of these routines -hide the complexities of lower-level C-based libraries provided in the CUDA toolkit +hide the complexities of lower-level C-based libraries provided in the CUDA toolkit .. 
role:: py(code) :language: c++ @@ -19,4 +19,4 @@ hide the complexities of lower-level C-based libraries provided in the CUDA tool linalg_map_reduce.rst linalg_matrix.rst linalg_matrix_vector.rst - linalg_solver.rst \ No newline at end of file + linalg_solver.rst diff --git a/docs/source/cpp_api/linalg_arithmetic.rst b/docs/source/cpp_api/linalg_arithmetic.rst index 7bc428b9f0..badb9f31a5 100644 --- a/docs/source/cpp_api/linalg_arithmetic.rst +++ b/docs/source/cpp_api/linalg_arithmetic.rst @@ -114,4 +114,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/linalg_matrix.rst b/docs/source/cpp_api/linalg_matrix.rst index e6024bcd02..30eef5f64f 100644 --- a/docs/source/cpp_api/linalg_matrix.rst +++ b/docs/source/cpp_api/linalg_matrix.rst @@ -16,4 +16,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/linalg_matrix_vector.rst b/docs/source/cpp_api/linalg_matrix_vector.rst index d92a3c9874..cc22327c74 100644 --- a/docs/source/cpp_api/linalg_matrix_vector.rst +++ b/docs/source/cpp_api/linalg_matrix_vector.rst @@ -29,4 +29,3 @@ namespace *raft::linalg* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/matrix_manipulation.rst b/docs/source/cpp_api/matrix_manipulation.rst index d0da51e4b7..5437ced99f 100644 --- a/docs/source/cpp_api/matrix_manipulation.rst +++ b/docs/source/cpp_api/matrix_manipulation.rst @@ -41,4 +41,3 @@ namespace *raft::matrix* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/matrix_reduction.rst b/docs/source/cpp_api/matrix_reduction.rst index 440a1528b4..92dcea6428 100644 --- a/docs/source/cpp_api/matrix_reduction.rst +++ b/docs/source/cpp_api/matrix_reduction.rst @@ -16,4 +16,4 @@ namespace *raft::matrix* .. 
doxygengroup:: matrix_norm :project: RAFT :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/source/cpp_api/mdspan_representation.rst b/docs/source/cpp_api/mdspan_representation.rst index 386e6f14e9..939f1d51be 100644 --- a/docs/source/cpp_api/mdspan_representation.rst +++ b/docs/source/cpp_api/mdspan_representation.rst @@ -66,5 +66,3 @@ Accessors .. doxygentypedef:: raft::managed_accessor :project: RAFT - - diff --git a/docs/source/cpp_api/mdspan_span.rst b/docs/source/cpp_api/mdspan_span.rst index 870c4329d0..1b7d749810 100644 --- a/docs/source/cpp_api/mdspan_span.rst +++ b/docs/source/cpp_api/mdspan_span.rst @@ -25,4 +25,3 @@ span: One-dimensional Non-owning View :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/mnmg.rst b/docs/source/cpp_api/mnmg.rst index 9543cbb4ee..1f9f75dd46 100644 --- a/docs/source/cpp_api/mnmg.rst +++ b/docs/source/cpp_api/mnmg.rst @@ -47,4 +47,3 @@ NCCL+UCX Comms :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/random.rst b/docs/source/cpp_api/random.rst index 9f5cdc7a74..8eaa82c0b0 100644 --- a/docs/source/cpp_api/random.rst +++ b/docs/source/cpp_api/random.rst @@ -26,4 +26,3 @@ namespace *raft::random* random_sampling_univariate.rst random_sampling_multivariable.rst random_sampling_without_replacement.rst - diff --git a/docs/source/cpp_api/random_datagen.rst b/docs/source/cpp_api/random_datagen.rst index a07f5e0154..e97283598e 100644 --- a/docs/source/cpp_api/random_datagen.rst +++ b/docs/source/cpp_api/random_datagen.rst @@ -43,4 +43,3 @@ namespace *raft::random* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/random_sampling_without_replacement.rst b/docs/source/cpp_api/random_sampling_without_replacement.rst index ac0d3bea86..af5281a48b 100644 --- a/docs/source/cpp_api/random_sampling_without_replacement.rst +++ b/docs/source/cpp_api/random_sampling_without_replacement.rst @@ -22,5 +22,3 @@ namespace *raft::random* 
:project: RAFT :members: :content-only: - - diff --git a/docs/source/cpp_api/sparse.rst b/docs/source/cpp_api/sparse.rst index 64197accaf..ee170b3721 100644 --- a/docs/source/cpp_api/sparse.rst +++ b/docs/source/cpp_api/sparse.rst @@ -16,4 +16,3 @@ Core to RAFT's computational patterns for sparse data is its vocabulary of spars sparse_linalg.rst sparse_matrix.rst sparse_solver.rst - diff --git a/docs/source/cpp_api/sparse_types_coo_matrix.rst b/docs/source/cpp_api/sparse_types_coo_matrix.rst index 855d89fdea..c1d8748a64 100644 --- a/docs/source/cpp_api/sparse_types_coo_matrix.rst +++ b/docs/source/cpp_api/sparse_types_coo_matrix.rst @@ -36,4 +36,3 @@ Host COO Matrix :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/sparse_types_csr_matrix.rst b/docs/source/cpp_api/sparse_types_csr_matrix.rst index b704846c4e..22898a6399 100644 --- a/docs/source/cpp_api/sparse_types_csr_matrix.rst +++ b/docs/source/cpp_api/sparse_types_csr_matrix.rst @@ -36,4 +36,3 @@ Host CSR Matrix :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_classification.rst b/docs/source/cpp_api/stats_classification.rst index 929d2808f3..bc472c831d 100644 --- a/docs/source/cpp_api/stats_classification.rst +++ b/docs/source/cpp_api/stats_classification.rst @@ -17,4 +17,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_probability.rst b/docs/source/cpp_api/stats_probability.rst index 457879d87c..a77a0d9132 100644 --- a/docs/source/cpp_api/stats_probability.rst +++ b/docs/source/cpp_api/stats_probability.rst @@ -53,4 +53,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: - diff --git a/docs/source/cpp_api/stats_regression.rst b/docs/source/cpp_api/stats_regression.rst index 8c172b441d..fed5f806a4 100644 --- a/docs/source/cpp_api/stats_regression.rst +++ b/docs/source/cpp_api/stats_regression.rst @@ -41,5 +41,3 @@ namespace *raft::stats* :project: RAFT :members: :content-only: 
- - diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c4a099fabb..1a2626f2b2 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,13 +205,13 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else Manually, run the following to bulk-fix include style issues: ```bash -python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... 
list of folders which you want to fix] +python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/tests ... list of folders which you want to fix] ``` ### Copyright header @@ -230,7 +230,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp @@ -256,14 +256,14 @@ There are 7 logging levels with each successive level becoming quieter: 7. RAFT_LEVEL_OFF Pass one of these as per your needs into the `set_level()` method as follows: ```cpp -raft::logger::get().set_level(RAFT_LEVEL_WARN); +raft::default_logger().set_level(RAFT_LEVEL_WARN); // From now onwards, this will print only WARN and above kind of messages ``` ### Changing logging pattern Pass the [format string](https://github.com/gabime/spdlog/wiki/3.-Custom-formatting) as follows in order use a different logging pattern than the default. ```cpp -raft::logger::get.set_pattern(YourFavoriteFormat); +raft::default_logger().set_pattern(YourFavoriteFormat); ``` One can also use the corresponding `get_pattern()` method to know the current format as well. @@ -298,9 +298,9 @@ RAFT is a heavily templated library. Several core functions are expensive to com **Macros.** We define the macros `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY`. The `RAFT_COMPILED` macro is defined by `CMake` when compiling code that (1) is part of `libraft.so` or (2) is linked with `libraft.so`. 
It indicates that a precompiled `libraft.so` is present at runtime. -The `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro is defined by `CMake` during compilation of `libraft.so` itself. When defined, it indicates that implicit instantiations of expensive function templates are forbidden (they result in a compiler error). In the RAFT project, we additionally define this macro during compilation of the tests and benchmarks. +The `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro is defined by `CMake` during compilation of `libraft.so` itself. When defined, it indicates that implicit instantiations of expensive function templates are forbidden (they result in a compiler error). In the RAFT project, we additionally define this macro during compilation of the tests and benchmarks. -Below, we summarize which combinations of `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` are used in practice and what the effect of the combination is. +Below, we summarize which combinations of `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` are used in practice and what the effect of the combination is. | RAFT_COMPILED | RAFT_EXPLICIT_INSTANTIATE_ONLY | Which targets | |---------------|--------------------------------|------------------------------------------------------------------------------------------------------| @@ -349,7 +349,7 @@ The file `expensive-ext.cuh` contains the following: #ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY namespace raft { -// (1) define templates to raise an error in case of accidental instantiation +// (1) define templates to raise an error in case of accidental instantiation template void expensive(T arg) RAFT_EXPLICIT; } // namespace raft #endif //RAFT_EXPLICIT_INSTANTIATE_ONLY @@ -371,7 +371,7 @@ template void raft::expensive(int); template void raft::expensive(float); ``` -**Design considerations**: +**Design considerations**: 1. In the `-ext.cuh` header, do not include implementation headers. 
Only include function parameter types and types that are used to instantiate the templates. If a primitive takes custom parameter types, define them in a separate header called `_types.hpp`. (see [Common Design Considerations](https://github.com/rapidsai/raft/blob/7b065aff81a0b1976e2a9e2f3de6690361a1111b/docs/source/developer_guide.md#common-design-considerations)). @@ -381,7 +381,7 @@ template void raft::expensive(float); 4. If a header file defines multiple expensive templates, it can be that one of them is not instantiated. In this case, **do define** the template with `RAFT_EXPLICIT` in the `-ext` header. This way, when the template is instantiated, the developer gets a helpful error message instead of a confusing "function not found". -This header structure was proposed in [issue #1416](https://github.com/rapidsai/raft/issues/1416), which contains more background on the motivation of this structure and the mechanics of C++ template instantiation. +This header structure was proposed in [issue #1416](https://github.com/rapidsai/raft/issues/1416), which contains more background on the motivation of this structure and the mechanics of C++ template instantiation. ## Testing diff --git a/docs/source/pylibraft_api/random.rst b/docs/source/pylibraft_api/random.rst index 538d932757..dbfd7b2fa1 100644 --- a/docs/source/pylibraft_api/random.rst +++ b/docs/source/pylibraft_api/random.rst @@ -9,4 +9,4 @@ This page provides pylibraft class references for the publicly-exposed elements :class: highlight -.. autofunction:: pylibraft.random.rmat \ No newline at end of file +.. autofunction:: pylibraft.random.rmat diff --git a/docs/source/pylibraft_api/sparse.rst b/docs/source/pylibraft_api/sparse.rst index b2c3f7a2b1..9ba265c6c9 100644 --- a/docs/source/pylibraft_api/sparse.rst +++ b/docs/source/pylibraft_api/sparse.rst @@ -8,4 +8,4 @@ This page provides pylibraft class references for the publicly-exposed elements :language: python :class: highlight -.. 
autofunction:: pylibraft.sparse.linalg.eigsh \ No newline at end of file +.. autofunction:: pylibraft.sparse.linalg.eigsh diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py index a7a46fdd9d..5712bbe5cb 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/source/sphinxext/github_link.py @@ -1,5 +1,20 @@ # This contains code with copyright by the scikit-learn project, subject to the # license in /thirdparty/LICENSES/LICENSE.scikit_learn +# +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import inspect import os @@ -96,15 +111,14 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): # fn is expected to be the absolute path. 
fn = os.path.relpath(source_file, start=package) print("{}:{}".format( - os.path.abspath(os.path.join("..", "python", "cuml", fn)), + os.path.abspath(os.path.join("..", "python", "pylibraft", fn)), lineno)) else: return else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) + if fn.endswith(".pyx"): + sp_path = next(x for x in sys.path if re.match(".*site-packages$", x)) + fn = fn.replace("/opt/conda/conda-bld/work/python/pylibraft", sp_path) # Convert to relative from module root fn = os.path.relpath(fn, diff --git a/pyproject.toml b/pyproject.toml index 5042113388..460c0312a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ ignore_missing_imports = true # they are imported by a checked file. follow_imports = "skip" exclude = [ - "pylibraft/pylibraft/test", + "pylibraft/pylibraft/tests", ] [tool.codespell] @@ -45,6 +45,6 @@ exclude = [ skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild" # ignore short words, and typename parameters like OffsetT ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -ignore-words-list = "inout,numer" +ignore-words-list = "inout,unparseable,numer" builtin = "clear" quiet-level = 3 diff --git a/python/libraft/CMakeLists.txt b/python/libraft/CMakeLists.txt new file mode 100644 index 0000000000..db81aa9507 --- /dev/null +++ b/python/libraft/CMakeLists.txt @@ -0,0 +1,56 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libraft-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +# Check if raft is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python raft package. +find_package(raft "${RAPIDS_VERSION}") + +if(raft_FOUND) + return() +endif() + +unset(raft_FOUND) + +# --- CUDA --- # +set(CUDA_STATIC_RUNTIME ON) +set(CUDA_STATIC_MATH_LIBRARIES OFF) + +# --- RAFT ---# +set(BUILD_TESTS OFF) +set(BUILD_PRIMS_BENCH OFF) +set(RAFT_COMPILE_DYNAMIC_ONLY ON) +set(RAFT_COMPILE_LIBRARY ON) + +add_subdirectory(../../cpp raft-cpp) + +# assumes libraft.so is installed 2 levels deep, e.g. 
site-packages/libraft/lib64/libraft.so +set_property( + TARGET raft_lib + PROPERTY INSTALL_RPATH + "$ORIGIN/../../nvidia/cublas/lib" + "$ORIGIN/../../nvidia/curand/lib" + "$ORIGIN/../../nvidia/cusolver/lib" + "$ORIGIN/../../nvidia/cusparse/lib" + "$ORIGIN/../../nvidia/nvjitlink/lib" +) diff --git a/python/libraft/LICENSE b/python/libraft/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/libraft/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libraft/README.md b/python/libraft/README.md new file mode 120000 index 0000000000..fe84005413 --- /dev/null +++ b/python/libraft/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/libraft/libraft/VERSION b/python/libraft/libraft/VERSION new file mode 120000 index 0000000000..d62dc733ef --- /dev/null +++ b/python/libraft/libraft/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libraft/libraft/__init__.py b/python/libraft/libraft/__init__.py new file mode 100644 index 0000000000..9260f4e67c --- /dev/null +++ b/python/libraft/libraft/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from libraft._version import __git_commit__, __version__ +from libraft.load import load_library diff --git a/python/libraft/libraft/_version.py b/python/libraft/libraft/_version.py new file mode 100644 index 0000000000..530bf8bea6 --- /dev/null +++ b/python/libraft/libraft/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libraft/libraft/load.py b/python/libraft/libraft/load.py new file mode 100644 index 0000000000..ad3db9e09c --- /dev/null +++ b/python/libraft/libraft/load.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + +# Loading with RTLD_LOCAL adds the library itself to the loader's +# loaded library cache without loading any symbols into the global +# namespace. This allows libraries that express a dependency on +# this library to be loaded later and successfully satisfy this dependency +# without polluting the global symbol table with symbols from +# libraft that could conflict with symbols from other DSOs. +PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL + + +def _load_system_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Raises ``OSError`` if library cannot be loaded. + """ + return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG) + + +def _load_wheel_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Returns ``None`` if the library cannot be loaded. + """ + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), "lib64", soname) + ): + return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG) + return None + + +def load_library(): + """Dynamically load libraft.so and its dependencies""" + prefer_system_installation = ( + os.getenv("RAPIDS_LIBRAFT_PREFER_SYSTEM_LIBRARY", "false").lower() + != "false" + ) + + soname = "libraft.so" + libraft_lib = None + if prefer_system_installation: + # Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + try: + libraft_lib = _load_system_installation(soname) + except OSError: + libraft_lib = _load_wheel_installation(soname) + else: + # Prefer the libraries bundled in this package. If they aren't found + # (which might be the case in builds where the library was prebuilt + # before packaging the wheel), look for a system installation. 
+ try: + libraft_lib = _load_wheel_installation(soname) + if libraft_lib is None: + libraft_lib = _load_system_installation(soname) + except OSError: + # If none of the searches above succeed, just silently return None + # and rely on other mechanisms (like RPATHs on other DSOs) to + # help the loader find the library. + pass + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libraft was loaded from. + return libraft_lib diff --git a/python/libraft/pyproject.toml b/python/libraft/pyproject.toml new file mode 100644 index 0000000000..89b2834614 --- /dev/null +++ b/python/libraft/pyproject.toml @@ -0,0 +1,117 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] + +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+build-backend = "rapids_build_backend.build" + +[project] +name = "libraft" +dynamic = ["version"] +description = "RAFT: Reusable Algorithms Functions and other Tools (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +dependencies = [ + "nvidia-cublas", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" +Documentation = "https://docs.rapids.ai/api/raft/stable/" + +[project.entry-points."cmake.prefix"] +libraft = "libraft" + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_first_party = [ + "libraft", +] +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.install-dir = "libraft" +wheel.packages = ["libraft"] +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libraft/VERSION" +regex = "(?P<value>.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "librmm==25.2.*,>=0.0.0a0", + "ninja", +] # This list was generated by 
`rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' diff --git a/python/pylibraft/.coveragerc b/python/pylibraft/.coveragerc index fc087fb9c5..3269e10b8a 100644 --- a/python/pylibraft/.coveragerc +++ b/python/pylibraft/.coveragerc @@ -1,3 +1,3 @@ # Configuration file for Python coverage tests [run] -source = pylibraft \ No newline at end of file +source = pylibraft diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index 758c1e4711..83c262dc10 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -27,68 +27,13 @@ project( LANGUAGES CXX CUDA ) -option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" - ON -) -option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) - -# If the user requested it we attempt to find RAFT. -if(FIND_RAFT_CPP) - find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS compiled) - if(NOT TARGET raft::raft_lib) - message( - FATAL_ERROR - "Building against a preexisting libraft library requires the compiled libraft to have been built!" 
- ) - - endif() -else() - set(raft_FOUND OFF) -endif() +# an installed version of raft contains the other necessary targets (like CCCL and cuco) +find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS raft compiled) include(rapids-cython-core) -if(NOT raft_FOUND) - find_package(CUDAToolkit REQUIRED) - - set(BUILD_TESTS OFF) - set(BUILD_PRIMS_BENCH OFF) - set(RAFT_COMPILE_LIBRARY ON) - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) - set(CUDA_STATIC_MATH_LIBRARIES OFF) - elseif(USE_CUDA_MATH_WHEELS) - message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") - endif() - - add_subdirectory(../../cpp raft-cpp EXCLUDE_FROM_ALL) - - if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set_property( - TARGET raft_lib - PROPERTY INSTALL_RPATH - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" - ) - endif() - - # When building the C++ libraries from source we must copy libraft.so alongside the - # pairwise_distance and random Cython libraries TODO: when we have a single 'compiled' raft - # library, we shouldn't need this - set(cython_lib_dir pylibraft) - install(TARGETS raft_lib DESTINATION ${cython_lib_dir}) -endif() - rapids_cython_init() add_subdirectory(pylibraft/common) add_subdirectory(pylibraft/random) add_subdirectory(pylibraft/sparse) - -if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET raft PATHS "${cython_lib_dir}") -endif() diff --git a/python/pylibraft/pylibraft/__init__.py b/python/pylibraft/pylibraft/__init__.py index b0869501f3..a01e02ec33 100644 --- a/python/pylibraft/pylibraft/__init__.py +++ b/python/pylibraft/pylibraft/__init__.py @@ -13,4 +13,15 @@ # limitations under the License. # +# If libraft was installed as a wheel, we must request it to load the library +# symbols. 
Otherwise, we assume that the library was installed in a system path that ld +# can find. +try: + import libraft +except ModuleNotFoundError: + pass +else: + libraft.load_library() + del libraft + from pylibraft._version import __git_commit__, __version__ diff --git a/python/pylibraft/pylibraft/common/CMakeLists.txt b/python/pylibraft/pylibraft/common/CMakeLists.txt index 53279bfaf7..d1c1acb3aa 100644 --- a/python/pylibraft/pylibraft/common/CMakeLists.txt +++ b/python/pylibraft/pylibraft/common/CMakeLists.txt @@ -20,5 +20,5 @@ set(linked_libraries raft::raft) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX common_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX common_ ) diff --git a/python/pylibraft/pylibraft/common/cuda.pxd b/python/pylibraft/pylibraft/common/cuda.pxd index a44d9aeb63..934573b51f 100644 --- a/python/pylibraft/pylibraft/common/cuda.pxd +++ b/python/pylibraft/pylibraft/common/cuda.pxd @@ -14,7 +14,7 @@ # limitations under the License. 
# -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t cdef class Stream: diff --git a/python/pylibraft/pylibraft/common/cuda.pyx b/python/pylibraft/pylibraft/common/cuda.pyx index c164a463ae..cda0fc7168 100644 --- a/python/pylibraft/pylibraft/common/cuda.pyx +++ b/python/pylibraft/pylibraft/common/cuda.pyx @@ -19,7 +19,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cuda.ccudart cimport ( +from cuda.bindings.cyruntime cimport ( cudaError_t, cudaGetErrorName, cudaGetErrorString, diff --git a/python/pylibraft/pylibraft/common/handle.pyx b/python/pylibraft/pylibraft/common/handle.pyx index d256e671bf..400b667789 100644 --- a/python/pylibraft/pylibraft/common/handle.pyx +++ b/python/pylibraft/pylibraft/common/handle.pyx @@ -21,7 +21,7 @@ import functools -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from libc.stdint cimport uintptr_t from rmm.librmm.cuda_stream_view cimport ( diff --git a/python/pylibraft/pylibraft/common/interruptible.pyx b/python/pylibraft/pylibraft/common/interruptible.pyx index c489f2ee20..ceac387f58 100644 --- a/python/pylibraft/pylibraft/common/interruptible.pyx +++ b/python/pylibraft/pylibraft/common/interruptible.pyx @@ -22,7 +22,7 @@ import contextlib import signal -from cuda.ccudart cimport cudaStream_t +from cuda.bindings.cyruntime cimport cudaStream_t from cython.operator cimport dereference from rmm.librmm.cuda_stream_view cimport cuda_stream_view diff --git a/python/pylibraft/pylibraft/random/CMakeLists.txt b/python/pylibraft/pylibraft/random/CMakeLists.txt index 10ff776471..7d61855111 100644 --- a/python/pylibraft/pylibraft/random/CMakeLists.txt +++ b/python/pylibraft/pylibraft/random/CMakeLists.txt @@ -23,5 +23,5 @@ set(linked_libraries raft::raft raft::compiled) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX random_ + 
LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX random_ ) diff --git a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt index ef16981644..7b2c9f6162 100644 --- a/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt +++ b/python/pylibraft/pylibraft/sparse/linalg/CMakeLists.txt @@ -23,5 +23,5 @@ set(linked_libraries raft::raft raft::compiled) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX sparse_ + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX sparse_ ) diff --git a/python/pylibraft/pylibraft/test/__init__py b/python/pylibraft/pylibraft/tests/__init__py similarity index 100% rename from python/pylibraft/pylibraft/test/__init__py rename to python/pylibraft/pylibraft/tests/__init__py diff --git a/python/pylibraft/pylibraft/test/pytest.ini b/python/pylibraft/pylibraft/tests/pytest.ini similarity index 98% rename from python/pylibraft/pylibraft/test/pytest.ini rename to python/pylibraft/pylibraft/tests/pytest.ini index bf70c06f84..7b0a9f29fb 100644 --- a/python/pylibraft/pylibraft/test/pytest.ini +++ b/python/pylibraft/pylibraft/tests/pytest.ini @@ -2,4 +2,3 @@ [pytest] addopts = --tb=native - diff --git a/python/pylibraft/pylibraft/test/test_cai_wrapper.py b/python/pylibraft/pylibraft/tests/test_cai_wrapper.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_cai_wrapper.py rename to python/pylibraft/pylibraft/tests/test_cai_wrapper.py diff --git a/python/pylibraft/pylibraft/test/test_config.py b/python/pylibraft/pylibraft/tests/test_config.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_config.py rename to python/pylibraft/pylibraft/tests/test_config.py diff --git a/python/pylibraft/pylibraft/test/test_device_ndarray.py b/python/pylibraft/pylibraft/tests/test_device_ndarray.py similarity index 100% rename from 
python/pylibraft/pylibraft/test/test_device_ndarray.py rename to python/pylibraft/pylibraft/tests/test_device_ndarray.py diff --git a/python/pylibraft/pylibraft/test/test_doctests.py b/python/pylibraft/pylibraft/tests/test_doctests.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_doctests.py rename to python/pylibraft/pylibraft/tests/test_doctests.py diff --git a/python/pylibraft/pylibraft/test/test_handle.py b/python/pylibraft/pylibraft/tests/test_handle.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_handle.py rename to python/pylibraft/pylibraft/tests/test_handle.py diff --git a/python/pylibraft/pylibraft/test/test_mdspan_serializer.py b/python/pylibraft/pylibraft/tests/test_mdspan_serializer.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_mdspan_serializer.py rename to python/pylibraft/pylibraft/tests/test_mdspan_serializer.py diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/tests/test_random.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_random.py rename to python/pylibraft/pylibraft/tests/test_random.py diff --git a/python/pylibraft/pylibraft/test/test_sparse.py b/python/pylibraft/pylibraft/tests/test_sparse.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_sparse.py rename to python/pylibraft/pylibraft/tests/test_sparse.py diff --git a/python/pylibraft/pylibraft/test/test_version.py b/python/pylibraft/pylibraft/tests/test_version.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_version.py rename to python/pylibraft/pylibraft/tests/test_version.py diff --git a/python/pylibraft/pylibraft/test/test_z_interruptible.py b/python/pylibraft/pylibraft/tests/test_z_interruptible.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_z_interruptible.py rename to python/pylibraft/pylibraft/tests/test_z_interruptible.py diff --git 
a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index bb01602b33..912f1ad947 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -32,12 +32,9 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python", + "libraft==25.2.*,>=0.0.0a0", "numpy>=1.23,<3.0a0", - "nvidia-cublas", - "nvidia-curand", - "nvidia-cusolver", - "nvidia-cusparse", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -124,19 +121,21 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cuda-python", "cython>=3.0.0,<3.1.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +matrix-entry = "cuda_suffixed=true" [tool.pydistcheck] select = [ "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '825M' +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' [tool.pytest.ini_options] filterwarnings = [ diff --git a/python/pylibraft/setup.cfg b/python/pylibraft/setup.cfg deleted file mode 100644 index 7d1a0c9065..0000000000 --- a/python/pylibraft/setup.cfg +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
- -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - nvtext - cudf - cuml - cugraph - dask_cudf - rmm -known_first_party= - raft - pylibraft -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/raft-dask/.coveragerc b/python/raft-dask/.coveragerc index 968c4b898a..8077c9ae90 100644 --- a/python/raft-dask/.coveragerc +++ b/python/raft-dask/.coveragerc @@ -1,3 +1,3 @@ # Configuration file for Python coverage tests [run] -source = raft_dask \ No newline at end of file +source = raft_dask diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 9ebbaa5298..1fcb40a58d 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -25,38 +25,16 @@ project( LANGUAGES CXX CUDA ) -option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulting to local files" - OFF -) - rapids_cpm_init() # Once https://github.com/rapidsai/ucxx/issues/173 is resolved we can remove this. find_package(ucx REQUIRED) include(cmake/thirdparty/get_ucxx.cmake) -# If the user requested it we attempt to find RAFT. -if(FIND_RAFT_CPP) - find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS distributed) -else() - set(raft_FOUND OFF) -endif() - -if(NOT raft_FOUND) - # raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all - # library compilation and we don't need to install anything here. 
- set(BUILD_TESTS OFF) - set(BUILD_PRIMS_BENCH OFF) - set(RAFT_COMPILE_LIBRARIES OFF) - set(RAFT_COMPILE_DIST_LIBRARY OFF) - set(RAFT_COMPILE_NN_LIBRARY OFF) - set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) - set(RAFT_DASK_UCXX_STATIC ON) - - add_subdirectory(../../cpp raft-cpp EXCLUDE_FROM_ALL) - list(APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/cmake/find_modules) - find_package(NCCL REQUIRED) -endif() +# why these components: +# +# * 'raft' = the headers, needed to link against libraft +# * 'distributed' = needed for NCCL +find_package(raft "${RAPIDS_VERSION}" REQUIRED COMPONENTS raft distributed) include(rapids-cython-core) rapids_cython_init() diff --git a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake index db9b5c6b4d..e6b9c4aa0e 100644 --- a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake +++ b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake @@ -45,11 +45,11 @@ function(find_and_configure_ucxx) endfunction() # Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_ucxx(VERSION 0.41 +# To use a different ucxx locally, set the CMake variable +# CPM_ucxx_SOURCE=/path/to/local/ucxx +find_and_configure_ucxx(VERSION 0.42 FORK rapidsai - PINNED_TAG branch-0.41 + PINNED_TAG branch-0.42 EXCLUDE_FROM_ALL YES UCXX_STATIC ${RAFT_DASK_UCXX_STATIC} ) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index a9f4de5dc3..d3a26db282 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -31,13 +31,12 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "dask-cuda==24.12.*,>=0.0.0a0", - "distributed-ucxx==0.41.*,>=0.0.0a0", - "joblib>=0.11", - "numba>=0.57", - "pylibraft==24.12.*,>=0.0.0a0", - "rapids-dask-dependency==24.12.*,>=0.0.0a0", - "ucx-py==0.41.*,>=0.0.0a0", + "dask-cuda==25.2.*,>=0.0.0a0", 
+ "distributed-ucxx==0.42.*,>=0.0.0a0", + "libraft==25.2.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", + "ucx-py==0.42.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -121,6 +120,8 @@ build-backend = "scikit_build_core.build" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.0,<3.1.0a0", + "libraft==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "libucx==1.15.0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/raft-dask/raft_dask/__init__.py b/python/raft-dask/raft_dask/__init__.py index 19a037ae75..78248fad7a 100644 --- a/python/raft-dask/raft_dask/__init__.py +++ b/python/raft-dask/raft_dask/__init__.py @@ -13,8 +13,6 @@ # limitations under the License. # -from raft_dask._version import __git_commit__, __version__ - # If libucx was installed as a wheel, we must request it to load the library symbols. # Otherwise, we assume that the library was installed in a system path that ld can find. try: @@ -24,3 +22,16 @@ else: libucx.load_library() del libucx + +# If libraft was installed as a wheel, we must request it to load the library +# symbols. Otherwise, we assume that the library was installed in a system path that ld +# can find. 
+try: + import libraft +except ModuleNotFoundError: + pass +else: + libraft.load_library() + del libraft + +from raft_dask._version import __git_commit__, __version__ diff --git a/python/raft-dask/raft_dask/common/CMakeLists.txt b/python/raft-dask/raft_dask/common/CMakeLists.txt index 65d5f06577..1279d5d501 100644 --- a/python/raft-dask/raft_dask/common/CMakeLists.txt +++ b/python/raft-dask/raft_dask/common/CMakeLists.txt @@ -15,6 +15,5 @@ set(cython_sources comms_utils.pyx nccl.pyx) set(linked_libraries raft::raft raft::distributed) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" ASSOCIATED_TARGETS raft LINKED_LIBRARIES "${linked_libraries}" - CXX + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX ) diff --git a/python/raft-dask/raft_dask/include_test/CMakeLists.txt b/python/raft-dask/raft_dask/include_test/CMakeLists.txt index 2ff1cd9150..8839c57b91 100644 --- a/python/raft-dask/raft_dask/include_test/CMakeLists.txt +++ b/python/raft-dask/raft_dask/include_test/CMakeLists.txt @@ -15,6 +15,5 @@ set(cython_sources raft_include_test.pyx) set(linked_libraries raft::raft) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" ASSOCIATED_TARGETS raft LINKED_LIBRARIES "${linked_libraries}" - CXX + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX ) diff --git a/python/raft-dask/raft_dask/test/conftest.py b/python/raft-dask/raft_dask/tests/conftest.py similarity index 100% rename from python/raft-dask/raft_dask/test/conftest.py rename to python/raft-dask/raft_dask/tests/conftest.py diff --git a/python/raft-dask/raft_dask/test/pytest.ini b/python/raft-dask/raft_dask/tests/pytest.ini similarity index 98% rename from python/raft-dask/raft_dask/test/pytest.ini rename to python/raft-dask/raft_dask/tests/pytest.ini index bf70c06f84..7b0a9f29fb 100644 --- a/python/raft-dask/raft_dask/test/pytest.ini +++ b/python/raft-dask/raft_dask/tests/pytest.ini @@ -2,4 +2,3 @@ [pytest] addopts = --tb=native - 
diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/tests/test_comms.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_comms.py rename to python/raft-dask/raft_dask/tests/test_comms.py diff --git a/python/raft-dask/raft_dask/test/test_raft.py b/python/raft-dask/raft_dask/tests/test_raft.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_raft.py rename to python/raft-dask/raft_dask/tests/test_raft.py diff --git a/python/raft-dask/raft_dask/test/test_version.py b/python/raft-dask/raft_dask/tests/test_version.py similarity index 100% rename from python/raft-dask/raft_dask/test/test_version.py rename to python/raft-dask/raft_dask/tests/test_version.py diff --git a/rapids_config.cmake b/rapids_config.cmake index c8077f7f4b..a40d7130c0 100644 --- a/rapids_config.cmake +++ b/rapids_config.cmake @@ -22,13 +22,15 @@ else() string(REPLACE "\n" "\n " _rapids_version_formatted " ${_rapids_version}") message( FATAL_ERROR - "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") + "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}" + ) endif() if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") file( DOWNLOAD "https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/RAPIDS.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") + "${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake" + ) endif() include("${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 94140d4d00..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
- -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like ) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) - # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 - -[pydocstyle] -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather -# than include using match-dir. Note that as discussed in -# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, -# unlike the match option above this match-dir will have no effect when -# pydocstyle is invoked from pre-commit. Therefore this exclusion list must -# also be maintained in the pre-commit config file. 
-match-dir = ^(?!(ci|cpp|conda|docs)).*$ -# Allow missing docstrings for docutils -ignore-decorators = .*(docutils|doc_apply|copy_docstring).* -select = - D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418 - # Would like to enable the following rules in the future: - # D200, D202, D205, D400 - -[mypy] -ignore_missing_imports = True -# If we don't specify this, then mypy will check excluded files if -# they are imported by a checked file. -follow_imports = skip - -[codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild -# ignore short words, and typename parameters like OffsetT -ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b -ignore-words-list = inout,unparseable,numer -builtin = clear -quiet-level = 3 diff --git a/thirdparty/LICENSES/LICENSE.ann-benchmark b/thirdparty/LICENSES/LICENSE.ann-benchmark index 9f8e4222f6..4d04745ab4 100644 --- a/thirdparty/LICENSES/LICENSE.ann-benchmark +++ b/thirdparty/LICENSES/LICENSE.ann-benchmark @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/thirdparty/LICENSES/LICENSE.faiss b/thirdparty/LICENSES/LICENSE.faiss index 87cbf536c6..b96dcb0480 100644 --- a/thirdparty/LICENSES/LICENSE.faiss +++ b/thirdparty/LICENSES/LICENSE.faiss @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/thirdparty/LICENSES/LICENSE.pytorch b/thirdparty/LICENSES/LICENSE.pytorch index 7ad3d737a5..04f9ad1105 100644 --- a/thirdparty/LICENSES/LICENSE.pytorch +++ b/thirdparty/LICENSES/LICENSE.pytorch @@ -74,4 +74,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +POSSIBILITY OF SUCH DAMAGE. diff --git a/thirdparty/LICENSES/mdarray.license b/thirdparty/LICENSES/mdarray.license index e636b86032..5a491b0879 100644 --- a/thirdparty/LICENSES/mdarray.license +++ b/thirdparty/LICENSES/mdarray.license @@ -39,4 +39,4 @@ // // ************************************************************************ //@HEADER -*/ \ No newline at end of file +*/