From 3136d551ccec9df230e4896e40eaeab8e2277b66 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 20:12:48 -0600 Subject: [PATCH] Multiple Cython, build and CI improvements and fixes --- .../cuda11.8-conda/devcontainer.json | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .../cuda12.0-conda/devcontainer.json | 2 +- .devcontainer/cuda12.0-pip/devcontainer.json | 2 +- .github/CODEOWNERS | 18 ++- .github/workflows/build.yaml | 12 +- .github/workflows/pr.yaml | 22 +-- .github/workflows/test.yaml | 8 +- README.md | 2 +- VERSION | 2 +- build.sh | 2 +- ci/build_docs.sh | 2 +- cpp/CMakeLists.txt | 4 +- cpp/doxygen/Doxyfile | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- cpp/src/neighbors/cagra_c.cpp | 6 +- docs/source/build.md | 4 +- docs/source/conf.py | 4 +- docs/source/developer_guide.md | 8 +- fetch_rapids.cmake | 2 +- python/cuvs/CMakeLists.txt | 15 +- python/cuvs/README.md | 0 python/cuvs/cuvs/__init__.py | 2 +- python/cuvs/cuvs/common/cydlpack.pxd | 4 +- python/cuvs/cuvs/common/cydlpack.pyx | 43 ++++-- python/cuvs/cuvs/common/temp_raft.py | 4 +- python/cuvs/cuvs/neighbors/cagra/__init__.py | 9 +- python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 52 ++++--- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 134 ++++++++++-------- python/cuvs/cuvs/test/test_cagra.py | 4 +- python/cuvs/cuvs/test/test_doctests.py | 19 +-- python/cuvs/pyproject.toml | 17 ++- python/cuvs/setup.cfg | 3 +- python/cuvs/setup.py | 37 ----- 34 files changed, 235 insertions(+), 216 deletions(-) create mode 100644 python/cuvs/README.md delete mode 100644 python/cuvs/setup.py diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 76ce8599a..cefbea72b 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 3a126b36e..05518805a 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index 426aaef98..3f89836e2 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json index 1ef2fdcb6..33f67cd3f 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": 
"rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 407c5448e..01dbcfc83 100755 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,12 +4,18 @@ cpp/ @rapidsai/cuvs-cpp-codeowners #python code owners python/ @rapidsai/cuvs-python-codeowners -#cmake code owners -**/CMakeLists.txt @rapidsai/cuvs-cmake-codeowners -**/cmake/ @rapidsai/cuvs-cmake-codeowners -python/setup.py @rapidsai/cuvs-cmake-codeowners -build.sh @rapidsai/cuvs-cmake-codeowners -**/build.sh @rapidsai/cuvs-cmake-codeowners +#rust code owners +rust/ @rapidsai/cuvs-rust-codeowners + +#docs code owners +docs/ @rapidsai/cuvs-docs-codeowners + +#build code owners +**/CMakeLists.txt @rapidsai/cuvs-build-codeowners +**/cmake/ @rapidsai/cuvs-build-codeowners +python/setup.py @rapidsai/cuvs-build-codeowners +build.sh @rapidsai/cuvs-build-codeowners +**/build.sh @rapidsai/cuvs-build-codeowners #build/ops code owners .github/ @rapidsai/ops-codeowners diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f079b5e78..34cf1f5b0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b190a2a62..5799f5108 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,29 +23,29 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: pull-request enable_check_symbols: true @@ -53,19 +53,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -75,20 +75,20 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index acea5755f..0e66113f2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index e6da1432e..dfba9eb4a 100755 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ mamba install -c conda-forge -c nvidia -c rapidsai pycuvs ### Nightlies If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: ```bash -mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.02* +mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.04* ``` Please see the [Build and Install Guide](docs/source/build.md) for more information on installing cuVS and building from source. diff --git a/VERSION b/VERSION index 3c6c5e2b7..4a2fe8aa5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.02.00 +24.04.00 diff --git a/build.sh b/build.sh index db72bcf46..c6e09c5bf 100755 --- a/build.sh +++ b/build.sh @@ -60,7 +60,7 @@ BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${CUVS_DASK_BUILD_DIR}" CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" BUILD_ALL_GPU_ARCH=0 -BUILD_TESTS=OFF +BUILD_TESTS=ON BUILD_TYPE=Release COMPILE_LIBRARY=OFF INSTALL_TARGET=install diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 0706b1fca..983e97385 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -24,7 +24,7 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libcuvs -export RAPIDS_VERSION_NUMBER="24.02" +export RAPIDS_VERSION_NUMBER="24.04" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ea72eac63..c291c14e3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "24.02") -set(CUVS_VERSION "24.02.00") +set(RAPIDS_VERSION "24.04") +set(CUVS_VERSION "24.04.00") cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 0a2c7f8f6..94304afe0 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "cuVS C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "24.02" +PROJECT_NUMBER = "24.04" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/examples/cmake/thirdparty/fetch_rapids.cmake b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake index 4da917e26..c22f586ca 100644 --- a/cpp/examples/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index b0154acf8..70e268fb2 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -53,7 +53,6 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* auto mds = cuvs::core::from_dlpack(dataset_tensor); cuvs::neighbors::cagra::build_host(*res_ptr, build_params, mds, *index); } - return index; } @@ -153,6 +152,9 @@ extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, dataset.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; + return CUVS_ERROR; } catch (...) { return CUVS_ERROR; } @@ -197,6 +199,9 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, queries.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; + return CUVS_ERROR; } catch (...) { return CUVS_ERROR; } diff --git a/docs/source/build.md b/docs/source/build.md index ae7734d0e..31de69b46 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip @@ -315,4 +315,4 @@ The `raft::raft` CMake target is made available when including RAFT into your CM |-------------|---------------------|----------------------------------------------------------|----------------------------------------| | n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS | | compiled | `raft::compiled` | Pre-compiled template instantiations and runtime library | raft::raft | -| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL \ No newline at end of file +| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL diff --git a/docs/source/conf.py b/docs/source/conf.py index c09ab953f..1a5c9dfe8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '24.02' +version = '24.04' # The full version, including alpha/beta/rc tags. -release = '24.02.00' +release = '24.04.00' # The language for content autogenerated by Sphinx.
Refer to documentation # for a list of supported languages. diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c5bcd03f6..d29130add 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.02/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index e63165e1c..330270d66 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake ) endif() diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 0938cf4a6..9a43eba0c 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -39,15 +39,22 @@ option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulti message("- FIND_CUVS_CPP: ${FIND_CUVS_CPP}") +include(../../fetch_rapids.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-export) +include(rapids-find) + +rapids_cpm_init() + # If the user requested it we attempt to find CUVS. if(FIND_CUVS_CPP) - find_package(cuvs_c ${cuvs_version}) + find_package(cuvs ${cuvs_version}) + include(../../cpp/cmake/thirdparty/get_dlpack.cmake) else() set(cuvs_FOUND OFF) endif() -include(rapids-cython-core) - if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) set(CUDA_STATIC_RUNTIME ON) @@ -60,6 +67,8 @@ if(NOT cuvs_FOUND) install(TARGETS cuvs DESTINATION ${cython_lib_dir}) endif() +include(rapids-cython-core) + rapids_cython_init() add_subdirectory(cuvs/common) diff --git a/python/cuvs/README.md b/python/cuvs/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/__init__.py b/python/cuvs/cuvs/__init__.py index 94b3a200b..9f0481cb7 100644 --- a/python/cuvs/cuvs/__init__.py +++ b/python/cuvs/cuvs/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd index 66c9f3f03..73334e500 100644 --- a/python/cuvs/cuvs/common/cydlpack.pxd +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -18,7 +18,7 @@ from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t -cdef extern from 'dlpack.h' nogil: +cdef extern from "dlpack/dlpack.h" nogil: ctypedef enum DLDeviceType: kDLCPU kDLCUDA @@ -67,4 +67,4 @@ cdef extern from 'dlpack.h' nogil: void (*deleter)(DLManagedTensor*) # noqa: E211 -cdef DLManagedTensor dlpack_c(ary) +cdef DLManagedTensor* dlpack_c(ary) diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 76ec95756..526f6c78e 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -18,6 +18,7 @@ import numpy as np from libc cimport stdlib +from libc.stdint cimport uintptr_t cdef void deleter(DLManagedTensor* tensor) noexcept: @@ -28,15 +29,16 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: stdlib.free(tensor) -cdef DLManagedTensor dlpack_c(ary): +cdef DLManagedTensor* dlpack_c(ary): # todo(dgd): add checking options/parameters cdef DLDeviceType dev_type cdef DLDevice dev cdef DLDataType dtype cdef DLTensor tensor - cdef DLManagedTensor dlm + cdef DLManagedTensor* dlm = \ + <DLManagedTensor*> stdlib.malloc(sizeof(DLManagedTensor)) - if hasattr(ary, "__cuda_array_interface__"): + if ary.from_cai: dev_type = DLDeviceType.kDLCUDA else: dev_type = DLDeviceType.kDLCPU @@ -51,24 +53,47 @@ cdef DLManagedTensor dlpack_c(ary): elif ary.dtype == np.float64: dtype.code = DLDataTypeCode.kDLFloat dtype.bits = 64 + elif ary.dtype == np.int8: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 8 elif ary.dtype == np.int32: dtype.code = DLDataTypeCode.kDLInt dtype.bits = 32 elif ary.dtype == np.int64: - dtype.code = DLDataTypeCode.kDLFloat + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 64 + elif ary.dtype == np.uint8: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 8 + elif ary.dtype == np.uint32: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 32 + elif ary.dtype == np.uint64: + dtype.code = DLDataTypeCode.kDLUInt dtype.bits = 64 - elif ary.dtype == np.bool: + elif ary.dtype == np.bool_: dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 8 - if hasattr(ary, "__cuda_array_interface__"): - tensor_ptr = ary.__cuda_array_interface__["data"][0] - else: - tensor_ptr = ary.__array_interface__["data"][0] + dtype.lanes = 1 + + cdef size_t ndim = len(ary.shape) + + cdef int64_t* shape = <int64_t*> stdlib.malloc(ndim * sizeof(int64_t)) + + for i in range(ndim): + shape[i] = ary.shape[i] + + cdef uintptr_t tensor_ptr + tensor_ptr = ary.ai_["data"][0] tensor.data = <void*> tensor_ptr tensor.device = dev tensor.dtype = dtype tensor.strides = NULL + tensor.ndim = ndim + tensor.shape = shape + tensor.byte_offset = 0 dlm.dl_tensor = tensor dlm.manager_ctx = NULL diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py index 25d6ed0b4..67944eeab 100644 --- a/python/cuvs/cuvs/common/temp_raft.py +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -39,12 +39,12 @@ def auto_sync_resources(f): @functools.wraps(f) def wrapper(*args, resources=None, **kwargs): - sync_handle = resources is None + sync_resources = resources is None resources = resources if resources is not None else DeviceResources() ret_value = f(*args, resources=resources, **kwargs) - if sync_handle: + if sync_resources: resources.sync() return ret_value diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py
b/python/cuvs/cuvs/neighbors/cagra/__init__.py index c3690da87..657c7d366 100644 --- a/python/cuvs/cuvs/neighbors/cagra/__init__.py +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -13,11 +13,6 @@ # limitations under the License. -from .cagra import Index, IndexParams, SearchParams, build_index +from .cagra import Index, IndexParams, SearchParams, build_index, search -__all__ = [ - "Index", - "IndexParams", - "SearchParams", - "build_index", -] +__all__ = ["Index", "IndexParams", "SearchParams", "build_index", "search"] diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index 269f046bf..4293bdc07 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -28,62 +28,68 @@ from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor -cdef extern from "cuvs/neighbors/cagra_c.h" nogil: +cdef extern from "cuvs/neighbors/cagra.h" nogil: - ctypedef enum cagraGraphBuildAlgo: + ctypedef enum cuvsCagraGraphBuildAlgo: IVF_PQ NN_DESCENT - ctypedef struct cagraIndexParams: + ctypedef struct cuvsCagraIndexParams: size_t intermediate_graph_degree size_t graph_degree - cagraGraphBuildAlgo build_algo + cuvsCagraGraphBuildAlgo build_algo size_t nn_descent_niter - ctypedef enum cagraSearchAlgo: + ctypedef cuvsCagraIndexParams* cuvsCagraIndexParams_t + + ctypedef enum cuvsCagraSearchAlgo: SINGLE_CTA, MULTI_CTA, MULTI_KERNEL, AUTO - ctypedef enum cagraHashMode: + ctypedef enum cuvsCagraHashMode: HASH, SMALL, AUTO_HASH - ctypedef struct cagraSearchParams: + ctypedef struct cuvsCagraSearchParams: size_t max_queries size_t itopk_size size_t max_iterations - cagraSearchAlgo algo + cuvsCagraSearchAlgo algo size_t team_size size_t search_width size_t min_iterations size_t thread_block_size - cagraHashMode hashmap_mode + cuvsCagraHashMode hashmap_mode size_t hashmap_min_bitlen float hashmap_max_fill_rate uint32_t num_random_samplings uint64_t rand_xor_mask - ctypedef struct cagraIndex: + ctypedef struct cuvsCagraIndex: uintptr_t addr DLDataType dtype - ctypedef cagraIndex* cagraIndex_t + ctypedef cuvsCagraIndex* cuvsCagraIndex_t + + cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) + + cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index) - cuvsError_t cagraIndexCreate(cagraIndex_t* index) + cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) - cuvsError_t cagraIndexDestroy(cagraIndex_t index) + cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index) - cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams* params, - DLManagedTensor* dataset, - cagraIndex_t index) + cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams* params, + DLManagedTensor* dataset, + cuvsCagraIndex_t index) except + - cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams* params, - cagraIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances) + cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams* params, + cuvsCagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) except + diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index f1c0c9af5..bf17fe6a5 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -22,11 +22,13 @@ cimport cuvs.common.cydlpack from cuvs.common.temp_raft import 
auto_sync_resources from cython.operator cimport dereference as deref +from libcpp cimport bool, cast from cuvs.common cimport cydlpack from pylibraft.common import ( DeviceResources, + Stream, auto_convert_output, cai_wrapper, device_ndarray, @@ -43,9 +45,12 @@ from libc.stdint cimport ( uint64_t, uintptr_t, ) -from pylibraft.common.handle cimport device_resources -from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.c_api cimport ( + cuvsError_t, + cuvsResources_t, + cuvsResourcesCreate, +) cdef class IndexParams: @@ -70,7 +75,7 @@ cdef class IndexParams: building the knn graph. It is expected to be generally faster than ivf_pq. """ - cdef cagraIndexParams params + cdef cuvsCagraIndexParams* params def __init__(self, *, metric="sqeuclidean", @@ -78,6 +83,9 @@ cdef class IndexParams: graph_degree=64, build_algo="ivf_pq", nn_descent_niter=20): + + cuvsCagraIndexParamsCreate(&self.params) + # todo (dgd): enable once other metrics are present # and exposed in cuVS C API # self.params.metric = _get_metric(metric) @@ -85,9 +93,9 @@ cdef class IndexParams: self.params.intermediate_graph_degree = intermediate_graph_degree self.params.graph_degree = graph_degree if build_algo == "ivf_pq": - self.params.build_algo = cagraGraphBuildAlgo.IVF_PQ + self.params.build_algo = cuvsCagraGraphBuildAlgo.IVF_PQ elif build_algo == "nn_descent": - self.params.build_algo = cagraGraphBuildAlgo.NN_DESCENT + self.params.build_algo = cuvsCagraGraphBuildAlgo.NN_DESCENT self.params.nn_descent_niter = nn_descent_niter # @property @@ -112,22 +120,27 @@ cdef class IndexParams: cdef class Index: - cdef cagraIndex_t index + cdef cuvsCagraIndex_t index + cdef bool trained def __cinit__(self): cdef cuvsError_t index_create_status - index_create_status = cagraIndexCreate(&self.index) + index_create_status = cuvsCagraIndexCreate(&self.index) self.trained = False if index_create_status == cuvsError_t.CUVS_ERROR: - raise Exception("FAIL") + raise RuntimeError("Failed to create index.") def __dealloc__(self): cdef cuvsError_t index_destroy_status if self.index is not NULL: - index_destroy_status = cagraIndexDestroy(self.index) + index_destroy_status = cuvsCagraIndexDestroy(self.index) if index_destroy_status == cuvsError_t.CUVS_ERROR: - raise Exception("FAIL") + raise Exception("Failed to deallocate index.") + + @property + def trained(self): + return self.trained def __repr__(self): # todo(dgd): update repr as we expose data through C API @@ -165,19 +178,18 @@ def build_index(IndexParams index_params, dataset, resources=None): -------- >>> import cupy as cp - >>> from pylibraft.neighbors import cagra + >>> from cuvs.neighbors import cagra >>> n_samples = 50000 >>> n_features = 50 >>> n_queries = 1000 >>> k = 10 >>> dataset = cp.random.random_sample((n_samples, n_features), ... dtype=cp.float32) - >>> handle = DeviceResources() >>> build_params = cagra.IndexParams(metric="sqeuclidean") >>> index = cagra.build_index(build_params, dataset) >>> distances, neighbors = cagra.search(cagra.SearchParams(), ... index, dataset, - ... k, handle=handle) + ... 
k) >>> distances = cp.asarray(distances) >>> neighbors = cp.asarray(neighbors) """ @@ -188,22 +200,24 @@ def build_index(IndexParams index_params, dataset, resources=None): _check_input_array(dataset_ai, [np.dtype('float32'), np.dtype('byte'), np.dtype('ubyte')]) - if resources is None: - resources = DeviceResources() - cdef cuvsResources_t* resources_ = \ - resources.getHandle() + cdef cuvsResources_t res_ + cdef cuvsError_t cstat + + cstat = cuvsResourcesCreate(&res_) + if cstat == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Error creating Device Resources.") cdef Index idx = Index() cdef cuvsError_t build_status - cdef cydlpack.DLManagedTensor dataset_dlpack = \ + cdef cydlpack.DLManagedTensor* dataset_dlpack = \ cydlpack.dlpack_c(dataset_ai) - cdef cagraIndexParams* params = &index_params.params + cdef cuvsCagraIndexParams* params = index_params.params with cuda_interruptible(): - build_status = cagraBuild( - deref(resources_), + build_status = cuvsCagraBuild( + res_, params, - &dataset_dlpack, + dataset_dlpack, idx.index ) @@ -264,7 +278,7 @@ cdef class SearchParams: rand_xor_mask: int, default = 0x128394 Bit mask used for initial random seed node selection. """ - cdef cagraSearchParams params + cdef cuvsCagraSearchParams params def __init__(self, *, max_queries=0, @@ -284,13 +298,13 @@ cdef class SearchParams: self.params.itopk_size = itopk_size self.params.max_iterations = max_iterations if algo == "single_cta": - self.params.algo = cagraSearchAlgo.SINGLE_CTA + self.params.algo = cuvsCagraSearchAlgo.SINGLE_CTA elif algo == "multi_cta": - self.params.algo = cagraSearchAlgo.MULTI_CTA + self.params.algo = cuvsCagraSearchAlgo.MULTI_CTA elif algo == "multi_kernel": - self.params.algo = cagraSearchAlgo.MULTI_KERNEL + self.params.algo = cuvsCagraSearchAlgo.MULTI_KERNEL elif algo == "auto": - self.params.algo = cagraSearchAlgo.AUTO + self.params.algo = cuvsCagraSearchAlgo.AUTO else: raise ValueError("`algo` value not supported.") @@ -299,11 +313,11 @@ cdef class SearchParams: self.params.min_iterations = min_iterations self.params.thread_block_size = thread_block_size if hashmap_mode == "hash": - self.params.hashmap_mode = cagraHashMode.HASH + self.params.hashmap_mode = cuvsCagraHashMode.HASH elif hashmap_mode == "small": - self.params.hashmap_mode = cagraHashMode.SMALL + self.params.hashmap_mode = cuvsCagraHashMode.SMALL elif hashmap_mode == "auto": - self.params.hashmap_mode = cagraHashMode.AUTO_HASH + self.params.hashmap_mode = cuvsCagraHashMode.AUTO_HASH else: raise ValueError("`hashmap_mode` value not supported.") @@ -407,16 +421,14 @@ def search(SearchParams search_params, Examples -------- >>> import cupy as cp - >>> from pylibraft.common import DeviceResources - >>> from pylibraft.neighbors import cagra + >>> from cuvs.neighbors import cagra >>> n_samples = 50000 >>> n_features = 50 >>> n_queries = 1000 >>> dataset = cp.random.random_sample((n_samples, n_features), ... dtype=cp.float32) >>> # Build index - >>> handle = DeviceResources() - >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> index = cagra.build_index(cagra.IndexParams(), dataset) >>> # Search using the built index >>> queries = cp.random.random_sample((n_queries, n_features), ... dtype=cp.float32) @@ -429,62 +441,62 @@ def search(SearchParams search_params, >>> # creation during search. This is useful if multiple searches >>> # are performed with the same query size. >>> distances, neighbors = cagra.search(search_params, index, queries, - ...
k, handle=handle) - >>> # pylibraft functions are often asynchronous so the - >>> # handle needs to be explicitly synchronized - >>> handle.sync() + ... k) >>> neighbors = cp.asarray(neighbors) >>> distances = cp.asarray(distances) """ if not index.trained: - raise ValueError("Index need to be built before calling search.") + raise ValueError("Index needs to be built before calling search.") - if resources is None: - resources = DeviceResources() - cdef device_resources* resources_ = \ - resources.getHandle() + cdef cuvsResources_t res_ + cdef cuvsError_t cstat + + cstat = cuvsResourcesCreate(&res_) + if cstat == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Error creating Device Resources.") # todo(dgd): we can make the check of dtype a parameter of wrap_array # in RAFT to make this a single call - queries_cai = cai_wrapper(queries) + queries_cai = wrap_array(queries) _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), - np.dtype('ubyte')], - exp_cols=index.dim) + np.dtype('ubyte')]) cdef uint32_t n_queries = queries_cai.shape[0] if neighbors is None: neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') - neighbors_cai = cai_wrapper(neighbors) + neighbors_cai = wrap_array(neighbors) _check_input_array(neighbors_cai, [np.dtype('uint32')], exp_rows=n_queries, exp_cols=k) if distances is None: distances = device_ndarray.empty((n_queries, k), dtype='float32') - distances_cai = cai_wrapper(distances) + distances_cai = wrap_array(distances) _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cagraSearchParams* params = &search_params.params - cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) - cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) - cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) + cdef cuvsCagraSearchParams* params = &search_params.params + cdef cuvsError_t search_status + cdef cydlpack.DLManagedTensor* queries_dlpack = \ + cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor* neighbors_dlpack = \ + cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor* distances_dlpack = \ + cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): - cagraSearch( - deref(resources_), + search_status = cuvsCagraSearch( + res_, params, index.index, - &queries_dlpack, - &neighbors_dlpack, - &distances_dlpack + queries_dlpack, + neighbors_dlpack, + distances_dlpack ) - if build_status == cuvsError_t.CUVS_ERROR: - raise RuntimeError("Index failed to build.") - else: - idx.trained = True + if search_status == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Search failed.") return (distances, neighbors) diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 78fbc5828..6074eee3a 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -56,8 +56,6 @@ def run_cagra_build_search_test( else: index = cagra.build_index(build_params, dataset) - assert index.trained - if not add_data_on_build: dataset_1 = dataset[: n_rows // 2, :] dataset_2 = dataset[n_rows // 2 :, :] diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py index 331b0f7f3..6d56ffaa2 100644 --- a/python/cuvs/cuvs/test/test_doctests.py +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -20,11 +20,7 @@ import pytest -import pylibraft.cluster -import pylibraft.distance -import pylibraft.matrix -import pylibraft.neighbors -import pylibraft.random +import cuvs.neighbors # Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa @@ -92,17 +88,8 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): # since the root pylibraft module doesn't import submodules (or define an # __all__) we are explicitly adding all the submodules we want to run # doctests for here -DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.cagra)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_flat)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.refine)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.random)) +DOC_STRINGS = list(_find_doctests_in_obj(cuvs.neighbors)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.cagra)) @pytest.mark.parametrize( diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index cba8d4adf..1f9bc29d4 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ requires = [ "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "cuvs" @@ -109,3 +109,16 @@ skip = [ "dist", "__init__.py", ] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.minimum-version = "3.26.4" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["cuvs"] + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "cuvs/VERSION" +regex = "(?P<value>.*)" diff --git a/python/cuvs/setup.cfg b/python/cuvs/setup.cfg index 3574b4416..57b4954bc 100644 --- a/python/cuvs/setup.cfg +++ b/python/cuvs/setup.cfg @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION.
[isort] line_length=79 @@ -12,6 +12,7 @@ known_dask= distributed dask_cuda known_rapids= + cuvs nvtext cudf cuml diff --git a/python/cuvs/setup.py b/python/cuvs/setup.py deleted file mode 100644 index 4e825dab2..000000000 --- a/python/cuvs/setup.py +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from setuptools import find_packages -from skbuild import setup - - -def exclude_libcxx_symlink(cmake_manifest): - return list( - filter( - lambda name: not ("include/rapids/libcxx/include" in name), - cmake_manifest, - ) - ) - - -packages = find_packages(include=["cuvs*"]) -setup( - # Don't want libcxx getting pulled into wheel builds. - cmake_process_manifest_hook=exclude_libcxx_symlink, - packages=packages, - package_data={key: ["VERSION", "*.pxd"] for key in packages}, - zip_safe=False, -)
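
The end-to-end Python usage described by the updated `cuvs.neighbors.cagra` docstrings in this patch boils down to the sketch below. It is an illustrative example assembled from those docstrings, not part of the patch itself, and it assumes CuPy, a CUDA-capable device, and an installed `cuvs` build.

```python
# Usage sketch of the cagra Python API touched by this patch, based on the
# docstring examples above.
import cupy as cp

from cuvs.neighbors import cagra

n_samples, n_features, n_queries, k = 50000, 50, 1000, 10

# Random device-resident float32 dataset and queries.
dataset = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
queries = cp.random.random_sample((n_queries, n_features), dtype=cp.float32)

# Build the CAGRA graph index. After this change, build_index() creates its
# own cuvsResources_t internally, so no RAFT handle is passed in.
build_params = cagra.IndexParams(metric="sqeuclidean")
index = cagra.build_index(build_params, dataset)

# Search the index; distances and neighbor ids come back as device arrays.
search_params = cagra.SearchParams()
distances, neighbors = cagra.search(search_params, index, queries, k)

distances = cp.asarray(distances)
neighbors = cp.asarray(neighbors)
```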