From de8525706f7cfe04b183169bd20f62d909c77bef Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Thu, 13 Feb 2025 09:03:15 -0800
Subject: [PATCH 1/6] Correct computation of Laplacian in spectral partitioning

In the previous implementation of spectral partitioning, the eigenvalue computation was actually performed on the original adjacency matrix, not the graph Laplacian thereof. This change provides a utility for computing the graph Laplacian, which is then fed into the new implementation of the Lanczos solver. An additional test is provided to ensure that the end-to-end spectral partitioning quality is as expected.
---
 cpp/include/raft/core/sparse_types.hpp        |   2 +-
 .../raft/spectral/detail/matrix_wrappers.hpp  |  16 +++
 .../raft/spectral/detail/partition.hpp        |   8 +-
 cpp/include/raft/spectral/eigen_solvers.cuh   |  53 ++++++--
 cpp/include/raft/spectral/laplacian.cuh       | 122 ++++++++++++++++++
 cpp/tests/linalg/eigen_solvers.cu             |  84 ++++++++++++
 cpp/tests/sparse/spectral_matrix.cu           |  58 +++++++++
 7 files changed, 325 insertions(+), 18 deletions(-)
 create mode 100644 cpp/include/raft/spectral/laplacian.cuh
diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp
index 6e5092f50f..8f3357c553 100644
--- a/cpp/include/raft/core/sparse_types.hpp
+++ b/cpp/include/raft/core/sparse_types.hpp
@@ -168,7 +168,7 @@ class sparse_matrix {
                 row_type n_rows,
                 col_type n_cols,
                 nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
-    : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
+    : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, nnz)} {};
 
   // Constructor that owns the data but not the structure
   // This constructor is only callable with a `structure_type == *_structure_view`
diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
index af4a838bd5..417db8648d 100644
--- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
@@ -15,6 +15,8 @@
  */
 #pragma once
 
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/device_span.hpp>
 #include <raft/core/resource/cublas_handle.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/cusparse_handle.hpp>
@@ -33,6 +35,7 @@
 #include <thrust/system/cuda/execution_policy.h>
 
 #include <algorithm>
+#include <cstddef>
 
 // =========================================================
 // Useful macros
@@ -181,6 +184,19 @@ struct sparse_matrix_t {
   {
   }
 
+  auto to_csr_matrix_view() const
+  {
+    // The usage of sparse_matrix_t prior to introduction of this method
+    // assumed that all data was strictly on device. We will make the same
+    // assumption for construction of the csr_matrix_view
+    return device_csr_matrix_view<value_type const, index_type const, index_type const, index_type>{
+      device_span<value_type const>{values_, std::uint64_t(nnz_)},
+      device_compressed_structure_view<index_type const, index_type const, index_type>{
+        device_span<index_type const>{row_offsets_, std::uint64_t(nrows_ + 1)},
+        device_span<index_type const>{col_indices_, std::uint64_t(nnz_)},
+        ncols_}};
+  }
+
   virtual ~sparse_matrix_t(void) =
     default;  // virtual because used as base for following matrix types
 
diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp
index 26e4a73f9d..6f2c6b3907 100644
--- a/cpp/include/raft/spectral/detail/partition.hpp
+++ b/cpp/include/raft/spectral/detail/partition.hpp
@@ -21,6 +21,7 @@
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/detail/spectral_util.cuh>
 #include <raft/spectral/eigen_solvers.cuh>
+#include <raft/spectral/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
 #include <cuda.h>
@@ -97,14 +98,15 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
   // Compute eigenvectors of Laplacian
 
   // Initialize Laplacian
-  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
-  spectral::matrix::laplacian_matrix_t<vertex_t, weight_t, nnz_t> L{handle, csr_m};
+  auto laplacian =
+    raft::spectral::matrix::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());
 
   auto eigen_config = eigen_solver.get_config();
   auto nEigVecs     = eigen_config.n_eigVecs;
 
   // Compute smallest eigenvalues and eigenvectors
-  std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
+  std::get<0>(stats) =
+    eigen_solver.solve_smallest_eigenvectors(handle, laplacian.view(), eigVals, eigVecs);
 
   // Whiten eigenvector matrix
   transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
diff --git a/cpp/include/raft/spectral/eigen_solvers.cuh b/cpp/include/raft/spectral/eigen_solvers.cuh
index 03448e2b5e..0d511a2333 100644
--- a/cpp/include/raft/spectral/eigen_solvers.cuh
+++ b/cpp/include/raft/spectral/eigen_solvers.cuh
@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include <raft/core/mdspan.hpp>
 #include <raft/sparse/solver/lanczos.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
@@ -49,28 +50,52 @@ struct lanczos_solver_t {
   {
   }
 
-  index_type_t solve_smallest_eigenvectors(
+  auto solve_smallest_eigenvectors(
     raft::resources const& handle,
     matrix::sparse_matrix_t<index_type_t, value_type_t, size_type_t> const& A,
     value_type_t* __restrict__ eigVals,
     value_type_t* __restrict__ eigVecs) const
+  {
+    auto csr_structure =
+      raft::make_device_compressed_structure_view<index_type_t, index_type_t, index_type_t>(
+        const_cast<index_type_t*>(A.row_offsets_),
+        const_cast<index_type_t*>(A.col_indices_),
+        A.nrows_,
+        A.ncols_,
+        A.nnz_);
+
+    auto csr_matrix =
+      raft::make_device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t>(
+        const_cast<value_type_t*>(A.values_), csr_structure);
+    return solve_smallest_eigenvectors(handle, csr_matrix, eigVals, eigVecs);
+  }
+
+  auto solve_smallest_eigenvectors(
+    raft::resources const& handle,
+    device_csr_matrix_view<value_type_t, index_type_t, index_type_t, index_type_t> input,
+    value_type_t* __restrict__ eigVals,
+    value_type_t* __restrict__ eigVecs) const
   {
     RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
     RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
-    index_type_t iters{};
-    sparse::solver::computeSmallestEigenvectors(handle,
-                                                A,
-                                                config_.n_eigVecs,
-                                                config_.maxIter,
-                                                config_.restartIter,
-                                                config_.tol,
-                                                config_.reorthogonalize,
-                                                iters,
-                                                eigVals,
-                                                eigVecs,
-                                                config_.seed);
 
-    return iters;
+    auto lanczos_config = raft::sparse::solver::lanczos_solver_config<value_type_t>{
+      config_.n_eigVecs, config_.maxIter, config_.restartIter, config_.tol, config_.seed};
+    auto v0_opt = std::optional<raft::device_vector_view<value_type_t, uint32_t, raft::row_major>>{
+      std::nullopt};
+    auto input_structure = input.structure_view();
+
+    auto solver_iterations = sparse::solver::lanczos_compute_smallest_eigenvectors(
+      handle,
+      lanczos_config,
+      input,
+      v0_opt,
+      raft::make_device_vector_view<value_type_t, uint32_t, raft::col_major>(eigVals,
+                                                                             config_.n_eigVecs),
+      raft::make_device_matrix_view<value_type_t, uint32_t, raft::col_major>(
+        eigVecs, input_structure.get_n_rows(), config_.n_eigVecs));
+
+    return index_type_t{solver_iterations};
   }
 
   index_type_t solve_largest_eigenvectors(
diff --git a/cpp/include/raft/spectral/laplacian.cuh b/cpp/include/raft/spectral/laplacian.cuh
new file mode 100644
index 0000000000..1d937d69a6
--- /dev/null
+++ b/cpp/include/raft/spectral/laplacian.cuh
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/resources.hpp>
+
+#include <type_traits>
+
+namespace raft {
+namespace spectral {
+namespace matrix {
+namespace detail {
+
+/* Compute the graph Laplacian of an adjacency matrix
+ *
+ * This kernel implements the necessary logic for computing a graph
+ * Laplacian for an adjacency matrix in CSR format. A custom kernel is
+ * required because cusparse does not conveniently implement matrix subtraction with 64-bit
+ * indices. The custom kernel also allows the computation to be completed
+ * with no extra allocations or compute.
+ */
+template <typename ElementType, typename IndptrType, typename IndicesType>
+RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
+                                           IndicesType* output_indices,
+                                           IndptrType* output_indptr,
+                                           IndptrType dim,
+                                           ElementType const* adj_values,
+                                           IndicesType const* adj_indices,
+                                           IndptrType const* adj_indptr)
+{
+  /* The graph Laplacian L of an adjacency matrix A is given by:
+   * L = D - A
+   * where D is the degree matrix of A. The degree matrix is itself defined
+   * as the sum of each row of A and represents the degree of the node
+   * indicated by the index of the row. */
+
+  for (auto row = threadIdx.x + blockIdx.x * blockDim.x; row < dim; row += blockDim.x * gridDim.x) {
+    auto row_begin = adj_indptr[row];
+    auto row_end   = adj_indptr[row + 1];
+    // All output indexes will need to be offset by the row, since every row will
+    // gain exactly one new non-zero element. degree_output_index is the index
+    // where we will store the degree of each row
+    auto degree_output_index = row_begin + row;
+    auto degree_value        = ElementType{};
+    // value_index indicates the index of the current value in the original
+    // adjacency matrix
+    for (auto value_index = row_begin; value_index < row_end; ++value_index) {
+      auto col_index         = adj_indices[value_index];
+      auto is_lower_diagonal = col_index < row;
+      auto output_index      = value_index + row + !is_lower_diagonal;
+      auto input_value       = adj_values[value_index];
+      degree_value += input_value;
+      output_values[output_index]  = ElementType{-1} * input_value;
+      output_indices[output_index] = col_index;
+      // Increment the index where we will store the degree for every non-zero
+      // element before we reach the diagonal
+      degree_output_index += is_lower_diagonal;
+    }
+    output_values[degree_output_index]  = degree_value;
+    output_indices[degree_output_index] = row;
+    output_indptr[row]                  = row_begin + row;
+    output_indptr[row + 1]              = row_end + row + 1;
+  }
+}
+
+}  // namespace detail
+
+/** Given a CSR adjacency matrix, return the graph Laplacian
+ *
+ * Note that for non-symmetric matrices, the out-degree Laplacian is returned.
+ */
+template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
+auto compute_graph_laplacian(
+  raft::resources const& res,
+  device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
+{
+  auto input_structure = input.structure_view();
+  auto dim             = input_structure.get_n_rows();
+  RAFT_EXPECTS(dim == input_structure.get_n_cols(),
+               "The graph Laplacian can only be computed on a square adjacency matrix");
+  auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
+                                       std::remove_const_t<IndptrType>,
+                                       std::remove_const_t<IndicesType>,
+                                       std::remove_const_t<NZType>>(
+    res,
+    dim,
+    dim,
+    /* The nnz for the result will be the dimension of the (square) input matrix plus the number of
+     * non-zero elements in the original matrix, since we introduce non-zero elements along the
+     * diagonal to represent the degree of each node. */
+    input_structure.get_nnz() + dim);
+  auto result_structure                         = result.structure_view();
+  auto static constexpr const threads_per_block = 256;
+  auto blocks = std::min(int((dim + threads_per_block - 1) / threads_per_block), 65535);
+  auto stream = resource::get_cuda_stream(res);
+  detail::compute_graph_laplacian_kernel<<<threads_per_block, blocks, 0, stream>>>(
+    result.get_elements().data(),
+    result_structure.get_indices().data(),
+    result_structure.get_indptr().data(),
+    dim,
+    input.get_elements().data(),
+    input_structure.get_indices().data(),
+    input_structure.get_indptr().data());
+  return result;
+}
+
+}  // namespace matrix
+}  // namespace spectral
+}  // namespace raft
diff --git a/cpp/tests/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu
index 250deb6f33..f43081690c 100644
--- a/cpp/tests/linalg/eigen_solvers.cu
+++ b/cpp/tests/linalg/eigen_solvers.cu
@@ -14,12 +14,16 @@
  * limitations under the License.
  */
 
+#include <raft/core/handle.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/resource/device_id.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/spectral/eigen_solvers.cuh>
+#include <raft/spectral/modularity_maximization.cuh>
 #include <raft/spectral/partition.cuh>
 
+#include <thrust/device_vector.h>
+
 #include <gtest/gtest.h>
 
 #include <cstddef>
@@ -117,5 +121,85 @@ TEST(Raft, SpectralSolvers)
   EXPECT_ANY_THROW(spectral::analyzePartition(h, sm, k, clusters, edgeCut, cost));
 }
 
+TEST(Raft, SpectralPartition)
+{
+  auto offsets = thrust::device_vector<int>(std::vector<int>{
+    0,  16, 25, 35, 41, 44, 48, 52,  56,  61,  63,  66,  67,  69,  74,  76,  78, 80,
+    82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156});
+  auto indices = thrust::device_vector<int>(std::vector<int>{
+    1,  2,  3,  4,  5,  6,  7,  8,  10, 11, 12, 13, 17, 19, 21, 31, 0,  2,  3,  7,  13, 17, 19,
+    21, 30, 0,  1,  3,  7,  8,  9,  13, 27, 28, 32, 0,  1,  2,  7,  12, 13, 0,  6,  10, 0,  6,
+    10, 16, 0,  4,  5,  16, 0,  1,  2,  3,  0,  2,  30, 32, 33, 2,  33, 0,  4,  5,  0,  0,  3,
+    0,  1,  2,  3,  33, 32, 33, 32, 33, 5,  6,  0,  1,  32, 33, 0,  1,  33, 32, 33, 0,  1,  32,
+    33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2,  23, 24, 33, 2,  31, 33, 23, 26,
+    32, 33, 1,  8,  32, 33, 0,  24, 25, 28, 32, 33, 2,  8,  14, 15, 18, 20, 22, 23, 29, 30, 31,
+    33, 8,  9,  13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32});
+  auto values  = thrust::device_vector<float>(std::vector<float>{
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0});
+
+  auto num_verts = int(offsets.size() - 1);
+  auto num_edges = indices.size();
+
+  auto result_v = thrust::device_vector<int>(std::vector<int>(num_verts, -1));
+
+  auto constexpr const n_clusters  = 8;
+  auto constexpr const n_eig_vects = std::uint64_t{8};
+
+  auto constexpr const evs_tolerance   = .00001f;
+  auto constexpr const kmean_tolerance = .00001f;
+  auto constexpr const evs_max_iter    = 100;
+  auto constexpr const kmean_max_iter  = 100;
+
+  auto eig_vals  = thrust::device_vector<float>(n_eig_vects);
+  auto eig_vects = thrust::device_vector<float>(n_eig_vects * num_verts);
+
+  auto handle = raft::handle_t{};
+
+  auto restartIter_lanczos = int{15 + n_eig_vects};
+
+  auto seed_eig_solver     = std::uint64_t{1234567};
+  auto seed_cluster_solver = std::uint64_t{12345678};
+
+  auto const adj_matrix = raft::spectral::matrix::sparse_matrix_t<int, float>{handle,
+                                                                              offsets.data().get(),
+                                                                              indices.data().get(),
+                                                                              values.data().get(),
+                                                                              num_verts,
+                                                                              num_verts,
+                                                                              num_edges};
+
+  auto eig_cfg = raft::spectral::eigen_solver_config_t<int, float>{
+    n_eig_vects, evs_max_iter, restartIter_lanczos, evs_tolerance, false, seed_eig_solver};
+  auto eigen_solver = raft::spectral::lanczos_solver_t<int, float>{eig_cfg};
+
+  auto clust_cfg = raft::spectral::cluster_solver_config_t<int, float>{
+    n_clusters, kmean_max_iter, kmean_tolerance, seed_cluster_solver};
+  auto cluster_solver = raft::spectral::kmeans_solver_t<int, float>{clust_cfg};
+
+  partition(handle,
+            adj_matrix,
+            eigen_solver,
+            cluster_solver,
+            result_v.data().get(),
+            eig_vals.data().get(),
+            eig_vects.data().get());
+
+  auto edge_cut = float{};
+  auto cost     = float{};
+
+  raft::spectral::analyzePartition(
+    handle, adj_matrix, n_clusters, result_v.data().get(), edge_cut, cost);
+
+  ASSERT_LT(edge_cut, 55.0);
+}
+
 }  // namespace spectral
 }  // namespace raft
diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu
index 6d52dfb1bb..e3ce3e978a 100644
--- a/cpp/tests/sparse/spectral_matrix.cu
+++ b/cpp/tests/sparse/spectral_matrix.cu
@@ -17,7 +17,9 @@
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/device_id.hpp>
 #include <raft/core/resources.hpp>
+#include <raft/spectral/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
+#include <raft/util/cudart_utils.hpp>
 
 #include <gtest/gtest.h>
 
@@ -84,6 +86,62 @@ TEST(Raft, SpectralMatrices)
   EXPECT_ANY_THROW(cnstr_mm2());  // because of nullptr ptr args
 }
 
+TEST(Raft, ComputeGraphLaplacian)
+{
+  // The following adjacency matrix will be used to allow for manual
+  // verification of results:
+  // [[0 1 1 1]
+  //  [1 0 0 1]
+  //  [1 0 0 0]
+  //  [1 1 0 0]]
+
+  auto data    = std::vector<float>{1, 1, 1, 1, 1, 1, 1, 1};
+  auto indices = std::vector<int>{1, 2, 3, 0, 3, 0, 0, 1};
+  auto indptr  = std::vector<int>{0, 3, 5, 6, 8};
+
+  auto res = raft::resources{};
+  auto adjacency_matrix =
+    make_device_csr_matrix<float>(res, int(indptr.size() - 1), int(indptr.size() - 1), data.size());
+  auto adjacency_structure = adjacency_matrix.structure_view();
+  raft::copy(adjacency_matrix.get_elements().data(),
+             &(data[0]),
+             data.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(adjacency_structure.get_indices().data(),
+             &(indices[0]),
+             indices.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(adjacency_structure.get_indptr().data(),
+             &(indptr[0]),
+             indptr.size(),
+             raft::resource::get_cuda_stream(res));
+  auto laplacian           = compute_graph_laplacian(res, adjacency_matrix.view());
+  auto laplacian_structure = laplacian.structure_view();
+  auto laplacian_data      = std::vector<float>(laplacian_structure.get_nnz());
+  auto laplacian_indices   = std::vector<int>(laplacian_structure.get_nnz());
+  auto laplacian_indptr    = std::vector<int>(laplacian_structure.get_n_rows() + 1);
+  raft::copy(&(laplacian_data[0]),
+             laplacian.get_elements().data(),
+             laplacian_structure.get_nnz(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(&(laplacian_indices[0]),
+             laplacian_structure.get_indices().data(),
+             laplacian_structure.get_nnz(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(&(laplacian_indptr[0]),
+             laplacian_structure.get_indptr().data(),
+             laplacian_structure.get_n_rows() + 1,
+             raft::resource::get_cuda_stream(res));
+  auto expected_data    = std::vector<float>{3, -1, -1, -1, -1, 2, -1, -1, 1, -1, -1, 2};
+  auto expected_indices = std::vector<int>{0, 1, 2, 3, 0, 1, 3, 0, 2, 0, 1, 3};
+  auto expected_indptr  = std::vector<int>{0, 4, 7, 9, 12};
+  raft::resource::sync_stream(res);
+
+  EXPECT_EQ(expected_data, laplacian_data);
+  EXPECT_EQ(expected_indices, laplacian_indices);
+  EXPECT_EQ(expected_indptr, laplacian_indptr);
+}
+
 }  // namespace matrix
 }  // namespace spectral
 }  // namespace raft

From 08e9f0ff132d0b43bc875c1c08668ed7b06480c4 Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Thu, 13 Feb 2025 13:54:57 -0800
Subject: [PATCH 2/6] Update namespace for compute_graph_laplacian

---
 .../{spectral => sparse/linalg}/laplacian.cuh |  8 +-
 .../raft/spectral/detail/partition.hpp        |  4 +-
 cpp/tests/CMakeLists.txt                      |  1 +
 cpp/tests/sparse/laplacian.cu                 | 89 +++++++++++++++++++
 cpp/tests/sparse/spectral_matrix.cu           | 58 +-----------
 5 files changed, 97 insertions(+), 63 deletions(-)
 rename cpp/include/raft/{spectral => sparse/linalg}/laplacian.cuh (98%)
 create mode 100644 cpp/tests/sparse/laplacian.cu

diff --git a/cpp/include/raft/spectral/laplacian.cuh b/cpp/include/raft/sparse/linalg/laplacian.cuh
similarity index 98%
rename from cpp/include/raft/spectral/laplacian.cuh
rename to cpp/include/raft/sparse/linalg/laplacian.cuh
index 1d937d69a6..7c2449c937 100644
--- a/cpp/include/raft/spectral/laplacian.cuh
+++ b/cpp/include/raft/sparse/linalg/laplacian.cuh
@@ -20,8 +20,8 @@
 #include <type_traits>
 
 namespace raft {
-namespace spectral {
-namespace matrix {
+namespace sparse {
+namespace linalg {
 namespace detail {
 
 /* Compute the graph Laplacian of an adjacency matrix
@@ -117,6 +117,6 @@ auto compute_graph_laplacian(
   return result;
 }
 
-}  // namespace matrix
-}  // namespace spectral
+}  // namespace linalg
+}  // namespace sparse
 }  // namespace raft
diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp
index 6f2c6b3907..5b0cb08985 100644
--- a/cpp/include/raft/spectral/detail/partition.hpp
+++ b/cpp/include/raft/spectral/detail/partition.hpp
@@ -21,7 +21,7 @@
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/detail/spectral_util.cuh>
 #include <raft/spectral/eigen_solvers.cuh>
-#include <raft/spectral/laplacian.cuh>
+#include <raft/sparse/linalg/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
 #include <cuda.h>
@@ -99,7 +99,7 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
 
   // Initialize Laplacian
   auto laplacian =
-    raft::spectral::matrix::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());
+    raft::sparse::linalg::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());
 
   auto eigen_config = eigen_solver.get_config();
   auto nEigVecs     = eigen_config.n_eigVecs;
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 34bea67fbe..12da3bec13 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -259,6 +259,7 @@ if(BUILD_TESTS)
     sparse/csr_transpose.cu
     sparse/degree.cu
     sparse/filter.cu
+    sparse/laplacian.cu
     sparse/masked_matmul.cu
     sparse/norm.cu
     sparse/normalize.cu
diff --git a/cpp/tests/sparse/laplacian.cu b/cpp/tests/sparse/laplacian.cu
new file mode 100644
index 0000000000..ea1d158f3e
--- /dev/null
+++ b/cpp/tests/sparse/laplacian.cu
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/sparse/linalg/laplacian.cuh>
+#include <raft/util/cudart_utils.hpp>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace raft {
+namespace sparse {
+namespace linalg {
+
+TEST(Raft, ComputeGraphLaplacian)
+{
+  // The following adjacency matrix will be used to allow for manual
+  // verification of results:
+  // [[0 1 1 1]
+  //  [1 0 0 1]
+  //  [1 0 0 0]
+  //  [1 1 0 0]]
+
+  auto data    = std::vector<float>{1, 1, 1, 1, 1, 1, 1, 1};
+  auto indices = std::vector<int>{1, 2, 3, 0, 3, 0, 0, 1};
+  auto indptr  = std::vector<int>{0, 3, 5, 6, 8};
+
+  auto res = raft::resources{};
+  auto adjacency_matrix =
+    make_device_csr_matrix<float>(res, int(indptr.size() - 1), int(indptr.size() - 1), data.size());
+  auto adjacency_structure = adjacency_matrix.structure_view();
+  raft::copy(adjacency_matrix.get_elements().data(),
+             &(data[0]),
+             data.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(adjacency_structure.get_indices().data(),
+             &(indices[0]),
+             indices.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(adjacency_structure.get_indptr().data(),
+             &(indptr[0]),
+             indptr.size(),
+             raft::resource::get_cuda_stream(res));
+  auto laplacian           = compute_graph_laplacian(res, adjacency_matrix.view());
+  auto laplacian_structure = laplacian.structure_view();
+  auto laplacian_data      = std::vector<float>(laplacian_structure.get_nnz());
+  auto laplacian_indices   = std::vector<int>(laplacian_structure.get_nnz());
+  auto laplacian_indptr    = std::vector<int>(laplacian_structure.get_n_rows() + 1);
+  raft::copy(&(laplacian_data[0]),
+             laplacian.get_elements().data(),
+             laplacian_structure.get_nnz(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(&(laplacian_indices[0]),
+             laplacian_structure.get_indices().data(),
+             laplacian_structure.get_nnz(),
+             raft::resource::get_cuda_stream(res));
+  raft::copy(&(laplacian_indptr[0]),
+             laplacian_structure.get_indptr().data(),
+             laplacian_structure.get_n_rows() + 1,
+             raft::resource::get_cuda_stream(res));
+  auto expected_data    = std::vector<float>{3, -1, -1, -1, -1, 2, -1, -1, 1, -1, -1, 2};
+  auto expected_indices = std::vector<int>{0, 1, 2, 3, 0, 1, 3, 0, 2, 0, 1, 3};
+  auto expected_indptr  = std::vector<int>{0, 4, 7, 9, 12};
+  raft::resource::sync_stream(res);
+
+  EXPECT_EQ(expected_data, laplacian_data);
+  EXPECT_EQ(expected_indices, laplacian_indices);
+  EXPECT_EQ(expected_indptr, laplacian_indptr);
+}
+
+}  // namespace linalg
+}  // namespace sparse
+}  // namespace raft
diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu
index e3ce3e978a..962fd25d04 100644
--- a/cpp/tests/sparse/spectral_matrix.cu
+++ b/cpp/tests/sparse/spectral_matrix.cu
@@ -17,7 +17,7 @@
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/device_id.hpp>
 #include <raft/core/resources.hpp>
-#include <raft/spectral/laplacian.cuh>
+#include <raft/sparse/linalg/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
 
@@ -86,62 +86,6 @@ TEST(Raft, SpectralMatrices)
   EXPECT_ANY_THROW(cnstr_mm2());  // because of nullptr ptr args
 }
 
-TEST(Raft, ComputeGraphLaplacian)
-{
-  // The following adjacency matrix will be used to allow for manual
-  // verification of results:
-  // [[0 1 1 1]
-  //  [1 0 0 1]
-  //  [1 0 0 0]
-  //  [1 1 0 0]]
-
-  auto data    = std::vector<float>{1, 1, 1, 1, 1, 1, 1, 1};
-  auto indices = std::vector<int>{1, 2, 3, 0, 3, 0, 0, 1};
-  auto indptr  = std::vector<int>{0, 3, 5, 6, 8};
-
-  auto res = raft::resources{};
-  auto adjacency_matrix =
-    make_device_csr_matrix<float>(res, int(indptr.size() - 1), int(indptr.size() - 1), data.size());
-  auto adjacency_structure = adjacency_matrix.structure_view();
-  raft::copy(adjacency_matrix.get_elements().data(),
-             &(data[0]),
-             data.size(),
-             raft::resource::get_cuda_stream(res));
-  raft::copy(adjacency_structure.get_indices().data(),
-             &(indices[0]),
-             indices.size(),
-             raft::resource::get_cuda_stream(res));
-  raft::copy(adjacency_structure.get_indptr().data(),
-             &(indptr[0]),
-             indptr.size(),
-             raft::resource::get_cuda_stream(res));
-  auto laplacian           = compute_graph_laplacian(res, adjacency_matrix.view());
-  auto laplacian_structure = laplacian.structure_view();
-  auto laplacian_data      = std::vector<float>(laplacian_structure.get_nnz());
-  auto laplacian_indices   = std::vector<int>(laplacian_structure.get_nnz());
-  auto laplacian_indptr    = std::vector<int>(laplacian_structure.get_n_rows() + 1);
-  raft::copy(&(laplacian_data[0]),
-             laplacian.get_elements().data(),
-             laplacian_structure.get_nnz(),
-             raft::resource::get_cuda_stream(res));
-  raft::copy(&(laplacian_indices[0]),
-             laplacian_structure.get_indices().data(),
-             laplacian_structure.get_nnz(),
-             raft::resource::get_cuda_stream(res));
-  raft::copy(&(laplacian_indptr[0]),
-             laplacian_structure.get_indptr().data(),
-             laplacian_structure.get_n_rows() + 1,
-             raft::resource::get_cuda_stream(res));
-  auto expected_data    = std::vector<float>{3, -1, -1, -1, -1, 2, -1, -1, 1, -1, -1, 2};
-  auto expected_indices = std::vector<int>{0, 1, 2, 3, 0, 1, 3, 0, 2, 0, 1, 3};
-  auto expected_indptr  = std::vector<int>{0, 4, 7, 9, 12};
-  raft::resource::sync_stream(res);
-
-  EXPECT_EQ(expected_data, laplacian_data);
-  EXPECT_EQ(expected_indices, laplacian_indices);
-  EXPECT_EQ(expected_indptr, laplacian_indptr);
-}
-
 }  // namespace matrix
 }  // namespace spectral
 }  // namespace raft

From a319a9fc2aa8ee18f47dfee4f47f1dd4f8ef646a Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Thu, 13 Feb 2025 15:59:18 -0800
Subject: [PATCH 3/6] Update style

---
 cpp/include/raft/spectral/detail/partition.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp
index 5b0cb08985..e99996275b 100644
--- a/cpp/include/raft/spectral/detail/partition.hpp
+++ b/cpp/include/raft/spectral/detail/partition.hpp
@@ -18,10 +18,10 @@
 #include <raft/core/resource/cublas_handle.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
+#include <raft/sparse/linalg/laplacian.cuh>
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/detail/spectral_util.cuh>
 #include <raft/spectral/eigen_solvers.cuh>
-#include <raft/sparse/linalg/laplacian.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
 #include <cuda.h>

From 9f8fbce57c4ebaccfcca3147067d459953d8def8 Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Fri, 14 Feb 2025 07:16:48 -0800
Subject: [PATCH 4/6] Move Laplacian kernel to detail header

---
 .../raft/sparse/linalg/detail/laplacian.cuh   | 79 +++++++++++++++++++
 cpp/include/raft/sparse/linalg/laplacian.cuh  | 56 +------------
 2 files changed, 80 insertions(+), 55 deletions(-)
 create mode 100644 cpp/include/raft/sparse/linalg/detail/laplacian.cuh

diff --git a/cpp/include/raft/sparse/linalg/detail/laplacian.cuh b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
new file mode 100644
index 0000000000..bfbd187846
--- /dev/null
+++ b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <raft/core/detail/macros.hpp>
+
+namespace raft {
+namespace sparse {
+namespace linalg {
+namespace detail {
+
+/* Compute the graph Laplacian of an adjacency matrix
+ *
+ * This kernel implements the necessary logic for computing a graph
+ * Laplacian for an adjacency matrix in CSR format. A custom kernel is
+ * required because cusparse does not conveniently implement matrix subtraction with 64-bit
+ * indices. The custom kernel also allows the computation to be completed
+ * with no extra allocations or compute.
+ */
+template <typename ElementType, typename IndptrType, typename IndicesType>
+RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
+                                           IndicesType* output_indices,
+                                           IndptrType* output_indptr,
+                                           IndptrType dim,
+                                           ElementType const* adj_values,
+                                           IndicesType const* adj_indices,
+                                           IndptrType const* adj_indptr)
+{
+  /* The graph Laplacian L of an adjacency matrix A is given by:
+   * L = D - A
+   * where D is the degree matrix of A. The degree matrix is itself defined
+   * as the sum of each row of A and represents the degree of the node
+   * indicated by the index of the row. */
+
+  for (auto row = threadIdx.x + blockIdx.x * blockDim.x; row < dim; row += blockDim.x * gridDim.x) {
+    auto row_begin = adj_indptr[row];
+    auto row_end   = adj_indptr[row + 1];
+    // All output indexes will need to be offset by the row, since every row will
+    // gain exactly one new non-zero element. degree_output_index is the index
+    // where we will store the degree of each row
+    auto degree_output_index = row_begin + row;
+    auto degree_value        = ElementType{};
+    // value_index indicates the index of the current value in the original
+    // adjacency matrix
+    for (auto value_index = row_begin; value_index < row_end; ++value_index) {
+      auto col_index         = adj_indices[value_index];
+      auto is_lower_diagonal = col_index < row;
+      auto output_index      = value_index + row + !is_lower_diagonal;
+      auto input_value       = adj_values[value_index];
+      degree_value += input_value;
+      output_values[output_index]  = ElementType{-1} * input_value;
+      output_indices[output_index] = col_index;
+      // Increment the index where we will store the degree for every non-zero
+      // element before we reach the diagonal
+      degree_output_index += is_lower_diagonal;
+    }
+    output_values[degree_output_index]  = degree_value;
+    output_indices[degree_output_index] = row;
+    output_indptr[row]                  = row_begin + row;
+    output_indptr[row + 1]              = row_end + row + 1;
+  }
+}
+
+}  // namespace detail
+}  // namespace linalg
+}  // namespace sparse
+}  // namespace raft
diff --git a/cpp/include/raft/sparse/linalg/laplacian.cuh b/cpp/include/raft/sparse/linalg/laplacian.cuh
index 7c2449c937..ae08696f16 100644
--- a/cpp/include/raft/sparse/linalg/laplacian.cuh
+++ b/cpp/include/raft/sparse/linalg/laplacian.cuh
@@ -16,67 +16,13 @@
 #pragma once
 #include <raft/core/device_csr_matrix.hpp>
 #include <raft/core/resources.hpp>
+#include <raft/sparse/linalg/detail/laplacian.cuh>
 
 #include <type_traits>
 
 namespace raft {
 namespace sparse {
 namespace linalg {
-namespace detail {
-
-/* Compute the graph Laplacian of an adjacency matrix
- *
- * This kernel implements the necessary logic for computing a graph
- * Laplacian for an adjacency matrix in CSR format. A custom kernel is
- * required because cusparse does not conveniently implement matrix subtraction with 64-bit
- * indices. The custom kernel also allows the computation to be completed
- * with no extra allocations or compute.
- */
-template <typename ElementType, typename IndptrType, typename IndicesType>
-RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
-                                           IndicesType* output_indices,
-                                           IndptrType* output_indptr,
-                                           IndptrType dim,
-                                           ElementType const* adj_values,
-                                           IndicesType const* adj_indices,
-                                           IndptrType const* adj_indptr)
-{
-  /* The graph Laplacian L of an adjacency matrix A is given by:
-   * L = D - A
-   * where D is the degree matrix of A. The degree matrix is itself defined
-   * as the sum of each row of A and represents the degree of the node
-   * indicated by the index of the row. */
-
-  for (auto row = threadIdx.x + blockIdx.x * blockDim.x; row < dim; row += blockDim.x * gridDim.x) {
-    auto row_begin = adj_indptr[row];
-    auto row_end   = adj_indptr[row + 1];
-    // All output indexes will need to be offset by the row, since every row will
-    // gain exactly one new non-zero element. degree_output_index is the index
-    // where we will store the degree of each row
-    auto degree_output_index = row_begin + row;
-    auto degree_value        = ElementType{};
-    // value_index indicates the index of the current value in the original
-    // adjacency matrix
-    for (auto value_index = row_begin; value_index < row_end; ++value_index) {
-      auto col_index         = adj_indices[value_index];
-      auto is_lower_diagonal = col_index < row;
-      auto output_index      = value_index + row + !is_lower_diagonal;
-      auto input_value       = adj_values[value_index];
-      degree_value += input_value;
-      output_values[output_index]  = ElementType{-1} * input_value;
-      output_indices[output_index] = col_index;
-      // Increment the index where we will store the degree for every non-zero
-      // element before we reach the diagonal
-      degree_output_index += is_lower_diagonal;
-    }
-    output_values[degree_output_index]  = degree_value;
-    output_indices[degree_output_index] = row;
-    output_indptr[row]                  = row_begin + row;
-    output_indptr[row + 1]              = row_end + row + 1;
-  }
-}
-
-}  // namespace detail
 
 /** Given a CSR adjacency matrix, return the graph Laplacian
  *

From 99779f2d8ebfee49e847f6d835be88b66f720d9d Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Tue, 18 Feb 2025 07:44:03 -0800
Subject: [PATCH 5/6] Move entire implementation to detail

---
 .../raft/sparse/linalg/detail/laplacian.cuh   | 39 +++++++++++++++++++
 cpp/include/raft/sparse/linalg/laplacian.cuh  | 31 +--------------
 2 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/cpp/include/raft/sparse/linalg/detail/laplacian.cuh b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
index bfbd187846..fe6d63ab95 100644
--- a/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
@@ -14,7 +14,11 @@
  * limitations under the License.
  */
 #pragma once
+#include <raft/core/device_csr_matrix.hpp>
 #include <raft/core/detail/macros.hpp>
+#include <raft/core/resources.hpp>
+
+#include <type_traits>
 
 namespace raft {
 namespace sparse {
@@ -73,6 +77,41 @@ RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
   }
 }
 
+template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
+auto compute_graph_laplacian(
+  raft::resources const& res,
+  device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
+{
+  auto input_structure = input.structure_view();
+  auto dim             = input_structure.get_n_rows();
+  RAFT_EXPECTS(dim == input_structure.get_n_cols(),
+               "The graph Laplacian can only be computed on a square adjacency matrix");
+  auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
+                                       std::remove_const_t<IndptrType>,
+                                       std::remove_const_t<IndicesType>,
+                                       std::remove_const_t<NZType>>(
+    res,
+    dim,
+    dim,
+    /* The nnz for the result will be the dimension of the (square) input matrix plus the number of
+     * non-zero elements in the original matrix, since we introduce non-zero elements along the
+     * diagonal to represent the degree of each node. */
+    input_structure.get_nnz() + dim);
+  auto result_structure                         = result.structure_view();
+  auto static constexpr const threads_per_block = 256;
+  auto blocks = std::min(int((dim + threads_per_block - 1) / threads_per_block), 65535);
+  auto stream = resource::get_cuda_stream(res);
+  detail::compute_graph_laplacian_kernel<<<threads_per_block, blocks, 0, stream>>>(
+    result.get_elements().data(),
+    result_structure.get_indices().data(),
+    result_structure.get_indptr().data(),
+    dim,
+    input.get_elements().data(),
+    input_structure.get_indices().data(),
+    input_structure.get_indptr().data());
+  return result;
+}
+
 }  // namespace detail
 }  // namespace linalg
 }  // namespace sparse
diff --git a/cpp/include/raft/sparse/linalg/laplacian.cuh b/cpp/include/raft/sparse/linalg/laplacian.cuh
index ae08696f16..69633f717c 100644
--- a/cpp/include/raft/sparse/linalg/laplacian.cuh
+++ b/cpp/include/raft/sparse/linalg/laplacian.cuh
@@ -18,8 +18,6 @@
 #include <raft/core/resources.hpp>
 #include <raft/sparse/linalg/detail/laplacian.cuh>
 
-#include <type_traits>
-
 namespace raft {
 namespace sparse {
 namespace linalg {
@@ -33,34 +31,7 @@ auto compute_graph_laplacian(
   raft::resources const& res,
   device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
 {
-  auto input_structure = input.structure_view();
-  auto dim             = input_structure.get_n_rows();
-  RAFT_EXPECTS(dim == input_structure.get_n_cols(),
-               "The graph Laplacian can only be computed on a square adjacency matrix");
-  auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
-                                       std::remove_const_t<IndptrType>,
-                                       std::remove_const_t<IndicesType>,
-                                       std::remove_const_t<NZType>>(
-    res,
-    dim,
-    dim,
-    /* The nnz for the result will be the dimension of the (square) input matrix plus the number of
-     * non-zero elements in the original matrix, since we introduce non-zero elements along the
-     * diagonal to represent the degree of each node. */
-    input_structure.get_nnz() + dim);
-  auto result_structure                         = result.structure_view();
-  auto static constexpr const threads_per_block = 256;
-  auto blocks = std::min(int((dim + threads_per_block - 1) / threads_per_block), 65535);
-  auto stream = resource::get_cuda_stream(res);
-  detail::compute_graph_laplacian_kernel<<<threads_per_block, blocks, 0, stream>>>(
-    result.get_elements().data(),
-    result_structure.get_indices().data(),
-    result_structure.get_indptr().data(),
-    dim,
-    input.get_elements().data(),
-    input_structure.get_indices().data(),
-    input_structure.get_indptr().data());
-  return result;
+  return detail::compute_graph_laplacian(res, input);
 }
 
 }  // namespace linalg

From 2ac1e343c6253695867a3ac21b1c9fd9602a8a2a Mon Sep 17 00:00:00 2001
From: William Hicks <whicks@nvidia.com>
Date: Tue, 18 Feb 2025 07:45:21 -0800
Subject: [PATCH 6/6] Update style

---
 cpp/include/raft/sparse/linalg/detail/laplacian.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/raft/sparse/linalg/detail/laplacian.cuh b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
index fe6d63ab95..95318c74ed 100644
--- a/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/laplacian.cuh
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 #pragma once
-#include <raft/core/device_csr_matrix.hpp>
 #include <raft/core/detail/macros.hpp>
+#include <raft/core/device_csr_matrix.hpp>
 #include <raft/core/resources.hpp>
 
 #include <type_traits>