
Commit

Merge branch 'branch-25.04' into batchnnd-dedup
cjnolet authored Feb 25, 2025
2 parents 312913c + cb6fe7c commit f286983
Showing 35 changed files with 458 additions and 70 deletions.
15 changes: 8 additions & 7 deletions .github/CODEOWNERS
@@ -5,20 +5,21 @@ cpp/ @rapidsai/raft-cpp-codeowners
python/ @rapidsai/raft-python-codeowners

#cmake code owners
**/CMakeLists.txt @rapidsai/raft-cmake-codeowners
CMakeLists.txt @rapidsai/raft-cmake-codeowners
**/cmake/ @rapidsai/raft-cmake-codeowners
*.cmake @rapidsai/raft-cmake-codeowners
python/setup.py @rapidsai/raft-cmake-codeowners
build.sh @rapidsai/raft-cmake-codeowners
**/build.sh @rapidsai/raft-cmake-codeowners

#CI code owners
/.github/ @rapidsai/ci-codeowners
/ci/ @rapidsai/ci-codeowners
/.pre-commit-config.yaml @rapidsai/ci-codeowners

#packaging code owners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
/dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
pyproject.toml @rapidsai/packaging-codeowners
/.pre-commit-config.yaml @rapidsai/packaging-codeowners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
pyproject.toml @rapidsai/packaging-codeowners
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
@@ -107,7 +107,7 @@ repos:
hooks:
- id: check-json
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.4.0
rev: v0.5.0
hooks:
- id: verify-copyright
files: |
@@ -122,6 +122,8 @@ repos:
cpp/include/raft/thirdparty/|
docs/source/sphinxext/github_link[.]py|
- id: verify-alpha-spec
- id: verify-codeowners
args: [--fix, --project-prefix=raft]
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.17.0
hooks:
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-118_arch-aarch64.yaml
@@ -11,7 +11,7 @@ dependencies:
- c-compiler
- clang-tools=16.0.6
- clang==16.0.6
- cmake>=3.26.4,!=3.30.0
- cmake>=3.30.4
- cuda-nvtx=11.8
- cuda-profiler-api=11.8.86
- cuda-python>=11.8.5,<12.0a0
@@ -55,6 +55,7 @@ dependencies:
- scipy
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<8.2.0
- sysroot_linux-aarch64==2.28
- ucx-py==0.43.*,>=0.0.0a0
name: all_cuda-118_arch-aarch64
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -11,7 +11,7 @@ dependencies:
- c-compiler
- clang-tools=16.0.6
- clang==16.0.6
- cmake>=3.26.4,!=3.30.0
- cmake>=3.30.4
- cuda-nvtx=11.8
- cuda-profiler-api=11.8.86
- cuda-python>=11.8.5,<12.0a0
@@ -55,6 +55,7 @@ dependencies:
- scipy
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<8.2.0
- sysroot_linux-64==2.28
- ucx-py==0.43.*,>=0.0.0a0
name: all_cuda-118_arch-x86_64
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-128_arch-aarch64.yaml
@@ -11,7 +11,7 @@ dependencies:
- c-compiler
- clang-tools=16.0.6
- clang==16.0.6
- cmake>=3.26.4,!=3.30.0
- cmake>=3.30.4
- cuda-cudart-dev
- cuda-nvcc
- cuda-nvtx-dev
@@ -51,6 +51,7 @@ dependencies:
- scipy
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<8.2.0
- sysroot_linux-aarch64==2.28
- ucx-py==0.43.*,>=0.0.0a0
name: all_cuda-128_arch-aarch64
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -11,7 +11,7 @@ dependencies:
- c-compiler
- clang-tools=16.0.6
- clang==16.0.6
- cmake>=3.26.4,!=3.30.0
- cmake>=3.30.4
- cuda-cudart-dev
- cuda-nvcc
- cuda-nvtx-dev
@@ -51,6 +51,7 @@ dependencies:
- scipy
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<8.2.0
- sysroot_linux-64==2.28
- ucx-py==0.43.*,>=0.0.0a0
name: all_cuda-128_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/libraft/conda_build_config.yaml
@@ -17,7 +17,7 @@ c_stdlib_version:
- "2.28"

cmake_version:
- ">=3.26.4,!=3.30.0"
- ">=3.30.4"

# The CTK libraries below are missing from the conda-forge::cudatoolkit package
# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
2 changes: 1 addition & 1 deletion conda/recipes/pylibraft/conda_build_config.yaml
@@ -17,4 +17,4 @@ c_stdlib_version:
- "2.28"

cmake_version:
- ">=3.26.4,!=3.30.0"
- ">=3.30.4"
2 changes: 1 addition & 1 deletion conda/recipes/raft-dask/conda_build_config.yaml
@@ -23,7 +23,7 @@ ucxx_version:
- "0.43.*"

cmake_version:
- ">=3.26.4,!=3.30.0"
- ">=3.30.4"

nccl_version:
- ">=2.19"
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
@@ -10,7 +10,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
include(../rapids_config.cmake)
include(rapids-cmake)
include(rapids-cpm)
8 changes: 4 additions & 4 deletions cpp/include/raft/cluster/detail/kmeans.cuh
@@ -43,6 +43,7 @@

#include <cuda.h>
#include <thrust/fill.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>

#include <algorithm>
@@ -443,13 +444,12 @@ void kmeans_fit_main(raft::resources const& handle,
params.batch_centroids,
workspace);

// Using TransformInputIteratorT to dereference an array of
// Using thrust::transform_iterator to dereference an array of
// raft::KeyValuePair and converting them to just return the Key to be used
// in reduce_rows_by_key prims
detail::KeyValueIndexOp<IndexT, DataT> conversion_op;
cub::TransformInputIterator<IndexT,
detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
thrust::transform_iterator<detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
itr(minClusterAndDistance.data_handle(), conversion_op);

update_centroids(handle,
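Note on the iterator migration in this file: cub::TransformInputIterator was swapped for thrust::transform_iterator, apparently tracking CCCL's deprecation of the CUB iterator wrappers. The template parameters move around in the process: CUB's form is TransformInputIterator<OutputT, FunctorT, InputIteratorT>, while Thrust's is transform_iterator<FunctorT, InputIteratorT>, with the value type deduced from the functor's call operator. A minimal sketch of the key-extraction pattern used above, with hypothetical stand-ins for raft::KeyValuePair and detail::KeyValueIndexOp (compile with nvcc):

    #include <thrust/iterator/transform_iterator.h>

    #include <cstdio>

    // Hypothetical stand-ins for raft::KeyValuePair and detail::KeyValueIndexOp.
    struct KeyValue {
      int key;
      float value;
    };
    struct KeyOp {
      __host__ __device__ int operator()(const KeyValue& kv) const { return kv.key; }
    };

    int main()
    {
      KeyValue pairs[] = {{7, 0.5f}, {2, 1.5f}, {9, 2.5f}};

      // Before: cub::TransformInputIterator<int, KeyOp, const KeyValue*> itr(pairs, KeyOp{});
      // After: the value type (int) is deduced from KeyOp's call operator.
      thrust::transform_iterator<KeyOp, const KeyValue*> itr(pairs, KeyOp{});

      for (int i = 0; i < 3; ++i) {
        std::printf("%d\n", itr[i]);  // dereferencing applies KeyOp: prints 7, 2, 9
      }
      return 0;
    }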
7 changes: 5 additions & 2 deletions cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -47,6 +47,7 @@
#include <rmm/resource_ref.hpp>

#include <thrust/gather.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>

#include <limits>
@@ -288,7 +289,8 @@ void calc_centers_and_sizes(const raft::resources& handle,
dataset, dim, labels, nullptr, n_rows, dim, n_clusters, centers, stream, reset_counters);
} else {
// todo(lsugy): use iterator from KV output of fusedL2NN
cub::TransformInputIterator<MathT, MappingOpT, const T*> mapping_itr(dataset, mapping_op);
thrust::transform_iterator<MappingOpT, const T*, thrust::use_default, MathT> mapping_itr(
dataset, mapping_op);
raft::linalg::reduce_rows_by_key(
mapping_itr, dim, labels, nullptr, n_rows, dim, n_clusters, centers, stream, reset_counters);
}
@@ -894,7 +896,8 @@ auto build_fine_clusters(const raft::resources& handle,
"Number of fine clusters must be non-zero for a non-empty mesocluster");
}

cub::TransformInputIterator<MathT, MappingOpT, const T*> mapping_itr(dataset_mptr, mapping_op);
thrust::transform_iterator<MappingOpT, const T*, thrust::use_default, MathT> mapping_itr(
dataset_mptr, mapping_op);
raft::matrix::gather(mapping_itr, dim, n_rows, mc_trainset_ids, k, mc_trainset, stream);
if (params.metric == raft::distance::DistanceType::L2Expanded ||
params.metric == raft::distance::DistanceType::L2SqrtExpanded) {
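In this file the element type (T) and the math type (MathT) differ, so the replacement iterator names its value type explicitly: thrust::transform_iterator takes optional Reference and Value template parameters after the functor and iterator types, and thrust::use_default keeps the reference type deduced while MathT fixes the value type. A sketch under the assumption of an int8_t-to-float mapping functor:

    #include <thrust/iterator/transform_iterator.h>

    #include <cstdint>

    // Hypothetical mapping functor: widens int8_t to float.
    struct ToFloat {
      __host__ __device__ float operator()(std::int8_t x) const { return x; }
    };

    void example(const std::int8_t* dataset)
    {
      // Reference stays defaulted (thrust::use_default); the value type (float)
      // is supplied as the fourth template argument, mirroring MathT above.
      thrust::transform_iterator<ToFloat, const std::int8_t*, thrust::use_default, float> itr(
        dataset, ToFloat{});
      float first = *itr;  // applies ToFloat to dataset[0]
      (void)first;
    }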
12 changes: 6 additions & 6 deletions cpp/include/raft/cluster/detail/kmeans_common.cuh
@@ -43,6 +43,7 @@
#include <cuda.h>
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/iterator/transform_iterator.h>

#include <algorithm>
#include <cmath>
@@ -199,8 +200,8 @@ void computeClusterCost(raft::resources const& handle,
{
cudaStream_t stream = resource::get_cuda_stream(handle);

cub::TransformInputIterator<OutputT, MainOpT, InputT*> itr(minClusterDistance.data_handle(),
main_op);
thrust::transform_iterator<MainOpT, InputT*, thrust::use_default, OutputT> itr(
minClusterDistance.data_handle(), main_op);

size_t temp_storage_bytes = 0;
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(nullptr,
@@ -641,13 +642,12 @@ void countSamplesInCluster(raft::resources const& handle,
params.batch_centroids,
workspace);

// Using TransformInputIteratorT to dereference an array of raft::KeyValuePair
// Using thrust::transform_iterator to dereference an array of raft::KeyValuePair
// and converting them to just return the Key to be used in reduce_rows_by_key
// prims
detail::KeyValueIndexOp<IndexT, DataT> conversion_op;
cub::TransformInputIterator<IndexT,
detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
thrust::transform_iterator<detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
itr(minClusterAndDistance.data_handle(), conversion_op);

// count # of samples in each cluster
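Worth noting in computeClusterCost above: the thrust::transform_iterator is handed straight to cub::DeviceReduce::Reduce, which accepts any random-access input iterator, so the transform is fused into the reduction with no temporary buffer. A minimal sketch of that composition (hypothetical functor; CUDA error checking omitted, assumes CUDA 11.2+ for the async allocator):

    #include <cub/device/device_reduce.cuh>
    #include <thrust/iterator/transform_iterator.h>

    #include <cuda_runtime.h>

    struct Square {
      __host__ __device__ float operator()(float x) const { return x * x; }
    };

    // Sums the squares of d_in[0..n) into *d_out, applying Square on the fly
    // through the iterator.
    void sum_of_squares(const float* d_in, float* d_out, int n, cudaStream_t stream)
    {
      thrust::transform_iterator<Square, const float*> itr(d_in, Square{});

      // First call sizes the temporary storage; second call runs the reduction.
      size_t temp_bytes = 0;
      cub::DeviceReduce::Sum(nullptr, temp_bytes, itr, d_out, n, stream);
      void* d_temp = nullptr;
      cudaMallocAsync(&d_temp, temp_bytes, stream);
      cub::DeviceReduce::Sum(d_temp, temp_bytes, itr, d_out, n, stream);
      cudaFreeAsync(d_temp, stream);
    }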
2 changes: 1 addition & 1 deletion cpp/include/raft/core/sparse_types.hpp
@@ -168,7 +168,7 @@ class sparse_matrix {
row_type n_rows,
col_type n_cols,
nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, nnz)} {};

// Constructor that owns the data but not the structure
// This constructor is only callable with a `structure_type == *_structure_view`
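The one-character change above is a genuine bug fix: the owning constructor sized the element buffer with a hard-coded 0 instead of the requested nnz, leaving a matrix constructed with a non-zero nnz without backing storage for its values. A simplified before/after sketch (hypothetical container standing in for the raft container policy):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct sparse_matrix_sketch {
      std::vector<float> elements;
      // Before the fix: elements(0) -- empty regardless of the requested nnz.
      // After the fix:  elements(nnz).
      explicit sparse_matrix_sketch(std::size_t nnz) : elements(nnz) {}
    };

    int main()
    {
      sparse_matrix_sketch m(12);       // request storage for 12 non-zeros
      assert(m.elements.size() == 12);  // holds only with the fixed constructor
      m.elements[11] = 1.0f;            // in bounds once nnz elements exist
      return 0;
    }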
24 changes: 12 additions & 12 deletions cpp/include/raft/linalg/detail/strided_reduction.cuh
@@ -35,19 +35,19 @@ namespace detail {
// Note that the compensation will only be performed 'per-block' for performance
// reasons and therefore not be equivalent to a sequential compensation.

template <typename Type, typename MainLambda>
template <typename Type, typename IdxType, typename MainLambda>
RAFT_KERNEL stridedSummationKernel(
Type* out, const Type* data, int D, int N, Type init, MainLambda main_op)
Type* out, const Type* data, IdxType D, IdxType N, Type init, MainLambda main_op)
{
// Thread reduction
Type thread_sum = Type(init);
Type thread_c = Type(0);
int colStart = blockIdx.x * blockDim.x + threadIdx.x;
Type thread_sum = Type(init);
Type thread_c = Type(0);
IdxType colStart = blockIdx.x * blockDim.x + threadIdx.x;
if (colStart < D) {
int rowStart = blockIdx.y * blockDim.y + threadIdx.y;
int stride = blockDim.y * gridDim.y;
for (int j = rowStart; j < N; j += stride) {
int idx = colStart + j * D;
IdxType rowStart = blockIdx.y * blockDim.y + threadIdx.y;
IdxType stride = blockDim.y * gridDim.y;
for (IdxType j = rowStart; j < N; j += stride) {
auto idx = colStart + j * D;

// KahanBabushkaNeumaierSum
const Type cur_value = main_op(data[idx], j);
@@ -97,8 +97,8 @@ template <typename InType,
typename ReduceLambda>
RAFT_KERNEL stridedReductionKernel(OutType* dots,
const InType* data,
int D,
int N,
IdxType D,
IdxType N,
OutType init,
MainLambda main_op,
ReduceLambda reduce_op)
@@ -167,7 +167,7 @@ void stridedReduction(OutType* dots,
raft::min((IdxType)MaxBlocksDimY, raft::ceildiv(N, (IdxType)MinRowsPerBlk)));
const size_t shmemSize = sizeof(OutType) * Block.x * 2;

stridedSummationKernel<InType>
stridedSummationKernel<InType, IdxType>
<<<grid, Block, shmemSize, stream>>>(dots, data, D, N, init, main_op);
} else {
// Arbitrary numbers for now, probably need to tune
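The kernel changes above are about index width rather than arithmetic: stridedSummationKernel previously computed colStart, the row loop, and the flat offset idx = colStart + j * D in int, which overflows once the input holds more than about 2^31 elements. Templating on IdxType (instantiated as stridedSummationKernel<InType, IdxType> at the launch site) lets callers use 64-bit extents. A small host-side illustration of the sizes involved:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
      using IdxType = std::int64_t;  // what the templated kernel now permits
      // Hypothetical shape: 3,000,000 rows x 1,024 columns.
      IdxType D = 1024, N = 3000000;
      // Mirrors `idx = colStart + j * D` for the last column of the last row.
      IdxType last = (D - 1) + (N - 1) * D;
      std::printf("last flat index = %lld (INT_MAX = 2147483647)\n",
                  static_cast<long long>(last));  // 3071999999 does not fit in int
      return 0;
    }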
5 changes: 3 additions & 2 deletions cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -53,6 +53,7 @@

#include <cuda_fp16.h>
#include <thrust/extrema.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>

#include <memory>
@@ -180,8 +181,8 @@ void select_residuals(raft::resources const& handle,
rmm::device_uvector<float> tmp(size_t(n_rows) * size_t(dim), stream, device_memory);
// Note: the number of rows of the input dataset isn't actually n_rows, but matrix::gather doesn't
// need to know it, any strictly positive number would work.
cub::TransformInputIterator<float, utils::mapping<float>, const T*> mapping_itr(
dataset, utils::mapping<float>{});
thrust::transform_iterator<utils::mapping<float>, const T*> mapping_itr(dataset,
utils::mapping<float>{});
raft::matrix::gather(mapping_itr, (IdxT)dim, n_rows, row_ids, n_rows, tmp.data(), stream);

raft::matrix::linewise_op(handle,
…
