From bb6aca83b407acae0a5fd41f3fa764ce9d5ba380 Mon Sep 17 00:00:00 2001
From: jeff <1093656867@qq.com>
Date: Thu, 4 Jan 2024 09:57:51 +0800
Subject: [PATCH 1/2] Fix compile failure on RTX 4090 and update Copyright year. related issue (#2073)

Signed-off-by: jeff <1093656867@qq.com>
---
 cpp/include/raft/neighbors/detail/nn_descent.cuh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh
index 6e0636c37a..1141d77640 100644
--- a/cpp/include/raft/neighbors/detail/nn_descent.cuh
+++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -686,12 +686,12 @@ __device__ __forceinline__ void remove_duplicates(
 // Per
 // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications,
 // MAX_RESIDENT_THREAD_PER_SM = BLOCK_SIZE * BLOCKS_PER_SM = 2048
-// For architectures 750 and 860, the values for MAX_RESIDENT_THREAD_PER_SM
+// For architectures 750 and 860 (890), the values for MAX_RESIDENT_THREAD_PER_SM
 // is 1024 and 1536 respectively, which means the bounds don't work anymore
 template >
 RAFT_KERNEL
 #ifdef __CUDA_ARCH__
-#if (__CUDA_ARCH__) == 750 || (__CUDA_ARCH__) == 860
+#if (__CUDA_ARCH__) == 750 || (__CUDA_ARCH__) == 860 || (__CUDA_ARCH__) == 890
 __launch_bounds__(BLOCK_SIZE)
 #else
 __launch_bounds__(BLOCK_SIZE, 4)

From 661f637cc7a84a38b72c85e54e544b6757d532e2 Mon Sep 17 00:00:00 2001
From: JiefengWang <146178560+JieFengWang@users.noreply.github.com>
Date: Wed, 10 Jan 2024 09:29:01 +0800
Subject: [PATCH 2/2] Update cpp/include/raft/neighbors/detail/nn_descent.cuh

Co-authored-by: Corey J. Nolet
---
 cpp/include/raft/neighbors/detail/nn_descent.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh
index 1141d77640..f624a6015b 100644
--- a/cpp/include/raft/neighbors/detail/nn_descent.cuh
+++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh
@@ -691,7 +691,7 @@ __device__ __forceinline__ void remove_duplicates(
 template >
 RAFT_KERNEL
 #ifdef __CUDA_ARCH__
-#if (__CUDA_ARCH__) == 750 || (__CUDA_ARCH__) == 860 || (__CUDA_ARCH__) == 890
+#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890)
 __launch_bounds__(BLOCK_SIZE)
 #else
 __launch_bounds__(BLOCK_SIZE, 4)