From 21e0f0ead374c7718974019a3ef9d9cdc2eb03d9 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 24 Jan 2025 01:22:24 -0500 Subject: [PATCH] Add cuda 12.8 support (#605) CUDA 12.8 introduces sm_120 that requires a reduced number of threads per sm. We also need to pass -static-global-template-stub=false when building with 12.8 as we violate CUDA ODR kernel rules Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - https://github.com/jakirkham URL: https://github.com/rapidsai/cuvs/pull/605 --- cpp/cmake/modules/ConfigureCUDA.cmake | 7 ++++++- cpp/src/neighbors/detail/nn_descent.cuh | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 3e91d9995..e99a53049 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -35,6 +35,11 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif() endif() +# Allow invalid CUDA kernels in the short term +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0) + list(APPEND CUVS_CUDA_FLAGS -static-global-template-stub=false) +endif() + if(CUDA_LOG_COMPILE_TIME) list(APPEND CUVS_CUDA_FLAGS "--time=nvcc_compile_log.csv") endif() diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 4c577a4d2..310d4e7a6 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -706,7 +706,8 @@ __device__ __forceinline__ void remove_duplicates( template > RAFT_KERNEL #ifdef __CUDA_ARCH__ -#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) +#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \ + (__CUDA_ARCH__) == 1200 __launch_bounds__(BLOCK_SIZE) #else __launch_bounds__(BLOCK_SIZE, 4)