Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes in cmake for cuda #640

Merged
merged 5 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).

Master, using release name V 2.4.0 (2/11/25)

* Removed FINUFFT_CUDA_ARCHITECTURES flag, as it was unnecessary duplication.
* Enabled LTO for finufft, nvcc support is flaky at the moment.
* Added GPU spread interp only test. Added CPU spread interp only test to cmake
* Make attributes private in Python Plan classes and allow read-only access to
them using properties (Andén #608).
Expand Down
29 changes: 16 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ option(FINUFFT_BUILD_DOCS "Whether to build the FINUFFT documentation" OFF)
# if FINUFFT_USE_DUCC0 is ON, the following options are ignored
set(FINUFFT_FFTW_LIBRARIES "DEFAULT" CACHE STRING "Specify a custom FFTW library")
set(FINUFFT_FFTW_SUFFIX "OpenMP" CACHE STRING "Suffix for FFTW libraries (e.g. OpenMP, Threads etc.)")
# if FINUFFT_USE_CUDA is OFF, the following options are ignored
set(FINUFFT_CUDA_ARCHITECTURES "native" CACHE STRING "CUDA architectures to build for (e.g. 60;70;75;)")
# if FINUFFT_USE_CPU is OFF, the following options are ignored
set(FINUFFT_ARCH_FLAGS "native" CACHE STRING "Compiler flags for specifying target architecture, defaults to -march=native")
# sphinx tag (don't remove): @cmake_opts_end
Expand Down Expand Up @@ -77,7 +75,8 @@ set(FINUFFT_CXX_FLAGS_DEBUG
-ggdb
-ggdb3
-Wall
-Wno-sign-compare
-Wextra
-Wpedantic
-Wno-unknown-pragmas
)

Expand Down Expand Up @@ -208,7 +207,12 @@ function(finufft_link_test target)
endif()
enable_asan(${target})
target_compile_features(${target} PRIVATE cxx_std_17)
set_target_properties(${target} PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
set_target_properties(
${target}
PROPERTIES
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}
)
# disable deprecated warnings for tests if supported
if(FINUFFT_HAS_NO_DEPRECATED_DECLARATIONS)
target_compile_options(${target} PRIVATE -Wno-deprecated-declarations)
Expand Down Expand Up @@ -277,16 +281,14 @@ if(FINUFFT_USE_CPU)
endif()

if(FINUFFT_USE_CUDA)
if(NOT DEFINED FINUFFT_CUDA_ARCHITECTURES)
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
set(FINUFFT_CUDA_ARCHITECTURES "{$CMAKE_CUDA_ARCHITECTURES}")
else()
message(
"FINUFFT WARNING: No CUDA architecture supplied via '-DFINUFFT_CUDA_ARCHITECTURES=...', defaulting to 'native'"
)
message("See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply.")
endif()
# Fall back to the "native" CUDA architecture only when the user has not
# supplied CMAKE_CUDA_ARCHITECTURES. The set() must stay inside this guard:
# an unconditional set() creates a normal variable that shadows any
# -DCMAKE_CUDA_ARCHITECTURES=... cache value, silently ignoring the user's choice.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    message(
        WARNING
        "FINUFFT WARNING: No CUDA architecture supplied via '-DCMAKE_CUDA_ARCHITECTURES=...', defaulting to 'native'"
    )
    message(WARNING "See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply.")
    set(CMAKE_CUDA_ARCHITECTURES "native")
endif()
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
add_subdirectory(src/cuda)
Expand Down Expand Up @@ -348,6 +350,7 @@ message(STATUS " FINUFFT_FFTW_SUFFIX: ${FINUFFT_FFTW_SUFFIX}")
message(STATUS " FINUFFT_FFTW_LIBRARIES: ${FINUFFT_FFTW_LIBRARIES}")
message(STATUS " FINUFFT_ARCH_FLAGS: ${FINUFFT_ARCH_FLAGS}")
message(STATUS " FINUFFT_USE_DUCC0: ${FINUFFT_USE_DUCC0}")
message(STATUS " CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
# gersemi: on

if(FINUFFT_ENABLE_INSTALL)
Expand Down
11 changes: 11 additions & 0 deletions cmake/utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,14 @@ function(copy_dll source_target destination_target)
unset(SOURCE_FILE)
unset(DESTINATION_FILE)
endfunction()

# Probe the toolchain for interprocedural optimization (LTO) support and record
# the outcome in FINUFFT_INTERPROCEDURAL_OPTIMIZATION. On failure the reason
# reported by check_ipo_supported() is surfaced in a warning.
include(CheckIPOSupported)
check_ipo_supported(RESULT LTO_SUPPORTED OUTPUT LTO_ERROR)

if(NOT LTO_SUPPORTED)
    message(WARNING "LTO is not supported: ${LTO_ERROR}")
    set(FINUFFT_INTERPROCEDURAL_OPTIMIZATION FALSE)
else()
    message(STATUS "LTO is supported and enabled.")
    set(FINUFFT_INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
2 changes: 2 additions & 0 deletions docs/install_gpu.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ To find out your own device's compute capability without having to look it up on
This will return a text string such as ``8.6``, which indicates the
``sm_86`` architecture; in that case use ``CMAKE_CUDA_ARCHITECTURES=86``.

Note that by default the ``CMAKE_CUDA_ARCHITECTURES`` flag is set to ``native``, which means that the code will be compiled for the compute capability of the GPU on which the code is being compiled.
This might not be portable so it is recommended to set this flag explicitly when building for multiple systems. A good alternative is ``all-major`` which will compile for all major compute capabilities.

Testing
-------
Expand Down
2 changes: 1 addition & 1 deletion perftest/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set_target_properties(
cuperftest
PROPERTIES
LINKER_LANGUAGE CUDA
CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}"
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
)
20 changes: 12 additions & 8 deletions src/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,14 @@ set(FINUFFT_CUDA_FLAGS
-fmad=true
-restrict
--extra-device-vectorization
$<$<CONFIG:Debug>:-G
-maxrregcount
64
>
>
-Xnvlink
--strip-all>
)

if(FINUFFT_SHARED_LINKING)
add_library(cufinufft SHARED ${PRECISION_INDEPENDENT_SRC} ${PRECISION_DEPENDENT_SRC})
else()
add_library(cufinufft STATIC ${PRECISION_INDEPENDENT_SRC} ${PRECISION_DEPENDENT_SRC})
set_target_properties(cufinufft PROPERTIES POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE})
endif()
target_include_directories(cufinufft PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
# set target build location
Expand All @@ -54,20 +50,28 @@ set_target_properties(cufinufft PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_B
set_target_properties(
cufinufft
PROPERTIES
CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}"
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
CUDA_SEPARABLE_COMPILATION ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
WINDOWS_EXPORT_ALL_SYMBOLS ON
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
INTERPROCEDURAL_OPTIMIZATION
OFF # LTO is not supported for CUDA for now
POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}
)
target_compile_features(cufinufft PRIVATE cxx_std_17)
target_compile_options(cufinufft PRIVATE ${FINUFFT_CUDA_FLAGS})
if(WIN32 OR (BUILD_TESTING AND FINUFFT_BUILD_TESTS))
# Link the shared CUDA runtime and cuFFT on Windows, when the tests are built,
# or when the CIBUILDWHEEL environment variable is set; otherwise link the
# static variants.
# The environment has to be queried with `DEFINED ENV{...}`: a bare
# `env{CIBUILDWHEEL}` is read by if() as the name of an (undefined) normal
# variable, so that clause could never be true.
if(WIN32 OR (BUILD_TESTING AND FINUFFT_BUILD_TESTS) OR DEFINED ENV{CIBUILDWHEEL})
    target_link_libraries(cufinufft PUBLIC CUDA::cudart CUDA::cufft)
else()
    target_link_libraries(cufinufft PUBLIC CUDA::cudart_static CUDA::cufft_static)
endif()

# Suppress deprecated-declaration warnings when compiling the CUDA sources of
# the cufinufft target. Applied only when FINUFFT_HAS_NO_DEPRECATED_DECLARATIONS
# is true; the generator expression restricts the flag to the CUDA language.
if(FINUFFT_HAS_NO_DEPRECATED_DECLARATIONS)
target_compile_options(cufinufft PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Wno-deprecated-declarations>)
endif()

file(GLOB CUFINUFFT_PUBLIC_HEADERS "${CMAKE_SOURCE_DIR}/include/cufinufft*.h")
set_target_properties(cufinufft PROPERTIES PUBLIC_HEADER "${CUFINUFFT_PUBLIC_HEADERS}")
1 change: 0 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ if(NOT FINUFFT_USE_DUCC0)
add_executable(fftw_lock_test fftw_lock_test.cpp)
target_compile_features(fftw_lock_test PRIVATE cxx_std_17)
finufft_link_test(fftw_lock_test)

add_test(NAME run_fftw_lock_test COMMAND fftw_lock_test WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
endif()

Expand Down
4 changes: 2 additions & 2 deletions test/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ foreach(srcfile ${test_src})
target_compile_features(${executable} PUBLIC cxx_std_17)
set_target_properties(
${executable}
PROPERTIES LINKER_LANGUAGE CUDA CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}"
PROPERTIES LINKER_LANGUAGE CUDA CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
)
message(
STATUS
"Adding test ${executable}"
" with CUDA_ARCHITECTURES=${FINUFFT_CUDA_ARCHITECTURES}"
" with CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
" and INCLUDE=${CUFINUFFT_INCLUDE_DIRS}"
)
endforeach()
Expand Down
Loading