diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index abf971fe..164f3bd5 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -52,8 +52,6 @@ vecmem_add_library( vecmem_core core # Memory management. "include/vecmem/memory/atomic.hpp" "include/vecmem/memory/impl/atomic.ipp" - "include/vecmem/memory/device_atomic_ref.hpp" - "include/vecmem/memory/impl/device_atomic_ref.ipp" "include/vecmem/memory/polymorphic_allocator.hpp" "include/vecmem/memory/memory_resource.hpp" "include/vecmem/memory/details/unique_alloc_deleter.hpp" @@ -61,6 +59,19 @@ vecmem_add_library( vecmem_core core "include/vecmem/memory/unique_ptr.hpp" "include/vecmem/memory/details/is_aligned.hpp" "src/memory/details/is_aligned.cpp" + # Atomic reference(s). + "include/vecmem/memory/device_address_space.hpp" + "include/vecmem/memory/device_atomic_ref.hpp" + "include/vecmem/memory/details/dummy_device_atomic_ref.hpp" + "include/vecmem/memory/impl/dummy_device_atomic_ref.ipp" + "include/vecmem/memory/details/cuda_device_atomic_ref.hpp" + "include/vecmem/memory/impl/cuda_device_atomic_ref.ipp" + "include/vecmem/memory/details/hip_device_atomic_ref.hpp" + "include/vecmem/memory/details/sycl_builtin_device_atomic_ref.hpp" + "include/vecmem/memory/details/sycl_custom_device_atomic_ref.hpp" + "include/vecmem/memory/impl/sycl_custom_device_atomic_ref.ipp" + "include/vecmem/memory/details/posix_device_atomic_ref.hpp" + "include/vecmem/memory/impl/posix_device_atomic_ref.ipp" # EDM types. "include/vecmem/edm/container.hpp" "include/vecmem/edm/buffer.hpp" @@ -316,11 +327,27 @@ if( VECMEM_HAVE_STD_ALIGNED_ALLOC ) PRIVATE VECMEM_HAVE_STD_ALIGNED_ALLOC ) endif() +# Check if vecmem::posix_device_atomic_ref is usable. +set( CMAKE_REQUIRED_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include" ) +check_cxx_source_compiles( " + #include + int main() { + int foo = 0; + vecmem::posix_device_atomic_ref ref{foo}; + return 0; + } + " VECMEM_SUPPORT_POSIX_ATOMIC_REF ) +if( VECMEM_SUPPORT_POSIX_ATOMIC_REF ) + target_compile_definitions( vecmem_core + PUBLIC VECMEM_SUPPORT_POSIX_ATOMIC_REF ) +endif() +unset( CMAKE_REQUIRED_INCLUDES ) + # Test the public headers of vecmem::core. if( BUILD_TESTING AND VECMEM_BUILD_TESTING ) - file( GLOB_RECURSE vecmem_core_public_headers + file( GLOB vecmem_core_public_headers RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/include" - "include/*/*.hpp" ) + "include/vecmem/*/*.hpp" ) list( APPEND vecmem_core_public_headers "vecmem/version.hpp" ) vecmem_test_public_headers( vecmem_core ${vecmem_core_public_headers} ) endif() diff --git a/core/include/vecmem/memory/details/cuda_device_atomic_ref.hpp b/core/include/vecmem/memory/details/cuda_device_atomic_ref.hpp new file mode 100644 index 00000000..49828737 --- /dev/null +++ b/core/include/vecmem/memory/details/cuda_device_atomic_ref.hpp @@ -0,0 +1,143 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" +#include "vecmem/memory/memory_order.hpp" +#include "vecmem/utils/types.hpp" + +// System include(s). +#include + +namespace vecmem { +namespace cuda { + +/// Custom implementation for atomic operations in CUDA device code +/// +/// @note All member functions are declared @c VECMEM_HOST_DEVICE, because +/// this class may be used from functions that also carry that setup. +/// (Like functions in @c vecmem::device_vector.) Even though this class +/// cannot be used in host code, CUDA and HIP are sensitive to these +/// sort of declarations being consistent. +/// +/// @tparam T Type to perform atomic operations on +/// @tparam address The device address space to use +/// +template +class device_atomic_ref { + +public: + /// @name Type definitions + /// @{ + + /// Type managed by the object + typedef T value_type; + /// Difference between two objects + typedef value_type difference_type; + /// Pointer to the value in global memory + typedef value_type* pointer; + /// Reference to a value given by the user + typedef value_type& reference; + + /// @} + + /// @name Check(s) on the value type + /// @{ + + static_assert( + std::is_integral::value, + "vecmem::cuda::atomic_ref only accepts built-in integral types"); + + /// @} + + /// Constructor, with a pointer to the managed variable + VECMEM_HOST_AND_DEVICE + explicit device_atomic_ref(reference ref); + /// Copy constructor + VECMEM_HOST_AND_DEVICE + device_atomic_ref(const device_atomic_ref& parent); + + /// Disable the assignment operator + device_atomic_ref& operator=(const device_atomic_ref&) = delete; + + /// @name Value setter/getter functions + /// @{ + + /// Assigns a value desired to the referenced object + /// + /// @see vecmem::cuda::atomic_ref::store + /// + VECMEM_HOST_AND_DEVICE + value_type operator=(value_type data) const; + + /// Set the variable to the desired value + VECMEM_HOST_AND_DEVICE + void store(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Get the value of the variable + VECMEM_HOST_AND_DEVICE + value_type load(memory_order order = memory_order::seq_cst) const; + + /// Exchange the current value of the variable with a different one + VECMEM_HOST_AND_DEVICE + value_type exchange(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Compare against the current value, and exchange only if different + VECMEM_HOST_AND_DEVICE + bool compare_exchange_strong(reference expected, value_type desired, + memory_order success, + memory_order failure) const; + /// Compare against the current value, and exchange only if different + VECMEM_HOST_AND_DEVICE + bool compare_exchange_strong( + reference expected, value_type desired, + memory_order order = memory_order::seq_cst) const; + + /// @} + + /// @name Value modifier functions + /// @{ + + /// Add a chosen value to the stored variable + VECMEM_HOST_AND_DEVICE + value_type fetch_add(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Substitute a chosen value from the stored variable + VECMEM_HOST_AND_DEVICE + value_type fetch_sub(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Replace the current value with the specified value AND-ed to it + VECMEM_HOST_AND_DEVICE + value_type fetch_and(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value OR-d to it + VECMEM_HOST_AND_DEVICE + value_type fetch_or(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value XOR-d to it + VECMEM_HOST_AND_DEVICE + value_type fetch_xor(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// @} + +private: + /// Pointer to the value to perform atomic operations on + pointer m_ptr; + +}; // class device_atomic_ref + +} // namespace cuda +} // namespace vecmem + +// Include the implementation. +#include "vecmem/memory/impl/cuda_device_atomic_ref.ipp" diff --git a/core/include/vecmem/memory/details/dummy_device_atomic_ref.hpp b/core/include/vecmem/memory/details/dummy_device_atomic_ref.hpp new file mode 100644 index 00000000..9c01b217 --- /dev/null +++ b/core/include/vecmem/memory/details/dummy_device_atomic_ref.hpp @@ -0,0 +1,126 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" +#include "vecmem/memory/memory_order.hpp" +#include "vecmem/utils/types.hpp" + +// System include(s). +#include + +namespace vecmem { + +/// Dummy / No-op atomic reference for unsupported devices / compilers +/// +/// @tparam T Type to perform atomic operations on +/// @tparam address The device address space to use +/// +template +class dummy_device_atomic_ref { + +public: + /// @name Type definitions + /// @{ + + /// Type managed by the object + typedef T value_type; + /// Difference between two objects + typedef value_type difference_type; + /// Pointer to the value in global memory + typedef value_type* pointer; + /// Reference to a value given by the user + typedef value_type& reference; + + /// @} + + /// Constructor, with a pointer to the managed variable + VECMEM_HOST_AND_DEVICE + explicit dummy_device_atomic_ref(reference ref); + /// Copy constructor + VECMEM_HOST_AND_DEVICE + dummy_device_atomic_ref(const dummy_device_atomic_ref& parent); + + /// Disable the assignment operator + dummy_device_atomic_ref& operator=(const dummy_device_atomic_ref&) = delete; + + /// @name Value setter/getter functions + /// @{ + + /// Assigns a value desired to the referenced object + /// + /// @see vecmem::cuda::atomic_ref::store + /// + VECMEM_HOST_AND_DEVICE + value_type operator=(value_type data) const; + + /// Set the variable to the desired value + VECMEM_HOST_AND_DEVICE + void store(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Get the value of the variable + VECMEM_HOST_AND_DEVICE + value_type load(memory_order order = memory_order::seq_cst) const; + + /// Exchange the current value of the variable with a different one + VECMEM_HOST_AND_DEVICE + value_type exchange(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Compare against the current value, and exchange only if different + VECMEM_HOST_AND_DEVICE + bool compare_exchange_strong(reference expected, value_type desired, + memory_order success, + memory_order failure) const; + /// Compare against the current value, and exchange only if different + VECMEM_HOST_AND_DEVICE + bool compare_exchange_strong( + reference expected, value_type desired, + memory_order order = memory_order::seq_cst) const; + + /// @} + + /// @name Value modifier functions + /// @{ + + /// Add a chosen value to the stored variable + VECMEM_HOST_AND_DEVICE + value_type fetch_add(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Substitute a chosen value from the stored variable + VECMEM_HOST_AND_DEVICE + value_type fetch_sub(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Replace the current value with the specified value AND-ed to it + VECMEM_HOST_AND_DEVICE + value_type fetch_and(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value OR-d to it + VECMEM_HOST_AND_DEVICE + value_type fetch_or(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value XOR-d to it + VECMEM_HOST_AND_DEVICE + value_type fetch_xor(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// @} + +private: + /// Pointer to the value to perform atomic operations on + pointer m_ptr; + +}; // class dummy_device_atomic_ref + +} // namespace vecmem + +// Include the implementation. +#include "vecmem/memory/impl/dummy_device_atomic_ref.ipp" diff --git a/core/include/vecmem/memory/details/hip_device_atomic_ref.hpp b/core/include/vecmem/memory/details/hip_device_atomic_ref.hpp new file mode 100644 index 00000000..7b745b16 --- /dev/null +++ b/core/include/vecmem/memory/details/hip_device_atomic_ref.hpp @@ -0,0 +1,33 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// HIP include(s). +#include + +// Set up __VECMEM_THREADFENCE correctly for the vecmem::hip::device_atomic_ref +// code. +#ifdef __HIP_DEVICE_COMPILE__ +#define __VECMEM_THREADFENCE __threadfence() +#else +#define __VECMEM_THREADFENCE +#endif // defined(__HIP_DEVICE_COMPILE__) + +// Local include(s). +#include "vecmem/memory/details/cuda_device_atomic_ref.hpp" + +namespace vecmem { +namespace hip { + +/// Use @c vecmem::cuda::device_atomic_ref for HIP code as well +template +using device_atomic_ref = cuda::device_atomic_ref; + +} // namespace hip +} // namespace vecmem diff --git a/core/include/vecmem/memory/details/posix_device_atomic_ref.hpp b/core/include/vecmem/memory/details/posix_device_atomic_ref.hpp new file mode 100644 index 00000000..85119df8 --- /dev/null +++ b/core/include/vecmem/memory/details/posix_device_atomic_ref.hpp @@ -0,0 +1,135 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" +#include "vecmem/memory/memory_order.hpp" +#include "vecmem/utils/types.hpp" + +// System include(s). +#include + +namespace vecmem { + +/// Custom implementation for atomic operations using "POSIX" built-ins +/// +/// @tparam T Type to perform atomic operations on +/// @tparam address The device address space to use +/// +template +class posix_device_atomic_ref { + +public: + /// @name Type definitions + /// @{ + + /// Type managed by the object + typedef T value_type; + /// Difference between two objects + typedef value_type difference_type; + /// Pointer to the value in global memory + typedef value_type* pointer; + /// Reference to a value given by the user + typedef value_type& reference; + + /// @} + + /// @name Check(s) on the value type + /// @{ + + static_assert(std::is_integral::value, + "vecmem::posix_device_atomic_ref only accepts built-in " + "integral types"); + + /// @} + + /// Constructor, with a pointer to the managed variable + VECMEM_HOST + explicit posix_device_atomic_ref(reference ref); + /// Copy constructor + VECMEM_HOST + posix_device_atomic_ref(const posix_device_atomic_ref& parent); + + /// Disable the assignment operator + posix_device_atomic_ref& operator=(const posix_device_atomic_ref&) = delete; + + /// @name Value setter/getter functions + /// @{ + + /// Assigns a value desired to the referenced object + /// + /// @see vecmem::cuda::atomic_ref::store + /// + VECMEM_HOST + value_type operator=(value_type data) const; + + /// Set the variable to the desired value + VECMEM_HOST + void store(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Get the value of the variable + VECMEM_HOST + value_type load(memory_order order = memory_order::seq_cst) const; + + /// Exchange the current value of the variable with a different one + VECMEM_HOST + value_type exchange(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Compare against the current value, and exchange only if different + VECMEM_HOST + bool compare_exchange_strong(reference expected, value_type desired, + memory_order success, + memory_order failure) const; + /// Compare against the current value, and exchange only if different + VECMEM_HOST + bool compare_exchange_strong( + reference expected, value_type desired, + memory_order order = memory_order::seq_cst) const; + + /// @} + + /// @name Value modifier functions + /// @{ + + /// Add a chosen value to the stored variable + VECMEM_HOST + value_type fetch_add(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Substitute a chosen value from the stored variable + VECMEM_HOST + value_type fetch_sub(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Replace the current value with the specified value AND-ed to it + VECMEM_HOST + value_type fetch_and(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value OR-d to it + VECMEM_HOST + value_type fetch_or(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value XOR-d to it + VECMEM_HOST + value_type fetch_xor(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// @} + +private: + /// Pointer to the value to perform atomic operations on + pointer m_ptr; + +}; // class posix_device_atomic_ref + +} // namespace vecmem + +// Include the implementation. +#include "vecmem/memory/impl/posix_device_atomic_ref.ipp" diff --git a/core/include/vecmem/memory/details/sycl_builtin_device_atomic_ref.hpp b/core/include/vecmem/memory/details/sycl_builtin_device_atomic_ref.hpp new file mode 100644 index 00000000..30439324 --- /dev/null +++ b/core/include/vecmem/memory/details/sycl_builtin_device_atomic_ref.hpp @@ -0,0 +1,60 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" + +// SYCL include(s). +#include + +namespace vecmem { +namespace sycl { +namespace details { + +/// Helper trait for setting up an atomic reference on global or local memory +/// +/// @tparam address The address space to use +/// +template +struct builtin_address_space {}; + +/// Specialization for global device memory +template <> +struct builtin_address_space { + static constexpr cl::sycl::memory_order ord = + cl::sycl::memory_order::relaxed; + static constexpr cl::sycl::memory_scope scp = + cl::sycl::memory_scope::device; + static constexpr cl::sycl::access::address_space add = + cl::sycl::access::address_space::global_space; +}; + +/// Specialization for local device memory +template <> +struct builtin_address_space { + static constexpr cl::sycl::memory_order ord = + cl::sycl::memory_order::relaxed; + static constexpr cl::sycl::memory_scope scp = + cl::sycl::memory_scope::work_group; + static constexpr cl::sycl::access::address_space add = + cl::sycl::access::address_space::local_space; +}; + +} // namespace details + +/// Atomic reference based on @c ::sycl::atomic_ref +template +using builtin_device_atomic_ref = + cl::sycl::atomic_ref::ord, + details::builtin_address_space
::scp, + details::builtin_address_space
::add>; + +} // namespace sycl +} // namespace vecmem diff --git a/core/include/vecmem/memory/details/sycl_custom_device_atomic_ref.hpp b/core/include/vecmem/memory/details/sycl_custom_device_atomic_ref.hpp new file mode 100644 index 00000000..f74f0ff6 --- /dev/null +++ b/core/include/vecmem/memory/details/sycl_custom_device_atomic_ref.hpp @@ -0,0 +1,125 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" +#include "vecmem/memory/memory_order.hpp" + +// System include(s). +#include + +namespace vecmem { +namespace sycl { + +/// Custom implementation for atomic operations in SYCL device code +/// +/// @tparam T Type to perform atomic operations on +/// @tparam address The device address space to use +/// +template +class custom_device_atomic_ref { + +public: + /// @name Type definitions + /// @{ + + /// Type managed by the object + typedef T value_type; + /// Difference between two objects + typedef value_type difference_type; + /// Pointer to the value in global memory + typedef value_type* pointer; + /// Reference to a value given by the user + typedef value_type& reference; + + /// @} + + /// @name Check(s) on the value type + /// @{ + + static_assert( + std::is_integral::value, + "vecmem::sycl::custom_device_atomic_ref only accepts built-in " + "integral types"); + + /// @} + + /// Constructor, with a pointer to the managed variable + explicit custom_device_atomic_ref(reference ref); + /// Copy constructor + custom_device_atomic_ref(const custom_device_atomic_ref& parent); + + /// Disable the assignment operator + custom_device_atomic_ref& operator=(const custom_device_atomic_ref&) = + delete; + + /// @name Value setter/getter functions + /// @{ + + /// Assigns a value desired to the referenced object + /// + /// @see vecmem::device_atomic_ref::store + /// + value_type operator=(value_type data) const; + + /// Set the variable to the desired value + void store(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Get the value of the variable + value_type load(memory_order order = memory_order::seq_cst) const; + + /// Exchange the current value of the variable with a different one + value_type exchange(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Compare against the current value, and exchange only if different + bool compare_exchange_strong(reference expected, value_type desired, + memory_order success, + memory_order failure) const; + /// Compare against the current value, and exchange only if different + bool compare_exchange_strong( + reference expected, value_type desired, + memory_order order = memory_order::seq_cst) const; + + /// @} + + /// @name Value modifier functions + /// @{ + + /// Add a chosen value to the stored variable + value_type fetch_add(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Substitute a chosen value from the stored variable + value_type fetch_sub(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// Replace the current value with the specified value AND-ed to it + value_type fetch_and(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value OR-d to it + value_type fetch_or(value_type data, + memory_order order = memory_order::seq_cst) const; + /// Replace the current value with the specified value XOR-d to it + value_type fetch_xor(value_type data, + memory_order order = memory_order::seq_cst) const; + + /// @} + +private: + /// Pointer to the value to perform atomic operations on + pointer m_ptr; + +}; // class custom_device_atomic_ref + +} // namespace sycl +} // namespace vecmem + +// Include the implementation. +#include "vecmem/memory/impl/sycl_custom_device_atomic_ref.ipp" diff --git a/core/include/vecmem/memory/device_address_space.hpp b/core/include/vecmem/memory/device_address_space.hpp new file mode 100644 index 00000000..a5fb1da2 --- /dev/null +++ b/core/include/vecmem/memory/device_address_space.hpp @@ -0,0 +1,14 @@ +/* VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +namespace vecmem { + +/// Custom definition for the device memory adress space +enum class device_address_space { global = 0, local = 1 }; + +} // namespace vecmem diff --git a/core/include/vecmem/memory/device_atomic_ref.hpp b/core/include/vecmem/memory/device_atomic_ref.hpp index 01afc054..69a895bc 100644 --- a/core/include/vecmem/memory/device_atomic_ref.hpp +++ b/core/include/vecmem/memory/device_atomic_ref.hpp @@ -1,202 +1,117 @@ /* * VecMem project, part of the ACTS project (R&D line) * - * (c) 2022-2023 CERN for the benefit of the ACTS project + * (c) 2022-2024 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ #pragma once +// Local include(s). +#include "vecmem/memory/device_address_space.hpp" + // System include(s). #include -// vecmem includes -#include "vecmem/memory/memory_order.hpp" +#if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) +#if defined(VECMEM_HAVE_SYCL_ATOMIC_REF) + +// Local include(s). +#include "vecmem/memory/details/sycl_builtin_device_atomic_ref.hpp" namespace vecmem { -/// Custom definition for the adress space -enum class device_address_space { global = 0, local = 1 }; +/// Use @c vecmem::sycl::builtin_device_atomic_ref with modern SYCL code +template +using device_atomic_ref = sycl::builtin_device_atomic_ref; } // namespace vecmem -#if (defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION)) && \ - defined(VECMEM_HAVE_SYCL_ATOMIC_REF) +#else // defined(VECMEM_HAVE_SYCL_ATOMIC_REF) -// SYCL include(s). -#include +// Local include(s). +#include "vecmem/memory/details/sycl_custom_device_atomic_ref.hpp" namespace vecmem { -namespace details { -template -struct sycl_address_space {}; - -template <> -struct sycl_address_space { - static constexpr ::sycl::memory_order ord = ::sycl::memory_order::relaxed; - static constexpr ::sycl::memory_scope scp = ::sycl::memory_scope::device; - static constexpr ::sycl::access::address_space add = - ::sycl::access::address_space::global_space; -}; -template <> -struct sycl_address_space { - static constexpr ::sycl::memory_order ord = ::sycl::memory_order::relaxed; - static constexpr ::sycl::memory_scope scp = - ::sycl::memory_scope::work_group; - static constexpr ::sycl::access::address_space add = - ::sycl::access::address_space::local_space; -}; -} // namespace details - -/// @c vecmem::atomic_ref equals @c sycl::atomic_ref with "modern SYCL" + +/// Use @c vecmem::sycl::custom_device_atomic_ref with older SYCL code template -using device_atomic_ref = - ::sycl::atomic_ref::ord, - details::sycl_address_space
::scp, - details::sycl_address_space
::add>; +using device_atomic_ref = sycl::custom_device_atomic_ref; } // namespace vecmem -#elif ((!defined(__CUDA_ARCH__)) && (!defined(__HIP_DEVICE_COMPILE__)) && \ - (!defined(CL_SYCL_LANGUAGE_VERSION)) && \ - (!defined(SYCL_LANGUAGE_VERSION)) && __cpp_lib_atomic_ref) +#endif // defined(VECMEM_HAVE_SYCL_ATOMIC_REF) +#elif defined(__CUDACC__) + +// Local include(s). +#include "vecmem/memory/details/cuda_device_atomic_ref.hpp" namespace vecmem { -/// @c vecmem::atomic_ref equals @c std::atomic_ref in host code with C++20 + +/// Use @c vecmem::cuda::device_atomic_ref in CUDA device code +template +using device_atomic_ref = cuda::device_atomic_ref; + +} // namespace vecmem + +#elif defined(__HIPCC__) + +// Local include(s). +#include "vecmem/memory/details/hip_device_atomic_ref.hpp" + +namespace vecmem { + +/// Use @c vecmem::hip::device_atomic_ref in HIP device code +template +using device_atomic_ref = hip::device_atomic_ref; + +} // namespace vecmem + +#elif defined(__cpp_lib_atomic_ref) + +namespace vecmem { + +/// Use @c std::atomic_ref in host code with C++20 template using device_atomic_ref = std::atomic_ref; } // namespace vecmem -#else +#elif defined(VECMEM_SUPPORT_POSIX_ATOMIC_REF) -// VecMem include(s). -#include "vecmem/utils/types.hpp" +// Local include(s). +#include "vecmem/memory/details/posix_device_atomic_ref.hpp" -// System include(s). -#include +namespace vecmem { + +/// Use @c vecmem::posix_device_atomic_ref with POSIX threads +template +using device_atomic_ref = posix_device_atomic_ref; + +} // namespace vecmem + +#else + +// Local include(s). +#include "vecmem/memory/details/dummy_device_atomic_ref.hpp" namespace vecmem { -/// Class providing atomic operations for the VecMem code -/// -/// It is only meant to be used with primitive types. Ones that CUDA, HIP and -/// SYCL built-in functions exist for. So no structs, or even pointers. -/// -/// Note that it does not perform atomic operations in host code! That is only -/// implemented with @c std::atomic_ref in C++20. With earlier C++ standards all -/// operations in host code are performed as "regular" operations. -/// -template -class device_atomic_ref { - -public: - /// @name Type definitions - /// @{ - - /// Type managed by the object - typedef T value_type; - /// Difference between two objects - typedef value_type difference_type; - /// Pointer to the value in global memory - typedef value_type* pointer; - /// Reference to a value given by the user - typedef value_type& reference; - - /// @} - - /// @name Check(s) on the value type - /// @{ - - static_assert(std::is_integral::value, - "vecmem::atomic_ref only accepts built-in integral types"); - - /// @} - - /// Constructor, with a pointer to the managed variable - VECMEM_HOST_AND_DEVICE - explicit device_atomic_ref(reference ref); - /// Copy constructor - VECMEM_HOST_AND_DEVICE - device_atomic_ref(const device_atomic_ref& parent); - - /// Disable the assignment operator - device_atomic_ref& operator=(const device_atomic_ref&) = delete; - - /// @name Value setter/getter functions - /// @{ - - /// Assigns a value desired to the referenced object - /// - /// @see vecmem::device_atomic_ref::store - /// - VECMEM_HOST_AND_DEVICE - value_type operator=(value_type data) const; - - /// Set the variable to the desired value - VECMEM_HOST_AND_DEVICE - void store(value_type data, - memory_order order = memory_order::seq_cst) const; - /// Get the value of the variable - VECMEM_HOST_AND_DEVICE - value_type load(memory_order order = memory_order::seq_cst) const; - - /// Exchange the current value of the variable with a different one - VECMEM_HOST_AND_DEVICE - value_type exchange(value_type data, - memory_order order = memory_order::seq_cst) const; - - /// Compare against the current value, and exchange only if different - VECMEM_HOST_AND_DEVICE - bool compare_exchange_strong(reference expected, value_type desired, - memory_order success, - memory_order failure) const; - /// Compare against the current value, and exchange only if different - VECMEM_HOST_AND_DEVICE - bool compare_exchange_strong( - reference expected, value_type desired, - memory_order order = memory_order::seq_cst) const; - - /// @} - - /// @name Value modifier functions - /// @{ - - /// Add a chosen value to the stored variable - VECMEM_HOST_AND_DEVICE - value_type fetch_add(value_type data, - memory_order order = memory_order::seq_cst) const; - /// Substitute a chosen value from the stored variable - VECMEM_HOST_AND_DEVICE - value_type fetch_sub(value_type data, - memory_order order = memory_order::seq_cst) const; - - /// Replace the current value with the specified value AND-ed to it - VECMEM_HOST_AND_DEVICE - value_type fetch_and(value_type data, - memory_order order = memory_order::seq_cst) const; - /// Replace the current value with the specified value OR-d to it - VECMEM_HOST_AND_DEVICE - value_type fetch_or(value_type data, - memory_order order = memory_order::seq_cst) const; - /// Replace the current value with the specified value XOR-d to it - VECMEM_HOST_AND_DEVICE - value_type fetch_xor(value_type data, - memory_order order = memory_order::seq_cst) const; - - /// @} - -private: - /// Pointer to the value to perform atomic operations on - pointer m_ptr; - -}; // class device_atomic_ref + +/// Use @c vecmem::dummy_device_atomic_ref as a fallback +template +using device_atomic_ref = dummy_device_atomic_ref; } // namespace vecmem -// Include the implementation. -#include "vecmem/memory/impl/device_atomic_ref.ipp" +#endif +// Test that the selected class would fulfill the atomic_ref concept. #if __cpp_concepts >= 201907L #include "vecmem/concepts/atomic_ref.hpp" static_assert( @@ -209,5 +124,3 @@ static_assert( vecmem::concepts::atomic_ref >, "Atomic reference on std::size_t is incompletely defined."); #endif - -#endif // Platform selection diff --git a/core/include/vecmem/memory/impl/cuda_device_atomic_ref.ipp b/core/include/vecmem/memory/impl/cuda_device_atomic_ref.ipp new file mode 100644 index 00000000..9fb854f0 --- /dev/null +++ b/core/include/vecmem/memory/impl/cuda_device_atomic_ref.ipp @@ -0,0 +1,149 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// System include(s). +#include + +namespace vecmem { +namespace cuda { + +template +VECMEM_HOST_AND_DEVICE device_atomic_ref::device_atomic_ref( + reference ref) + : m_ptr(&ref) {} + +template +VECMEM_HOST_AND_DEVICE device_atomic_ref::device_atomic_ref( + const device_atomic_ref& parent) + : m_ptr(parent.m_ptr) {} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::operator=( + value_type data) const -> value_type { + + store(data); + return data; +} + +// Only invoke __threadfence() during device code compilation. Without this, +// nvcc gets upset about calling this **device only** function from a function +// labeled HOST_AND_DEVICE. Allow an outside source to set the macro, so that +// vecmem::hip::device_atomic_ref could have its own logic for setting it up +// correctly. +#ifndef __VECMEM_THREADFENCE +#ifdef __CUDA_ARCH__ +#define __VECMEM_THREADFENCE __threadfence() +#else +#define __VECMEM_THREADFENCE +#endif // defined(__CUDA_ARCH__) +#endif // not defined(__VECMEM_THREADFENCE) + +template +VECMEM_HOST_AND_DEVICE void device_atomic_ref::store( + value_type data, memory_order) const { + + volatile pointer addr = m_ptr; + __VECMEM_THREADFENCE; + *addr = data; +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::load( + memory_order) const -> value_type { + + volatile pointer addr = m_ptr; + __VECMEM_THREADFENCE; + const value_type value = *addr; + __VECMEM_THREADFENCE; + return value; +} + +#undef __VECMEM_THREADFENCE + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::exchange( + value_type data, memory_order) const -> value_type { + + return atomicExch(m_ptr, data); +} + +template +VECMEM_HOST_AND_DEVICE bool +device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order order) const { + + if (order == memory_order::acq_rel) { + return compare_exchange_strong(expected, desired, order, + memory_order::acquire); + } else if (order == memory_order::release) { + return compare_exchange_strong(expected, desired, order, + memory_order::relaxed); + } else { + return compare_exchange_strong(expected, desired, order, order); + } +} + +template +VECMEM_HOST_AND_DEVICE bool +device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order, + memory_order failure) const { + + (void)failure; + assert(failure != memory_order::release && + failure != memory_order::acq_rel); + + const value_type r = atomicCAS(m_ptr, expected, desired); + // atomicCAS returns the old value, so the change will have succeeded if + // the old value was the expected value. + if (r == expected) { + return true; + } else { + expected = r; + return false; + } +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_add( + value_type data, memory_order) const -> value_type { + + return atomicAdd(m_ptr, data); +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_sub( + value_type data, memory_order) const -> value_type { + + return atomicSub(m_ptr, data); +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_and( + value_type data, memory_order) const -> value_type { + + return atomicAnd(m_ptr, data); +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_or( + value_type data, memory_order) const -> value_type { + + return atomicOr(m_ptr, data); +} + +template +VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_xor( + value_type data, memory_order) const -> value_type { + + return atomicXor(m_ptr, data); +} + +} // namespace cuda +} // namespace vecmem diff --git a/core/include/vecmem/memory/impl/device_atomic_ref.ipp b/core/include/vecmem/memory/impl/device_atomic_ref.ipp deleted file mode 100644 index 1efd2eff..00000000 --- a/core/include/vecmem/memory/impl/device_atomic_ref.ipp +++ /dev/null @@ -1,420 +0,0 @@ -/* - * VecMem project, part of the ACTS project (R&D line) - * - * (c) 2022-2024 CERN for the benefit of the ACTS project - * - * Mozilla Public License Version 2.0 - */ -#pragma once - -#include -#include - -// vecmem includes -#include "vecmem/memory/memory_order.hpp" - -// HIP include -#if defined(__HIP_DEVICE_COMPILE__) -#include -#endif - -// SYCL include(s). -#if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) -#include -#endif - -/// Helpers for explicit calls to the SYCL atomic functions -#if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - -namespace vecmem::details { -template -struct sycl_address_space {}; - -template <> -struct sycl_address_space { - static constexpr cl::sycl::access::address_space add = - cl::sycl::access::address_space::global_space; - - template - using ptr_t = cl::sycl::global_ptr; -}; -template <> -struct sycl_address_space { - static constexpr cl::sycl::access::address_space add = - cl::sycl::access::address_space::local_space; - template - using ptr_t = cl::sycl::local_ptr; -}; -} // namespace vecmem::details - -#define __VECMEM_SYCL_ATOMIC_CALL0(FNAME, PTR) \ - cl::sycl::atomic_##FNAME::add>( \ - cl::sycl::atomic::add>( \ - typename details::sycl_address_space
::template ptr_t< \ - value_type>(PTR))) -#define __VECMEM_SYCL_ATOMIC_CALL1(FNAME, PTR, ARG1) \ - cl::sycl::atomic_##FNAME::add>( \ - cl::sycl::atomic::add>( \ - typename details::sycl_address_space
::template ptr_t< \ - value_type>(PTR)), \ - ARG1) -#define __VECMEM_SYCL_ATOMIC_CALL2(FNAME, PTR, ARG1, ARG2) \ - cl::sycl::atomic_##FNAME::add>( \ - cl::sycl::atomic::add>( \ - typename details::sycl_address_space
::template ptr_t< \ - value_type>(PTR)), \ - ARG1, ARG2) -#endif - -#if defined __has_builtin -#if __has_builtin(__atomic_load_n) -#define VECMEM_HAVE_BUILTIN_ATOMIC_LOAD -#endif -#if __has_builtin(__atomic_store_n) -#define VECMEM_HAVE_BUILTIN_ATOMIC_STORE -#endif -#if __has_builtin(__atomic_exchange_n) -#define VECMEM_HAVE_BUILTIN_ATOMIC_EXCHANGE -#endif -#if __has_builtin(__atomic_compare_exchange_n) -#define VECMEM_HAVE_BUILTIN_ATOMIC_COMPARE_EXCHANGE -#endif -#if __has_builtin(__atomic_fetch_add) -#define VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_ADD -#endif -#if __has_builtin(__atomic_fetch_sub) -#define VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_SUB -#endif -#if __has_builtin(__atomic_fetch_and) -#define VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_AND -#endif -#if __has_builtin(__atomic_fetch_xor) -#define VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_XOR -#endif -#if __has_builtin(__atomic_fetch_or) -#define VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_OR -#endif -#endif - -namespace vecmem { - -#if defined(__ATOMIC_RELAXED) && defined(__ATOMIC_CONSUME) && \ - defined(__ATOMIC_ACQUIRE) && defined(__ATOMIC_RELEASE) && \ - defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) -constexpr int __memorder_vecmem_to_builtin(memory_order o) { - switch (o) { - case memory_order::relaxed: - return __ATOMIC_RELAXED; - case memory_order::consume: - return __ATOMIC_CONSUME; - case memory_order::acquire: - return __ATOMIC_ACQUIRE; - case memory_order::release: - return __ATOMIC_RELEASE; - case memory_order::acq_rel: - return __ATOMIC_ACQ_REL; - case memory_order::seq_cst: - return __ATOMIC_SEQ_CST; - default: - assert(false); - return 0; - } -} -#define VECMEM_HAVE_MEMORDER_DEFINITIONS -#endif - -template -VECMEM_HOST_AND_DEVICE device_atomic_ref::device_atomic_ref( - reference ref) - : m_ptr(&ref) {} - -template -VECMEM_HOST_AND_DEVICE device_atomic_ref::device_atomic_ref( - const device_atomic_ref& parent) - : m_ptr(parent.m_ptr) {} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::operator=( - value_type data) const -> value_type { - - store(data); - return data; -} - -template -VECMEM_HOST_AND_DEVICE void device_atomic_ref::store( - value_type data, memory_order order) const { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - volatile pointer addr = m_ptr; - __threadfence(); - *addr = data; -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - __VECMEM_SYCL_ATOMIC_CALL1(store, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_STORE) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - __atomic_store_n(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - __atomic_store_n(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - exchange(data, order); -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::load( - memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - volatile pointer addr = m_ptr; - __threadfence(); - const value_type value = *addr; - __threadfence(); - return value; -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL0(load, m_ptr); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_LOAD) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_load_n(m_ptr, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_load_n(m_ptr, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = 0; - compare_exchange_strong(tmp, tmp, order); - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::exchange( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicExch(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(exchange, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_EXCHANGE) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_exchange_n(m_ptr, data, - __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_exchange_n(m_ptr, data, - __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, data, order)) - ; - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE bool -device_atomic_ref::compare_exchange_strong( - reference expected, value_type desired, memory_order order) const { - if (order == memory_order::acq_rel) { - return compare_exchange_strong(expected, desired, order, - memory_order::acquire); - } else if (order == memory_order::release) { - return compare_exchange_strong(expected, desired, order, - memory_order::relaxed); - } else { - return compare_exchange_strong(expected, desired, order, order); - } -} - -template -VECMEM_HOST_AND_DEVICE bool -device_atomic_ref::compare_exchange_strong( - reference expected, value_type desired, memory_order success, - memory_order failure) const { - (void)success, (void)failure; - assert(failure != memory_order::release && - failure != memory_order::acq_rel); -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - value_type r = atomicCAS(m_ptr, expected, desired); - // atomicCAS returns the old value, so the change will have succeeded if - // the old value was the expected value. - if (r == expected) { - return true; - } else { - expected = r; - return false; - } -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL2(compare_exchange_strong, m_ptr, expected, - desired); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_CAS) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_compare_exchange_n(m_ptr, &expected, desired, false, - __memorder_vecmem_to_builtin(success), - __memorder_vecmem_to_builtin(failure)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_compare_exchange_n(m_ptr, &expected, desired, false, - __memorder_vecmem_to_builtin(success), - __memorder_vecmem_to_builtin(failure)); -#else - // This is **NOT** a sane implementation of CAS! - value_type old = *m_ptr; - if (old == expected) { - *m_ptr = desired; - return true; - } else { - expected = old; - return false; - } -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_add( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicAdd(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(fetch_add, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_ADD) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_add(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_fetch_add(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, tmp + data, order)) - ; - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_sub( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicSub(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(fetch_sub, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_SUB) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_sub(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_ADD) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_add(m_ptr, -data, - __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_fetch_sub(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, tmp - data, order)) - ; - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_and( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicAnd(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(fetch_and, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_AND) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_and(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_fetch_and(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, tmp & data, order)) - ; - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_or( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicOr(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(fetch_or, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_OR) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_or(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_fetch_or(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, tmp | data, order)) - ; - return tmp; -#endif -} - -template -VECMEM_HOST_AND_DEVICE auto device_atomic_ref::fetch_xor( - value_type data, memory_order order) const -> value_type { - (void)order; -#if (defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)) && \ - (!(defined(SYCL_LANGUAGE_VERSION) || defined(CL_SYCL_LANGUAGE_VERSION))) - return atomicXor(m_ptr, data); -#elif defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) - return __VECMEM_SYCL_ATOMIC_CALL1(fetch_xor, m_ptr, data); -#elif defined(VECMEM_HAVE_BUILTIN_ATOMIC_XOR) && \ - defined(VECMEM_HAVE_MEMORDER_DEFINITIONS) - return __atomic_fetch_xor(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ - defined(__CUDACC__) - return __atomic_fetch_xor(m_ptr, data, __memorder_vecmem_to_builtin(order)); -#else - value_type tmp = load(); - while (!compare_exchange_strong(tmp, tmp ^ data, order)) - ; - return tmp; -#endif -} - -} // namespace vecmem - -// Clean up after the SYCL macros. -#if defined(CL_SYCL_LANGUAGE_VERSION) || defined(SYCL_LANGUAGE_VERSION) -#undef __VECMEM_SYCL_ATOMIC_CALL0 -#undef __VECMEM_SYCL_ATOMIC_CALL1 -#undef __VECMEM_SYCL_ATOMIC_CALL2 -#endif - -#undef VECMEM_HAVE_BUILTIN_ATOMIC_LOAD -#undef VECMEM_HAVE_BUILTIN_ATOMIC_STORE -#undef VECMEM_HAVE_BUILTIN_ATOMIC_EXCHANGE -#undef VECMEM_HAVE_BUILTIN_ATOMIC_COMPARE_EXCHANGE -#undef VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_ADD -#undef VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_SUB -#undef VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_AND -#undef VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_XOR -#undef VECMEM_HAVE_BUILTIN_ATOMIC_FETCH_OR -#undef VECMEM_HAVE_MEMORDER_DEFINITIONS diff --git a/core/include/vecmem/memory/impl/dummy_device_atomic_ref.ipp b/core/include/vecmem/memory/impl/dummy_device_atomic_ref.ipp new file mode 100644 index 00000000..0da36947 --- /dev/null +++ b/core/include/vecmem/memory/impl/dummy_device_atomic_ref.ipp @@ -0,0 +1,146 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// Local include(s). +#include "vecmem/memory/memory_order.hpp" +#include "vecmem/utils/types.hpp" + +// System include(s). +#include + +namespace vecmem { + +template +VECMEM_HOST_AND_DEVICE +dummy_device_atomic_ref::dummy_device_atomic_ref(reference ref) + : m_ptr(&ref) {} + +template +VECMEM_HOST_AND_DEVICE +dummy_device_atomic_ref::dummy_device_atomic_ref( + const dummy_device_atomic_ref& parent) + : m_ptr(parent.m_ptr) {} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::operator=( + value_type data) const -> value_type { + + store(data); + return data; +} + +template +VECMEM_HOST_AND_DEVICE void dummy_device_atomic_ref::store( + value_type data, memory_order order) const { + + exchange(data, order); +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::load( + memory_order order) const -> value_type { + + value_type tmp = 0; + compare_exchange_strong(tmp, tmp, order); + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::exchange( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, data, order)) + ; + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE bool +dummy_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order order) const { + + if (order == memory_order::acq_rel) { + return compare_exchange_strong(expected, desired, order, + memory_order::acquire); + } else if (order == memory_order::release) { + return compare_exchange_strong(expected, desired, order, + memory_order::relaxed); + } else { + return compare_exchange_strong(expected, desired, order, order); + } +} + +template +VECMEM_HOST_AND_DEVICE bool +dummy_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order, memory_order) const { + + // This is **NOT** a sane implementation of CAS! + value_type old = *m_ptr; + if (old == expected) { + *m_ptr = desired; + return true; + } else { + expected = old; + return false; + } +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::fetch_add( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, tmp + data, order)) + ; + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::fetch_sub( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, tmp - data, order)) + ; + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::fetch_and( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, tmp & data, order)) + ; + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::fetch_or( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, tmp | data, order)) + ; + return tmp; +} + +template +VECMEM_HOST_AND_DEVICE auto dummy_device_atomic_ref::fetch_xor( + value_type data, memory_order order) const -> value_type { + + value_type tmp = load(); + while (!compare_exchange_strong(tmp, tmp ^ data, order)) + ; + return tmp; +} + +} // namespace vecmem diff --git a/core/include/vecmem/memory/impl/posix_device_atomic_ref.ipp b/core/include/vecmem/memory/impl/posix_device_atomic_ref.ipp new file mode 100644 index 00000000..b4253dde --- /dev/null +++ b/core/include/vecmem/memory/impl/posix_device_atomic_ref.ipp @@ -0,0 +1,151 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// System include(s). +#include + +namespace vecmem { +namespace details { + +/// @brief Convert a memory order to the corresponding builtin memory order +/// @param o The (vecmem) memory order +/// @return The corresponding builtin memory order +/// +constexpr int memorder_to_posix_builtin(memory_order o) { + switch (o) { + case memory_order::relaxed: + return __ATOMIC_RELAXED; + case memory_order::consume: + return __ATOMIC_CONSUME; + case memory_order::acquire: + return __ATOMIC_ACQUIRE; + case memory_order::release: + return __ATOMIC_RELEASE; + case memory_order::acq_rel: + return __ATOMIC_ACQ_REL; + case memory_order::seq_cst: + return __ATOMIC_SEQ_CST; + default: + assert(false); + return 0; + } +} + +} // namespace details + +template +VECMEM_HOST posix_device_atomic_ref::posix_device_atomic_ref( + reference ref) + : m_ptr(&ref) {} + +template +VECMEM_HOST posix_device_atomic_ref::posix_device_atomic_ref( + const posix_device_atomic_ref& parent) + : m_ptr(parent.m_ptr) {} + +template +VECMEM_HOST auto posix_device_atomic_ref::operator=( + value_type data) const -> value_type { + + store(data); + return data; +} + +template +VECMEM_HOST void posix_device_atomic_ref::store( + value_type data, memory_order order) const { + + __atomic_store_n(m_ptr, data, details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::load( + memory_order order) const -> value_type { + + return __atomic_load_n(m_ptr, details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::exchange( + value_type data, memory_order order) const -> value_type { + + return __atomic_exchange_n(m_ptr, data, + details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST bool posix_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order order) const { + + if (order == memory_order::acq_rel) { + return compare_exchange_strong(expected, desired, order, + memory_order::acquire); + } else if (order == memory_order::release) { + return compare_exchange_strong(expected, desired, order, + memory_order::relaxed); + } else { + return compare_exchange_strong(expected, desired, order, order); + } +} + +template +VECMEM_HOST bool posix_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order success, + memory_order failure) const { + + assert(failure != memory_order::release && + failure != memory_order::acq_rel); + + return __atomic_compare_exchange_n( + m_ptr, &expected, desired, false, + details::memorder_to_posix_builtin(success), + details::memorder_to_posix_builtin(failure)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::fetch_add( + value_type data, memory_order order) const -> value_type { + + return __atomic_fetch_add(m_ptr, data, + details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::fetch_sub( + value_type data, memory_order order) const -> value_type { + + return __atomic_fetch_add(m_ptr, -data, + details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::fetch_and( + value_type data, memory_order order) const -> value_type { + + return __atomic_fetch_and(m_ptr, data, + details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::fetch_or( + value_type data, memory_order order) const -> value_type { + + return __atomic_fetch_or(m_ptr, data, + details::memorder_to_posix_builtin(order)); +} + +template +VECMEM_HOST auto posix_device_atomic_ref::fetch_xor( + value_type data, memory_order order) const -> value_type { + + return __atomic_fetch_xor(m_ptr, data, + details::memorder_to_posix_builtin(order)); +} + +} // namespace vecmem diff --git a/core/include/vecmem/memory/impl/sycl_custom_device_atomic_ref.ipp b/core/include/vecmem/memory/impl/sycl_custom_device_atomic_ref.ipp new file mode 100644 index 00000000..66ad8c28 --- /dev/null +++ b/core/include/vecmem/memory/impl/sycl_custom_device_atomic_ref.ipp @@ -0,0 +1,175 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ +#pragma once + +// SYCL include(s). +#include + +namespace vecmem { +namespace sycl { +namespace details { + +template +struct custom_address_space {}; + +template <> +struct custom_address_space { + static constexpr cl::sycl::access::address_space add = + cl::sycl::access::address_space::global_space; + + template + using ptr_t = cl::sycl::global_ptr; +}; + +template <> +struct custom_address_space { + static constexpr cl::sycl::access::address_space add = + cl::sycl::access::address_space::local_space; + template + using ptr_t = cl::sycl::local_ptr; +}; + +} // namespace details + +#define __VECMEM_SYCL_ATOMIC_CALL0(FNAME, PTR) \ + cl::sycl::atomic_##FNAME::add>( \ + cl::sycl::atomic::add>( \ + typename details::custom_address_space
::template ptr_t< \ + value_type>(PTR))) +#define __VECMEM_SYCL_ATOMIC_CALL1(FNAME, PTR, ARG1) \ + cl::sycl::atomic_##FNAME::add>( \ + cl::sycl::atomic::add>( \ + typename details::custom_address_space
::template ptr_t< \ + value_type>(PTR)), \ + ARG1) +#define __VECMEM_SYCL_ATOMIC_CALL2(FNAME, PTR, ARG1, ARG2) \ + cl::sycl::atomic_##FNAME::add>( \ + cl::sycl::atomic::add>( \ + typename details::custom_address_space
::template ptr_t< \ + value_type>(PTR)), \ + ARG1, ARG2) + +template +custom_device_atomic_ref::custom_device_atomic_ref(reference ref) + : m_ptr(&ref) {} + +template +custom_device_atomic_ref::custom_device_atomic_ref( + const custom_device_atomic_ref& parent) + : m_ptr(parent.m_ptr) {} + +template +auto custom_device_atomic_ref::operator=(value_type data) const + -> value_type { + + store(data); + return data; +} + +template +void custom_device_atomic_ref::store(value_type data, + memory_order) const { + + __VECMEM_SYCL_ATOMIC_CALL1(store, m_ptr, data); +} + +template +auto custom_device_atomic_ref::load(memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL0(load, m_ptr); +} + +template +auto custom_device_atomic_ref::exchange(value_type data, + memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(exchange, m_ptr, data); +} + +template +bool custom_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order order) const { + + if (order == memory_order::acq_rel) { + return compare_exchange_strong(expected, desired, order, + memory_order::acquire); + } else if (order == memory_order::release) { + return compare_exchange_strong(expected, desired, order, + memory_order::relaxed); + } else { + return compare_exchange_strong(expected, desired, order, order); + } +} + +template +bool custom_device_atomic_ref::compare_exchange_strong( + reference expected, value_type desired, memory_order, + memory_order failure) const { + + (void)failure; + assert(failure != memory_order::release && + failure != memory_order::acq_rel); + + return __VECMEM_SYCL_ATOMIC_CALL2(compare_exchange_strong, m_ptr, expected, + desired); +} + +template +auto custom_device_atomic_ref::fetch_add(value_type data, + memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(fetch_add, m_ptr, data); +} + +template +auto custom_device_atomic_ref::fetch_sub(value_type data, + memory_order order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(fetch_sub, m_ptr, data); +} + +template +auto custom_device_atomic_ref::fetch_and(value_type data, + memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(fetch_and, m_ptr, data); +} + +template +auto custom_device_atomic_ref::fetch_or(value_type data, + memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(fetch_or, m_ptr, data); +} + +template +auto custom_device_atomic_ref::fetch_xor(value_type data, + memory_order) const + -> value_type { + + return __VECMEM_SYCL_ATOMIC_CALL1(fetch_xor, m_ptr, data); +} + +#undef __VECMEM_SYCL_ATOMIC_CALL0 +#undef __VECMEM_SYCL_ATOMIC_CALL1 +#undef __VECMEM_SYCL_ATOMIC_CALL2 + +} // namespace sycl +} // namespace vecmem