From 3615ee1758859ea38e09a50e5ec5e775f159c0cd Mon Sep 17 00:00:00 2001 From: Attila Krasznahorkay Date: Tue, 25 Jan 2022 12:16:11 +0100 Subject: [PATCH 1/2] Fixed the alignment of the elements in (jagged) vector buffers For elements larger than unsigned int (32 bits), the alignment needs some careful consideration. Otherwise device code would try to access memory through what it considers unaligned pointers. This could be fixed by an appropriate small padding in the vector buffers. --- .../containers/impl/jagged_vector_buffer.ipp | 20 ++++++- .../vecmem/containers/impl/vector_buffer.ipp | 44 +++++++++++++-- tests/cuda/test_cuda_containers.cpp | 24 +++++++- tests/cuda/test_cuda_containers_kernels.cu | 55 ++++++++++++++++++- tests/cuda/test_cuda_containers_kernels.cuh | 8 ++- 5 files changed, 141 insertions(+), 10 deletions(-) diff --git a/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp b/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp index 44b0907d..3d6f1dd4 100644 --- a/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp +++ b/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp @@ -1,7 +1,7 @@ /* * VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -27,6 +27,18 @@ std::vector get_sizes( return result; } +/// Get the padding needed after the size array for correct alignment +template +std::size_t get_buffer_alignment_padding(std::size_t size) { + + // Total size of the size array. + const std::size_t size_size = + size * sizeof(typename vecmem::data::jagged_vector_buffer< + TYPE>::value_type::size_type); + // Return the padding needed after the size array. + return (alignof(TYPE) - (size_size % alignof(TYPE))); +} + /// Function allocating memory for @c vecmem::data::jagged_vector_buffer template vecmem::unique_alloc_ptr< @@ -56,7 +68,8 @@ vecmem::unique_alloc_ptr allocate_jagged_buffer_inner_memory( if (isResizable) { byteSize += sizes.size() * sizeof(typename vecmem::data::jagged_vector_buffer< - TYPE>::value_type::size_type); + TYPE>::value_type::size_type) + + get_buffer_alignment_padding(sizes.size()); } return vecmem::make_unique_alloc(resource, byteSize); @@ -131,7 +144,8 @@ jagged_vector_buffer::jagged_vector_buffer( // Set up the host accessible memory array. std::ptrdiff_t ptrdiff = - (capacities.size() * sizeof(typename value_type::size_type)); + (capacities.size() * sizeof(typename value_type::size_type)) + + get_buffer_alignment_padding(capacities.size()); for (std::size_t i = 0; i < capacities.size(); ++i) { new (host_ptr() + i) value_type( static_cast(capacities[i]), diff --git a/core/include/vecmem/containers/impl/vector_buffer.ipp b/core/include/vecmem/containers/impl/vector_buffer.ipp index 2c054bb1..6349816d 100644 --- a/core/include/vecmem/containers/impl/vector_buffer.ipp +++ b/core/include/vecmem/containers/impl/vector_buffer.ipp @@ -1,6 +1,6 @@ /** VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -11,6 +11,40 @@ namespace { +/// @name Buffer alignment helper(s) +/// @{ + +/// Trait for determining a possible padding between the vector size variable +/// and the vector payload. +template +struct buffer_alignment_padding; + +/// Specialisation of the trait for "small" vector types +template +struct buffer_alignment_padding< + TYPE, + typename std::enable_if_t<( + alignof(TYPE) <= + alignof(typename vecmem::data::vector_buffer::size_type))> > { + /// Alignment padding value + static constexpr std::size_t value = 0; +}; + +/// Specialisation of the trait for "large" vector types +template +struct buffer_alignment_padding< + TYPE, + typename std::enable_if_t<( + alignof(TYPE) > + alignof(typename vecmem::data::vector_buffer::size_type))> > { + /// Alignment padding value + static constexpr std::size_t value = + alignof(TYPE) - + alignof(typename vecmem::data::vector_buffer::size_type); +}; + +/// @} + /// Function creating the smart pointer for @c vecmem::data::vector_buffer template vecmem::unique_alloc_ptr allocate_buffer_memory( @@ -26,7 +60,8 @@ vecmem::unique_alloc_ptr allocate_buffer_memory( ((capacity == size) ? (capacity * sizeof(TYPE)) : (sizeof(typename vecmem::data::vector_buffer::size_type) + - capacity * sizeof(TYPE))); + capacity * sizeof(TYPE) + + buffer_alignment_padding::value)); if (capacity == 0) { return nullptr; @@ -56,8 +91,9 @@ vector_buffer::vector_buffer(size_type capacity, size_type size, base_type::m_ptr = reinterpret_cast(m_memory.get()); } else { base_type::m_size = reinterpret_cast(m_memory.get()); - base_type::m_ptr = - reinterpret_cast(m_memory.get() + sizeof(size_type)); + base_type::m_ptr = reinterpret_cast( + m_memory.get() + sizeof(size_type) + + buffer_alignment_padding::value); } } } diff --git a/tests/cuda/test_cuda_containers.cpp b/tests/cuda/test_cuda_containers.cpp index 645bd2dc..5cca733f 100644 --- a/tests/cuda/test_cuda_containers.cpp +++ b/tests/cuda/test_cuda_containers.cpp @@ -1,6 +1,6 @@ /** VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -8,6 +8,8 @@ // Local include(s). #include "test_cuda_containers_kernels.cuh" #include "vecmem/containers/array.hpp" +#include "vecmem/containers/data/jagged_vector_buffer.hpp" +#include "vecmem/containers/data/vector_buffer.hpp" #include "vecmem/containers/static_array.hpp" #include "vecmem/containers/vector.hpp" #include "vecmem/memory/cuda/device_memory_resource.hpp" @@ -236,3 +238,23 @@ TEST_F(cuda_containers_test, array_memory) { EXPECT_EQ(vec_array.at(2).at(2), 18); EXPECT_EQ(vec_array.at(3).size(), 0u); } + +/// Test buffers with "large" elements (for which alignment becomes important) +TEST_F(cuda_containers_test, large_buffer) { + + // The memory resource(s). + vecmem::cuda::managed_memory_resource managed_resource; + + // Test a (1D) vector. + vecmem::data::vector_buffer buffer1(3, 0, managed_resource); + m_copy.setup(buffer1); + largeBufferTransform(buffer1); + EXPECT_EQ(m_copy.get_size(buffer1), 1u); + + // Test a (2D) jagged vector. + vecmem::data::jagged_vector_buffer buffer2( + {0, 0, 0}, {3, 3, 3}, managed_resource); + m_copy.setup(buffer2); + largeBufferTransform(buffer2); + EXPECT_EQ(m_copy.get_sizes(buffer2), std::vector({0, 1u, 0})); +} diff --git a/tests/cuda/test_cuda_containers_kernels.cu b/tests/cuda/test_cuda_containers_kernels.cu index c4801ad8..6e2abe52 100644 --- a/tests/cuda/test_cuda_containers_kernels.cu +++ b/tests/cuda/test_cuda_containers_kernels.cu @@ -1,6 +1,6 @@ /** VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -16,6 +16,9 @@ #include "vecmem/containers/static_array.hpp" #include "vecmem/memory/atomic.hpp" +// System include(s). +#include + /// Kernel performing a linear transformation using the vector helper types __global__ void linearTransformKernel( vecmem::data::vector_view constants, @@ -233,3 +236,53 @@ void arrayTransform( VECMEM_CUDA_ERROR_CHECK(cudaGetLastError()); VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); } + +/// Kernel making a trivial use of the resizable vector that it receives +__global__ void largeBufferTransformKernel( + vecmem::data::vector_view data) { + + // Add one element to the vector in just the first thread + const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; + if (i != 0) { + return; + } + vecmem::device_vector vec(data); + assert(vec.size() == 0); + vec.push_back(0); +} + +void largeBufferTransform(vecmem::data::vector_view data) { + + // Launch the kernel. + largeBufferTransformKernel<<<1, 1>>>(data); + + // Check whether it succeeded to run. + VECMEM_CUDA_ERROR_CHECK(cudaGetLastError()); + VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); +} + +/// Kernel making a trivial use of the resizable jagged vector that it receives +__global__ void largeBufferTransformKernel( + vecmem::data::jagged_vector_view data) { + + // Add one element to the vector in just the first thread + const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; + if (i != 0) { + return; + } + vecmem::jagged_device_vector vec(data); + assert(vec.size() == 3); + assert(vec.at(1).size() == 0); + vec.at(1).push_back(0); +} + +void largeBufferTransform( + vecmem::data::jagged_vector_view data) { + + // Launch the kernel. + largeBufferTransformKernel<<<1, 1>>>(data); + + // Check whether it succeeded to run. + VECMEM_CUDA_ERROR_CHECK(cudaGetLastError()); + VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize()); +} diff --git a/tests/cuda/test_cuda_containers_kernels.cuh b/tests/cuda/test_cuda_containers_kernels.cuh index e01e69a6..a8d83625 100644 --- a/tests/cuda/test_cuda_containers_kernels.cuh +++ b/tests/cuda/test_cuda_containers_kernels.cuh @@ -1,6 +1,6 @@ /** VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -46,3 +46,9 @@ void fillTransform(vecmem::data::jagged_vector_view vec); /// Function transforming the elements of an array of vectors void arrayTransform( vecmem::static_array, 4> data); + +/// Function performing a trivial operation on a "large" vector buffer +void largeBufferTransform(vecmem::data::vector_view data); + +/// Function performing a trivial operation on a "large" jagged vector buffer +void largeBufferTransform(vecmem::data::jagged_vector_view data); From 3beb74a23561054a3444699293836178c378985c Mon Sep 17 00:00:00 2001 From: Attila Krasznahorkay Date: Tue, 25 Jan 2022 16:24:27 +0100 Subject: [PATCH 2/2] Switched to using std::align for aligning the buffer payloads. The code still assumes that the memory provided by any memory resource will be appropriate for the unsigned int size variables of the vectors. The alignment is only explicitly performed for the vector elements. --- .../containers/data/jagged_vector_buffer.hpp | 4 +- .../containers/impl/jagged_vector_buffer.ipp | 76 ++++++++----- .../vecmem/containers/impl/vector_buffer.ipp | 104 ++++++------------ 3 files changed, 83 insertions(+), 101 deletions(-) diff --git a/core/include/vecmem/containers/data/jagged_vector_buffer.hpp b/core/include/vecmem/containers/data/jagged_vector_buffer.hpp index dfb485c5..33bf7d96 100644 --- a/core/include/vecmem/containers/data/jagged_vector_buffer.hpp +++ b/core/include/vecmem/containers/data/jagged_vector_buffer.hpp @@ -1,7 +1,7 @@ /* * VecMem project, part of the ACTS project (R&D line) * - * (c) 2021 CERN for the benefit of the ACTS project + * (c) 2021-2022 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -112,6 +112,8 @@ class jagged_vector_buffer : public jagged_vector_view { vecmem::unique_alloc_ptr m_outer_memory; /// Data object for the @c vecmem::data::vector_view array on the host vecmem::unique_alloc_ptr m_outer_host_memory; + /// Size of the buffer held by @c m_inner_memory; + std::size_t m_inner_memory_size; /// Data object owning the memory of the "inner vectors" vecmem::unique_alloc_ptr m_inner_memory; diff --git a/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp b/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp index 3d6f1dd4..ca5d1718 100644 --- a/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp +++ b/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp @@ -10,6 +10,7 @@ // System include(s). #include #include +#include #include #include @@ -27,18 +28,6 @@ std::vector get_sizes( return result; } -/// Get the padding needed after the size array for correct alignment -template -std::size_t get_buffer_alignment_padding(std::size_t size) { - - // Total size of the size array. - const std::size_t size_size = - size * sizeof(typename vecmem::data::jagged_vector_buffer< - TYPE>::value_type::size_type); - // Return the padding needed after the size array. - return (alignof(TYPE) - (size_size % alignof(TYPE))); -} - /// Function allocating memory for @c vecmem::data::jagged_vector_buffer template vecmem::unique_alloc_ptr< @@ -60,19 +49,27 @@ allocate_jagged_buffer_outer_memory( template vecmem::unique_alloc_ptr allocate_jagged_buffer_inner_memory( const std::vector& sizes, vecmem::memory_resource& resource, - bool isResizable) { - typename vecmem::data::jagged_vector_buffer::size_type byteSize = - std::accumulate(sizes.begin(), sizes.end(), - static_cast(0)) * - sizeof(TYPE); + bool isResizable, std::size_t& bufferSize) { + + // Alignment for the vector elements. + static constexpr std::size_t alignment = alignof(TYPE); + + // Determine the allocation size. + bufferSize = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0)) * + sizeof(TYPE); + // Increase this size if the buffer describes a resizable vector. if (isResizable) { - byteSize += + bufferSize += sizes.size() * sizeof(typename vecmem::data::jagged_vector_buffer< - TYPE>::value_type::size_type) + - get_buffer_alignment_padding(sizes.size()); + TYPE>::value_type::size_type); + // Further increase this size so that we could for sure align the + // payload data correctly. + bufferSize = ((bufferSize + alignment - 1) / alignment) * alignment; } - return vecmem::make_unique_alloc(resource, byteSize); + // Return a smart pointer with the allocation. + return vecmem::make_unique_alloc(resource, bufferSize); } } // namespace @@ -101,8 +98,9 @@ jagged_vector_buffer::jagged_vector_buffer( sizes.size(), (host_access_resource == nullptr ? resource : *host_access_resource))), - m_inner_memory( - ::allocate_jagged_buffer_inner_memory(sizes, resource, false)) { + m_inner_memory_size(0u), + m_inner_memory(::allocate_jagged_buffer_inner_memory( + sizes, resource, false, m_inner_memory_size)) { // Point the base class at the newly allocated memory. base_type::m_ptr = @@ -131,8 +129,9 @@ jagged_vector_buffer::jagged_vector_buffer( sizes.size(), (host_access_resource == nullptr ? resource : *host_access_resource))), + m_inner_memory_size(0u), m_inner_memory(::allocate_jagged_buffer_inner_memory( - capacities, resource, true)) { + capacities, resource, true, m_inner_memory_size)) { // Some sanity check. assert(sizes.size() == capacities.size()); @@ -142,17 +141,36 @@ jagged_vector_buffer::jagged_vector_buffer( ((host_access_resource != nullptr) ? m_outer_memory.get() : m_outer_host_memory.get()); - // Set up the host accessible memory array. - std::ptrdiff_t ptrdiff = - (capacities.size() * sizeof(typename value_type::size_type)) + - get_buffer_alignment_padding(capacities.size()); + // Size of the data payload. + const std::size_t dataSize = + std::accumulate(capacities.begin(), capacities.end(), + static_cast(0)) * + sizeof(TYPE); + + // + // Construct a correctly aligned "start pointer" for the data elements. + // + // Construct the unaligned pointer by simply jumping over the "size array". + void* unaligned_start_ptr = + m_inner_memory.get() + + (capacities.size() * sizeof(typename value_type::size_type)); + // The remaining size of the buffer, with the "size array" size removed. + std::size_t buffer_size = + m_inner_memory_size - + (capacities.size() * sizeof(typename value_type::size_type)); + // Construct the "start pointer" using std::align. + char* start_ptr = static_cast( + std::align(alignof(TYPE), dataSize, unaligned_start_ptr, buffer_size)); + + // Set up the vecmem::vector_view objects in the host accessible memory. + std::ptrdiff_t ptrdiff = 0; for (std::size_t i = 0; i < capacities.size(); ++i) { new (host_ptr() + i) value_type( static_cast(capacities[i]), reinterpret_cast( m_inner_memory.get() + i * sizeof(typename value_type::size_type)), - reinterpret_cast(m_inner_memory.get() + ptrdiff)); + reinterpret_cast(start_ptr + ptrdiff)); ptrdiff += capacities[i] * sizeof(TYPE); } } diff --git a/core/include/vecmem/containers/impl/vector_buffer.ipp b/core/include/vecmem/containers/impl/vector_buffer.ipp index 6349816d..072ad220 100644 --- a/core/include/vecmem/containers/impl/vector_buffer.ipp +++ b/core/include/vecmem/containers/impl/vector_buffer.ipp @@ -8,93 +8,55 @@ // System include(s). #include +#include -namespace { - -/// @name Buffer alignment helper(s) -/// @{ - -/// Trait for determining a possible padding between the vector size variable -/// and the vector payload. -template -struct buffer_alignment_padding; +namespace vecmem { +namespace data { -/// Specialisation of the trait for "small" vector types -template -struct buffer_alignment_padding< - TYPE, - typename std::enable_if_t<( - alignof(TYPE) <= - alignof(typename vecmem::data::vector_buffer::size_type))> > { - /// Alignment padding value - static constexpr std::size_t value = 0; -}; - -/// Specialisation of the trait for "large" vector types template -struct buffer_alignment_padding< - TYPE, - typename std::enable_if_t<( - alignof(TYPE) > - alignof(typename vecmem::data::vector_buffer::size_type))> > { - /// Alignment padding value - static constexpr std::size_t value = - alignof(TYPE) - - alignof(typename vecmem::data::vector_buffer::size_type); -}; - -/// @} - -/// Function creating the smart pointer for @c vecmem::data::vector_buffer +vector_buffer::vector_buffer(size_type size, memory_resource& resource) + : vector_buffer(size, size, resource) {} + template -vecmem::unique_alloc_ptr allocate_buffer_memory( - typename vecmem::data::vector_buffer::size_type capacity, - typename vecmem::data::vector_buffer::size_type size, - vecmem::memory_resource& resource) { +vector_buffer::vector_buffer(size_type capacity, size_type size, + memory_resource& resource) + : base_type(capacity, nullptr, nullptr), m_memory() { // A sanity check. assert(capacity >= size); - // Decide how many bytes to allocate. - const std::size_t byteSize = - ((capacity == size) - ? (capacity * sizeof(TYPE)) - : (sizeof(typename vecmem::data::vector_buffer::size_type) + - capacity * sizeof(TYPE) + - buffer_alignment_padding::value)); - + // Exit early for null-capacity buffers. if (capacity == 0) { - return nullptr; - } else { - return vecmem::make_unique_alloc(resource, byteSize); + return; } -} -} // namespace + // Alignment for the vector elements. + static constexpr std::size_t alignment = alignof(TYPE); -namespace vecmem { -namespace data { + // Decide how many bytes we need to allocate. + std::size_t byteSize = capacity * sizeof(TYPE); -template -vector_buffer::vector_buffer(size_type size, memory_resource& resource) - : vector_buffer(size, size, resource) {} + // Increase this size if the buffer describes a resizable vector. + if (capacity != size) { + byteSize += + sizeof(typename vecmem::data::vector_buffer::size_type); + // Further increase this size so that we could for sure align the + // payload data correctly. + byteSize = ((byteSize + alignment - 1) / alignment) * alignment; + } -template -vector_buffer::vector_buffer(size_type capacity, size_type size, - memory_resource& resource) - : base_type(capacity, nullptr, nullptr), - m_memory(::allocate_buffer_memory(capacity, size, resource)) { + // Allocate the memory. + m_memory = vecmem::make_unique_alloc(resource, byteSize); // Set the base class's pointers correctly. - if (capacity > 0) { - if (size == capacity) { - base_type::m_ptr = reinterpret_cast(m_memory.get()); - } else { - base_type::m_size = reinterpret_cast(m_memory.get()); - base_type::m_ptr = reinterpret_cast( - m_memory.get() + sizeof(size_type) + - buffer_alignment_padding::value); - } + if (size == capacity) { + base_type::m_ptr = reinterpret_cast(m_memory.get()); + } else { + base_type::m_size = reinterpret_cast(m_memory.get()); + void* ptr = m_memory.get() + sizeof(size_type); + std::size_t space = byteSize - sizeof(size_type); + base_type::m_ptr = reinterpret_cast( + std::align(alignof(TYPE), capacity * sizeof(TYPE), ptr, space)); } }