Merge pull request #160 from krasznaa/BufferAlignFixes-main-20220125

Buffer Alignment Fixes, main branch (2022.01.25.)
acts-project · Jan 25, 2022 · caa4b58 · caa4b58
2 parents 9d62ea5 + 3beb74a
commit caa4b58
Show file tree

Hide file tree

Showing 6 changed files with 168 additions and 55 deletions.
diff --git a/core/include/vecmem/containers/data/jagged_vector_buffer.hpp b/core/include/vecmem/containers/data/jagged_vector_buffer.hpp
@@ -1,7 +1,7 @@
 /*
  * VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -112,6 +112,8 @@ class jagged_vector_buffer : public jagged_vector_view<TYPE> {
     vecmem::unique_alloc_ptr<value_type[]> m_outer_memory;
     /// Data object for the @c vecmem::data::vector_view array on the host
     vecmem::unique_alloc_ptr<value_type[]> m_outer_host_memory;
+    /// Size of the buffer held by @c m_inner_memory;
+    std::size_t m_inner_memory_size;
     /// Data object owning the memory of the "inner vectors"
     vecmem::unique_alloc_ptr<char[]> m_inner_memory;
 

diff --git a/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp b/core/include/vecmem/containers/impl/jagged_vector_buffer.ipp
@@ -1,7 +1,7 @@
 /*
  * VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -10,6 +10,7 @@
 // System include(s).
 #include <cassert>
 #include <cstddef>
+#include <memory>
 #include <numeric>
 #include <vector>
 
@@ -48,18 +49,27 @@ allocate_jagged_buffer_outer_memory(
 template <typename TYPE>
 vecmem::unique_alloc_ptr<char[]> allocate_jagged_buffer_inner_memory(
     const std::vector<std::size_t>& sizes, vecmem::memory_resource& resource,
-    bool isResizable) {
-    typename vecmem::data::jagged_vector_buffer<TYPE>::size_type byteSize =
-        std::accumulate(sizes.begin(), sizes.end(),
-                        static_cast<std::size_t>(0)) *
-        sizeof(TYPE);
+    bool isResizable, std::size_t& bufferSize) {
+
+    // Alignment for the vector elements.
+    static constexpr std::size_t alignment = alignof(TYPE);
+
+    // Determine the allocation size.
+    bufferSize = std::accumulate(sizes.begin(), sizes.end(),
+                                 static_cast<std::size_t>(0)) *
+                 sizeof(TYPE);
+    // Increase this size if the buffer describes a resizable vector.
     if (isResizable) {
-        byteSize +=
+        bufferSize +=
             sizes.size() * sizeof(typename vecmem::data::jagged_vector_buffer<
                                   TYPE>::value_type::size_type);
+        // Further increase this size so that we could for sure align the
+        // payload data correctly.
+        bufferSize = ((bufferSize + alignment - 1) / alignment) * alignment;
     }
 
-    return vecmem::make_unique_alloc<char[]>(resource, byteSize);
+    // Return a smart pointer with the allocation.
+    return vecmem::make_unique_alloc<char[]>(resource, bufferSize);
 }
 
 }  // namespace
@@ -88,8 +98,9 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
           sizes.size(),
           (host_access_resource == nullptr ? resource
                                            : *host_access_resource))),
-      m_inner_memory(
-          ::allocate_jagged_buffer_inner_memory<TYPE>(sizes, resource, false)) {
+      m_inner_memory_size(0u),
+      m_inner_memory(::allocate_jagged_buffer_inner_memory<TYPE>(
+          sizes, resource, false, m_inner_memory_size)) {
 
     // Point the base class at the newly allocated memory.
     base_type::m_ptr =
@@ -118,8 +129,9 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
           sizes.size(),
           (host_access_resource == nullptr ? resource
                                            : *host_access_resource))),
+      m_inner_memory_size(0u),
       m_inner_memory(::allocate_jagged_buffer_inner_memory<TYPE>(
-          capacities, resource, true)) {
+          capacities, resource, true, m_inner_memory_size)) {
 
     // Some sanity check.
     assert(sizes.size() == capacities.size());
@@ -129,16 +141,36 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
         ((host_access_resource != nullptr) ? m_outer_memory.get()
                                            : m_outer_host_memory.get());
 
-    // Set up the host accessible memory array.
-    std::ptrdiff_t ptrdiff =
+    // Size of the data payload.
+    const std::size_t dataSize =
+        std::accumulate(capacities.begin(), capacities.end(),
+                        static_cast<std::size_t>(0)) *
+        sizeof(TYPE);
+
+    //
+    // Construct a correctly aligned "start pointer" for the data elements.
+    //
+    // Construct the unaligned pointer by simply jumping over the "size array".
+    void* unaligned_start_ptr =
+        m_inner_memory.get() +
         (capacities.size() * sizeof(typename value_type::size_type));
+    // The remaining size of the buffer, with the "size array" size removed.
+    std::size_t buffer_size =
+        m_inner_memory_size -
+        (capacities.size() * sizeof(typename value_type::size_type));
+    // Construct the "start pointer" using std::align.
+    char* start_ptr = static_cast<char*>(
+        std::align(alignof(TYPE), dataSize, unaligned_start_ptr, buffer_size));
+
+    // Set up the vecmem::vector_view objects in the host accessible memory.
+    std::ptrdiff_t ptrdiff = 0;
     for (std::size_t i = 0; i < capacities.size(); ++i) {
         new (host_ptr() + i) value_type(
             static_cast<typename value_type::size_type>(capacities[i]),
             reinterpret_cast<typename value_type::size_pointer>(
                 m_inner_memory.get() +
                 i * sizeof(typename value_type::size_type)),
-            reinterpret_cast<TYPE*>(m_inner_memory.get() + ptrdiff));
+            reinterpret_cast<TYPE*>(start_ptr + ptrdiff));
         ptrdiff += capacities[i] * sizeof(TYPE);
     }
 }

diff --git a/core/include/vecmem/containers/impl/vector_buffer.ipp b/core/include/vecmem/containers/impl/vector_buffer.ipp
@@ -1,64 +1,62 @@
 /** VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
 #pragma once
 
 // System include(s).
 #include <cassert>
+#include <memory>
 
-namespace {
+namespace vecmem {
+namespace data {
 
-/// Function creating the smart pointer for @c vecmem::data::vector_buffer
 template <typename TYPE>
-vecmem::unique_alloc_ptr<char[]> allocate_buffer_memory(
-    typename vecmem::data::vector_buffer<TYPE>::size_type capacity,
-    typename vecmem::data::vector_buffer<TYPE>::size_type size,
-    vecmem::memory_resource& resource) {
+vector_buffer<TYPE>::vector_buffer(size_type size, memory_resource& resource)
+    : vector_buffer(size, size, resource) {}
+
+template <typename TYPE>
+vector_buffer<TYPE>::vector_buffer(size_type capacity, size_type size,
+                                   memory_resource& resource)
+    : base_type(capacity, nullptr, nullptr), m_memory() {
 
     // A sanity check.
     assert(capacity >= size);
 
-    // Decide how many bytes to allocate.
-    const std::size_t byteSize =
-        ((capacity == size)
-             ? (capacity * sizeof(TYPE))
-             : (sizeof(typename vecmem::data::vector_buffer<TYPE>::size_type) +
-                capacity * sizeof(TYPE)));
-
+    // Exit early for null-capacity buffers.
     if (capacity == 0) {
-        return nullptr;
-    } else {
-        return vecmem::make_unique_alloc<char[]>(resource, byteSize);
+        return;
     }
-}
 
-}  // namespace
+    // Alignment for the vector elements.
+    static constexpr std::size_t alignment = alignof(TYPE);
 
-namespace vecmem {
-namespace data {
+    // Decide how many bytes we need to allocate.
+    std::size_t byteSize = capacity * sizeof(TYPE);
 
-template <typename TYPE>
-vector_buffer<TYPE>::vector_buffer(size_type size, memory_resource& resource)
-    : vector_buffer(size, size, resource) {}
+    // Increase this size if the buffer describes a resizable vector.
+    if (capacity != size) {
+        byteSize +=
+            sizeof(typename vecmem::data::vector_buffer<TYPE>::size_type);
+        // Further increase this size so that we could for sure align the
+        // payload data correctly.
+        byteSize = ((byteSize + alignment - 1) / alignment) * alignment;
+    }
 
-template <typename TYPE>
-vector_buffer<TYPE>::vector_buffer(size_type capacity, size_type size,
-                                   memory_resource& resource)
-    : base_type(capacity, nullptr, nullptr),
-      m_memory(::allocate_buffer_memory<TYPE>(capacity, size, resource)) {
+    // Allocate the memory.
+    m_memory = vecmem::make_unique_alloc<char[]>(resource, byteSize);
 
     // Set the base class's pointers correctly.
-    if (capacity > 0) {
-        if (size == capacity) {
-            base_type::m_ptr = reinterpret_cast<pointer>(m_memory.get());
-        } else {
-            base_type::m_size = reinterpret_cast<size_pointer>(m_memory.get());
-            base_type::m_ptr =
-                reinterpret_cast<pointer>(m_memory.get() + sizeof(size_type));
-        }
+    if (size == capacity) {
+        base_type::m_ptr = reinterpret_cast<pointer>(m_memory.get());
+    } else {
+        base_type::m_size = reinterpret_cast<size_pointer>(m_memory.get());
+        void* ptr = m_memory.get() + sizeof(size_type);
+        std::size_t space = byteSize - sizeof(size_type);
+        base_type::m_ptr = reinterpret_cast<pointer>(
+            std::align(alignof(TYPE), capacity * sizeof(TYPE), ptr, space));
     }
 }
 

diff --git a/tests/cuda/test_cuda_containers.cpp b/tests/cuda/test_cuda_containers.cpp
@@ -1,13 +1,15 @@
 /** VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
 
 // Local include(s).
 #include "test_cuda_containers_kernels.cuh"
 #include "vecmem/containers/array.hpp"
+#include "vecmem/containers/data/jagged_vector_buffer.hpp"
+#include "vecmem/containers/data/vector_buffer.hpp"
 #include "vecmem/containers/static_array.hpp"
 #include "vecmem/containers/vector.hpp"
 #include "vecmem/memory/cuda/device_memory_resource.hpp"
@@ -236,3 +238,23 @@ TEST_F(cuda_containers_test, array_memory) {
     EXPECT_EQ(vec_array.at(2).at(2), 18);
     EXPECT_EQ(vec_array.at(3).size(), 0u);
 }
+
+/// Test buffers with "large" elements (for which alignment becomes important)
+TEST_F(cuda_containers_test, large_buffer) {
+
+    // The memory resource(s).
+    vecmem::cuda::managed_memory_resource managed_resource;
+
+    // Test a (1D) vector.
+    vecmem::data::vector_buffer<unsigned long> buffer1(3, 0, managed_resource);
+    m_copy.setup(buffer1);
+    largeBufferTransform(buffer1);
+    EXPECT_EQ(m_copy.get_size(buffer1), 1u);
+
+    // Test a (2D) jagged vector.
+    vecmem::data::jagged_vector_buffer<unsigned long> buffer2(
+        {0, 0, 0}, {3, 3, 3}, managed_resource);
+    m_copy.setup(buffer2);
+    largeBufferTransform(buffer2);
+    EXPECT_EQ(m_copy.get_sizes(buffer2), std::vector<unsigned int>({0, 1u, 0}));
+}
diff --git a/tests/cuda/test_cuda_containers_kernels.cu b/tests/cuda/test_cuda_containers_kernels.cu
@@ -1,6 +1,6 @@
 /** VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -16,6 +16,9 @@
 #include "vecmem/containers/static_array.hpp"
 #include "vecmem/memory/atomic.hpp"
 
+// System include(s).
+#include <cassert>
+
 /// Kernel performing a linear transformation using the vector helper types
 __global__ void linearTransformKernel(
     vecmem::data::vector_view<const int> constants,
@@ -233,3 +236,53 @@ void arrayTransform(
     VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
     VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
 }
+
+/// Kernel making a trivial use of the resizable vector that it receives
+__global__ void largeBufferTransformKernel(
+    vecmem::data::vector_view<unsigned long> data) {
+
+    // Add one element to the vector in just the first thread
+    const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
+    if (i != 0) {
+        return;
+    }
+    vecmem::device_vector<unsigned long> vec(data);
+    assert(vec.size() == 0);
+    vec.push_back(0);
+}
+
+void largeBufferTransform(vecmem::data::vector_view<unsigned long> data) {
+
+    // Launch the kernel.
+    largeBufferTransformKernel<<<1, 1>>>(data);
+
+    // Check whether it succeeded to run.
+    VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
+    VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
+}
+
+/// Kernel making a trivial use of the resizable jagged vector that it receives
+__global__ void largeBufferTransformKernel(
+    vecmem::data::jagged_vector_view<unsigned long> data) {
+
+    // Add one element to the vector in just the first thread
+    const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
+    if (i != 0) {
+        return;
+    }
+    vecmem::jagged_device_vector<unsigned long> vec(data);
+    assert(vec.size() == 3);
+    assert(vec.at(1).size() == 0);
+    vec.at(1).push_back(0);
+}
+
+void largeBufferTransform(
+    vecmem::data::jagged_vector_view<unsigned long> data) {
+
+    // Launch the kernel.
+    largeBufferTransformKernel<<<1, 1>>>(data);
+
+    // Check whether it succeeded to run.
+    VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
+    VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
+}
diff --git a/tests/cuda/test_cuda_containers_kernels.cuh b/tests/cuda/test_cuda_containers_kernels.cuh
@@ -1,6 +1,6 @@
 /** VecMem project, part of the ACTS project (R&D line)
  *
- * (c) 2021 CERN for the benefit of the ACTS project
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
@@ -46,3 +46,9 @@ void fillTransform(vecmem::data::jagged_vector_view<int> vec);
 /// Function transforming the elements of an array of vectors
 void arrayTransform(
     vecmem::static_array<vecmem::data::vector_view<int>, 4> data);
+
+/// Function performing a trivial operation on a "large" vector buffer
+void largeBufferTransform(vecmem::data::vector_view<unsigned long> data);
+
+/// Function performing a trivial operation on a "large" jagged vector buffer
+void largeBufferTransform(vecmem::data::jagged_vector_view<unsigned long> data);