Skip to content

Commit

Permalink
Merge pull request #160 from krasznaa/BufferAlignFixes-main-20220125
Browse files Browse the repository at this point in the history
Buffer Alignment Fixes, main branch (2022.01.25.)
  • Loading branch information
krasznaa authored Jan 25, 2022
2 parents 9d62ea5 + 3beb74a commit caa4b58
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 55 deletions.
4 changes: 3 additions & 1 deletion core/include/vecmem/containers/data/jagged_vector_buffer.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand Down Expand Up @@ -112,6 +112,8 @@ class jagged_vector_buffer : public jagged_vector_view<TYPE> {
vecmem::unique_alloc_ptr<value_type[]> m_outer_memory;
/// Data object for the @c vecmem::data::vector_view array on the host
vecmem::unique_alloc_ptr<value_type[]> m_outer_host_memory;
/// Size of the buffer held by @c m_inner_memory;
std::size_t m_inner_memory_size;
/// Data object owning the memory of the "inner vectors"
vecmem::unique_alloc_ptr<char[]> m_inner_memory;

Expand Down
60 changes: 46 additions & 14 deletions core/include/vecmem/containers/impl/jagged_vector_buffer.ipp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand All @@ -10,6 +10,7 @@
// System include(s).
#include <cassert>
#include <cstddef>
#include <memory>
#include <numeric>
#include <vector>

Expand Down Expand Up @@ -48,18 +49,27 @@ allocate_jagged_buffer_outer_memory(
template <typename TYPE>
vecmem::unique_alloc_ptr<char[]> allocate_jagged_buffer_inner_memory(
const std::vector<std::size_t>& sizes, vecmem::memory_resource& resource,
bool isResizable) {
typename vecmem::data::jagged_vector_buffer<TYPE>::size_type byteSize =
std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0)) *
sizeof(TYPE);
bool isResizable, std::size_t& bufferSize) {

// Alignment for the vector elements.
static constexpr std::size_t alignment = alignof(TYPE);

// Determine the allocation size.
bufferSize = std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0)) *
sizeof(TYPE);
// Increase this size if the buffer describes a resizable vector.
if (isResizable) {
byteSize +=
bufferSize +=
sizes.size() * sizeof(typename vecmem::data::jagged_vector_buffer<
TYPE>::value_type::size_type);
// Further increase this size so that we could for sure align the
// payload data correctly.
bufferSize = ((bufferSize + alignment - 1) / alignment) * alignment;
}

return vecmem::make_unique_alloc<char[]>(resource, byteSize);
// Return a smart pointer with the allocation.
return vecmem::make_unique_alloc<char[]>(resource, bufferSize);
}

} // namespace
Expand Down Expand Up @@ -88,8 +98,9 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
sizes.size(),
(host_access_resource == nullptr ? resource
: *host_access_resource))),
m_inner_memory(
::allocate_jagged_buffer_inner_memory<TYPE>(sizes, resource, false)) {
m_inner_memory_size(0u),
m_inner_memory(::allocate_jagged_buffer_inner_memory<TYPE>(
sizes, resource, false, m_inner_memory_size)) {

// Point the base class at the newly allocated memory.
base_type::m_ptr =
Expand Down Expand Up @@ -118,8 +129,9 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
sizes.size(),
(host_access_resource == nullptr ? resource
: *host_access_resource))),
m_inner_memory_size(0u),
m_inner_memory(::allocate_jagged_buffer_inner_memory<TYPE>(
capacities, resource, true)) {
capacities, resource, true, m_inner_memory_size)) {

// Some sanity check.
assert(sizes.size() == capacities.size());
Expand All @@ -129,16 +141,36 @@ jagged_vector_buffer<TYPE>::jagged_vector_buffer(
((host_access_resource != nullptr) ? m_outer_memory.get()
: m_outer_host_memory.get());

// Set up the host accessible memory array.
std::ptrdiff_t ptrdiff =
// Size of the data payload.
const std::size_t dataSize =
std::accumulate(capacities.begin(), capacities.end(),
static_cast<std::size_t>(0)) *
sizeof(TYPE);

//
// Construct a correctly aligned "start pointer" for the data elements.
//
// Construct the unaligned pointer by simply jumping over the "size array".
void* unaligned_start_ptr =
m_inner_memory.get() +
(capacities.size() * sizeof(typename value_type::size_type));
// The remaining size of the buffer, with the "size array" size removed.
std::size_t buffer_size =
m_inner_memory_size -
(capacities.size() * sizeof(typename value_type::size_type));
// Construct the "start pointer" using std::align.
char* start_ptr = static_cast<char*>(
std::align(alignof(TYPE), dataSize, unaligned_start_ptr, buffer_size));

// Set up the vecmem::vector_view objects in the host accessible memory.
std::ptrdiff_t ptrdiff = 0;
for (std::size_t i = 0; i < capacities.size(); ++i) {
new (host_ptr() + i) value_type(
static_cast<typename value_type::size_type>(capacities[i]),
reinterpret_cast<typename value_type::size_pointer>(
m_inner_memory.get() +
i * sizeof(typename value_type::size_type)),
reinterpret_cast<TYPE*>(m_inner_memory.get() + ptrdiff));
reinterpret_cast<TYPE*>(start_ptr + ptrdiff));
ptrdiff += capacities[i] * sizeof(TYPE);
}
}
Expand Down
72 changes: 35 additions & 37 deletions core/include/vecmem/containers/impl/vector_buffer.ipp
Original file line number Diff line number Diff line change
@@ -1,64 +1,62 @@
/** VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
#pragma once

// System include(s).
#include <cassert>
#include <memory>

namespace {
namespace vecmem {
namespace data {

/// Function creating the smart pointer for @c vecmem::data::vector_buffer
template <typename TYPE>
vecmem::unique_alloc_ptr<char[]> allocate_buffer_memory(
typename vecmem::data::vector_buffer<TYPE>::size_type capacity,
typename vecmem::data::vector_buffer<TYPE>::size_type size,
vecmem::memory_resource& resource) {
vector_buffer<TYPE>::vector_buffer(size_type size, memory_resource& resource)
: vector_buffer(size, size, resource) {}

template <typename TYPE>
vector_buffer<TYPE>::vector_buffer(size_type capacity, size_type size,
memory_resource& resource)
: base_type(capacity, nullptr, nullptr), m_memory() {

// A sanity check.
assert(capacity >= size);

// Decide how many bytes to allocate.
const std::size_t byteSize =
((capacity == size)
? (capacity * sizeof(TYPE))
: (sizeof(typename vecmem::data::vector_buffer<TYPE>::size_type) +
capacity * sizeof(TYPE)));

// Exit early for null-capacity buffers.
if (capacity == 0) {
return nullptr;
} else {
return vecmem::make_unique_alloc<char[]>(resource, byteSize);
return;
}
}

} // namespace
// Alignment for the vector elements.
static constexpr std::size_t alignment = alignof(TYPE);

namespace vecmem {
namespace data {
// Decide how many bytes we need to allocate.
std::size_t byteSize = capacity * sizeof(TYPE);

template <typename TYPE>
vector_buffer<TYPE>::vector_buffer(size_type size, memory_resource& resource)
: vector_buffer(size, size, resource) {}
// Increase this size if the buffer describes a resizable vector.
if (capacity != size) {
byteSize +=
sizeof(typename vecmem::data::vector_buffer<TYPE>::size_type);
// Further increase this size so that we could for sure align the
// payload data correctly.
byteSize = ((byteSize + alignment - 1) / alignment) * alignment;
}

template <typename TYPE>
vector_buffer<TYPE>::vector_buffer(size_type capacity, size_type size,
memory_resource& resource)
: base_type(capacity, nullptr, nullptr),
m_memory(::allocate_buffer_memory<TYPE>(capacity, size, resource)) {
// Allocate the memory.
m_memory = vecmem::make_unique_alloc<char[]>(resource, byteSize);

// Set the base class's pointers correctly.
if (capacity > 0) {
if (size == capacity) {
base_type::m_ptr = reinterpret_cast<pointer>(m_memory.get());
} else {
base_type::m_size = reinterpret_cast<size_pointer>(m_memory.get());
base_type::m_ptr =
reinterpret_cast<pointer>(m_memory.get() + sizeof(size_type));
}
if (size == capacity) {
base_type::m_ptr = reinterpret_cast<pointer>(m_memory.get());
} else {
base_type::m_size = reinterpret_cast<size_pointer>(m_memory.get());
void* ptr = m_memory.get() + sizeof(size_type);
std::size_t space = byteSize - sizeof(size_type);
base_type::m_ptr = reinterpret_cast<pointer>(
std::align(alignof(TYPE), capacity * sizeof(TYPE), ptr, space));
}
}

Expand Down
24 changes: 23 additions & 1 deletion tests/cuda/test_cuda_containers.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
/** VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

// Local include(s).
#include "test_cuda_containers_kernels.cuh"
#include "vecmem/containers/array.hpp"
#include "vecmem/containers/data/jagged_vector_buffer.hpp"
#include "vecmem/containers/data/vector_buffer.hpp"
#include "vecmem/containers/static_array.hpp"
#include "vecmem/containers/vector.hpp"
#include "vecmem/memory/cuda/device_memory_resource.hpp"
Expand Down Expand Up @@ -236,3 +238,23 @@ TEST_F(cuda_containers_test, array_memory) {
EXPECT_EQ(vec_array.at(2).at(2), 18);
EXPECT_EQ(vec_array.at(3).size(), 0u);
}

/// Test buffers with "large" elements (for which alignment becomes important)
TEST_F(cuda_containers_test, large_buffer) {

// The memory resource(s).
vecmem::cuda::managed_memory_resource managed_resource;

// Test a (1D) vector.
vecmem::data::vector_buffer<unsigned long> buffer1(3, 0, managed_resource);
m_copy.setup(buffer1);
largeBufferTransform(buffer1);
EXPECT_EQ(m_copy.get_size(buffer1), 1u);

// Test a (2D) jagged vector.
vecmem::data::jagged_vector_buffer<unsigned long> buffer2(
{0, 0, 0}, {3, 3, 3}, managed_resource);
m_copy.setup(buffer2);
largeBufferTransform(buffer2);
EXPECT_EQ(m_copy.get_sizes(buffer2), std::vector<unsigned int>({0, 1u, 0}));
}
55 changes: 54 additions & 1 deletion tests/cuda/test_cuda_containers_kernels.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/** VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand All @@ -16,6 +16,9 @@
#include "vecmem/containers/static_array.hpp"
#include "vecmem/memory/atomic.hpp"

// System include(s).
#include <cassert>

/// Kernel performing a linear transformation using the vector helper types
__global__ void linearTransformKernel(
vecmem::data::vector_view<const int> constants,
Expand Down Expand Up @@ -233,3 +236,53 @@ void arrayTransform(
VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
/// Kernel making a trivial use of the resizable vector that it receives
__global__ void largeBufferTransformKernel(
vecmem::data::vector_view<unsigned long> data) {
// Add one element to the vector in just the first thread
const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
if (i != 0) {
return;
}
vecmem::device_vector<unsigned long> vec(data);
assert(vec.size() == 0);
vec.push_back(0);
}
void largeBufferTransform(vecmem::data::vector_view<unsigned long> data) {
// Launch the kernel.
largeBufferTransformKernel<<<1, 1>>>(data);
// Check whether it succeeded to run.
VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
/// Kernel making a trivial use of the resizable jagged vector that it receives
__global__ void largeBufferTransformKernel(
vecmem::data::jagged_vector_view<unsigned long> data) {
// Add one element to the vector in just the first thread
const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
if (i != 0) {
return;
}
vecmem::jagged_device_vector<unsigned long> vec(data);
assert(vec.size() == 3);
assert(vec.at(1).size() == 0);
vec.at(1).push_back(0);
}
void largeBufferTransform(
vecmem::data::jagged_vector_view<unsigned long> data) {
// Launch the kernel.
largeBufferTransformKernel<<<1, 1>>>(data);
// Check whether it succeeded to run.
VECMEM_CUDA_ERROR_CHECK(cudaGetLastError());
VECMEM_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
8 changes: 7 additions & 1 deletion tests/cuda/test_cuda_containers_kernels.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/** VecMem project, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
* (c) 2021-2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand Down Expand Up @@ -46,3 +46,9 @@ void fillTransform(vecmem::data::jagged_vector_view<int> vec);
/// Function transforming the elements of an array of vectors
void arrayTransform(
vecmem::static_array<vecmem::data::vector_view<int>, 4> data);

/// Function performing a trivial operation on a "large" vector buffer
void largeBufferTransform(vecmem::data::vector_view<unsigned long> data);

/// Function performing a trivial operation on a "large" jagged vector buffer
void largeBufferTransform(vecmem::data::jagged_vector_view<unsigned long> data);

0 comments on commit caa4b58

Please sign in to comment.