From d9757a67a538880f67465ff79c99817c8897ea02 Mon Sep 17 00:00:00 2001 From: Attila Krasznahorkay Date: Mon, 30 May 2022 16:13:27 +0200 Subject: [PATCH 1/3] Introduced a set of benchmarks for jagged vector copying. --- benchmarks/CMakeLists.txt | 7 ++++ benchmarks/common/make_jagged_vector.cpp | 38 +++++++++++++++++++ benchmarks/common/make_jagged_vector.hpp | 35 +++++++++++++++++ benchmarks/core/CMakeLists.txt | 5 ++- benchmarks/core/benchmark_copy.cpp | 48 ++++++++++++++++++++++++ benchmarks/cuda/CMakeLists.txt | 5 ++- benchmarks/cuda/benchmark_copy.cpp | 48 ++++++++++++++++++++++++ 7 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 benchmarks/common/make_jagged_vector.cpp create mode 100644 benchmarks/common/make_jagged_vector.hpp create mode 100644 benchmarks/core/benchmark_copy.cpp create mode 100644 benchmarks/cuda/benchmark_copy.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0ed4aced..e981358d 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -23,6 +23,13 @@ if(VECMEM_SETUP_GOOGLEBENCHMARK) endif() endif() +# Build a common, helper library. +add_library( vecmem_benchmark_common STATIC + "common/make_jagged_vector.hpp" + "common/make_jagged_vector.cpp" ) +target_link_libraries( vecmem_benchmark_common + PUBLIC vecmem::core ) + # Include the library specific tests. add_subdirectory(core) if(VECMEM_BUILD_CUDA_LIBRARY) diff --git a/benchmarks/common/make_jagged_vector.cpp b/benchmarks/common/make_jagged_vector.cpp new file mode 100644 index 00000000..52d8252b --- /dev/null +++ b/benchmarks/common/make_jagged_vector.cpp @@ -0,0 +1,38 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Local include(s). +#include "make_jagged_vector.hpp" + +// System include(s). 
+#include + +namespace vecmem::benchmark { + +jagged_vector make_jagged_vector(std::size_t outerSize, + std::size_t maxInnerSize, + memory_resource& mr) { + + // Create the result object. + jagged_vector result(&mr); + result.reserve(outerSize); + + // Set up a simple random number generator for the inner vector sizes. + std::default_random_engine eng; + std::uniform_int_distribution gen(0, maxInnerSize); + + // Set up each of its inner vectors. + for (std::size_t i = 0; i < outerSize; ++i) { + result.push_back(jagged_vector::value_type(gen(eng), &mr)); + } + + // Return the vector. + return result; +} + +} // namespace vecmem::benchmark diff --git a/benchmarks/common/make_jagged_vector.hpp b/benchmarks/common/make_jagged_vector.hpp new file mode 100644 index 00000000..ad0df5c1 --- /dev/null +++ b/benchmarks/common/make_jagged_vector.hpp @@ -0,0 +1,35 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// VecMem include(s). +#include +#include + +// System include(s). +#include + +namespace vecmem::benchmark { + +/// Function creating a jagged vector with some general size specifications +/// +/// It creates a jagged vector with a fixed "outer size", and random sized +/// "inner vectors" that would not be larger than some specified value. 
+/// +/// @param outerSize The fixed "outer size" of the resulting vector +/// @param maxInnerSize The maximum for the random "inner sizes" of the +/// resulting vector +/// @param mr The memory resource to use +/// @return A jagged vector with the specifier properties +/// +jagged_vector make_jagged_vector(std::size_t outerSize, + std::size_t maxInnerSize, + memory_resource& mr); + +} // namespace vecmem::benchmark diff --git a/benchmarks/core/CMakeLists.txt b/benchmarks/core/CMakeLists.txt index 4dd526a4..dcf716c2 100644 --- a/benchmarks/core/CMakeLists.txt +++ b/benchmarks/core/CMakeLists.txt @@ -8,13 +8,16 @@ include( vecmem-compiler-options-cpp ) # Set up the benchmark(s) for the core library. -add_executable(vecmem_benchmark_core "benchmark_core.cpp") +add_executable( vecmem_benchmark_core + "benchmark_core.cpp" + "benchmark_copy.cpp" ) target_link_libraries( vecmem_benchmark_core PRIVATE vecmem::core + vecmem_benchmark_common benchmark::benchmark benchmark::benchmark_main ) diff --git a/benchmarks/core/benchmark_copy.cpp b/benchmarks/core/benchmark_copy.cpp new file mode 100644 index 00000000..a29aa82d --- /dev/null +++ b/benchmarks/core/benchmark_copy.cpp @@ -0,0 +1,48 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// VecMem include(s). +#include +#include + +// Common benchmark include(s). +#include "../common/make_jagged_vector.hpp" + +// Google benchmark include(s). +#include + +// System include(s). +#include + +namespace vecmem::benchmark { + +/// The (host) memory resource to use in the benchmark(s). +static host_memory_resource host_mr; +/// The copy object to use in the benchmark(s). +static copy host_copy; + +/// Function benchmarking the @c vecmem::copy jagged vector operations +void jaggedVectorHostCopy(::benchmark::State& state) { + + // Create the "source vector". 
+ jagged_vector source = + make_jagged_vector(state.range(0), state.range(1), host_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination vector". + jagged_vector dest; + + // Perform the copy benchmark. + for (auto _ : state) { + dest.clear(); + host_copy(source_data, dest); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorHostCopy)->Ranges({{10, 100000}, {50, 5000}}); + +} // namespace vecmem::benchmark diff --git a/benchmarks/cuda/CMakeLists.txt b/benchmarks/cuda/CMakeLists.txt index ef7fc9c6..0b74f5e1 100644 --- a/benchmarks/cuda/CMakeLists.txt +++ b/benchmarks/cuda/CMakeLists.txt @@ -9,13 +9,16 @@ include( vecmem-compiler-options-cpp ) include( vecmem-compiler-options-cuda ) # Set up the benchmark(s) for the CUDA library. -add_executable(vecmem_benchmark_cuda "benchmark_cuda.cpp") +add_executable( vecmem_benchmark_cuda + "benchmark_cuda.cpp" + "benchmark_copy.cpp" ) target_link_libraries( vecmem_benchmark_cuda PRIVATE vecmem::cuda + vecmem_benchmark_common benchmark::benchmark benchmark::benchmark_main ) diff --git a/benchmarks/cuda/benchmark_copy.cpp b/benchmarks/cuda/benchmark_copy.cpp new file mode 100644 index 00000000..3beac4ac --- /dev/null +++ b/benchmarks/cuda/benchmark_copy.cpp @@ -0,0 +1,48 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// VecMem include(s). +#include +#include + +// Common benchmark include(s). +#include "../common/make_jagged_vector.hpp" + +// Google benchmark include(s). +#include + +// System include(s). +#include + +namespace vecmem::cuda::benchmark { + +/// The (managed) memory resource to use in the benchmark(s). +static managed_memory_resource managed_mr; +/// The copy object to use in the benchmark(s). 
+static copy cuda_copy; + +/// Function benchmarking the @c vecmem::cuda::copy jagged vector operations +void jaggedVectorUnknownCopy(::benchmark::State& state) { + + // Create the "source vector". + jagged_vector source = vecmem::benchmark::make_jagged_vector( + state.range(0), state.range(1), managed_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination vector". + jagged_vector dest; + + // Perform the copy benchmark. + for (auto _ : state) { + dest.clear(); + cuda_copy(source_data, dest); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorUnknownCopy)->Ranges({{10, 100000}, {50, 5000}}); + +} // namespace vecmem::cuda::benchmark From b6207e028ed073d8b67a4867da644ecee7b7cda1 Mon Sep 17 00:00:00 2001 From: Attila Krasznahorkay Date: Mon, 30 May 2022 16:47:16 +0200 Subject: [PATCH 2/3] Implemented HtoD and DtoH jagged vector copies differently. Now if the user asks for HtoD or DtoH copies explicitly, vecmem::copy coalesces/distributes all memory on the host with HtoH copies, while only doing a single HtoD or DtoH copy. 
--- core/include/vecmem/utils/copy.hpp | 10 ++- core/include/vecmem/utils/impl/copy.ipp | 95 +++++++++++++++++++++---- 2 files changed, 91 insertions(+), 14 deletions(-) diff --git a/core/include/vecmem/utils/copy.hpp b/core/include/vecmem/utils/copy.hpp index 4b8b5895..472d170d 100644 --- a/core/include/vecmem/utils/copy.hpp +++ b/core/include/vecmem/utils/copy.hpp @@ -189,8 +189,14 @@ class VECMEM_CORE_EXPORT copy { int value); /// Helper function performing the copy of a jagged array/vector template - void copy_views(std::size_t size, const data::vector_view* from, - data::vector_view* to, type::copy_type cptype); + void copy_views_impl1(std::size_t size, + const data::vector_view* from, + data::vector_view* to, type::copy_type cptype); + /// Helper function performing the copy of a jagged array/vector + template + void copy_views_impl2(std::size_t size, + const data::vector_view* from, + data::vector_view* to, type::copy_type cptype); /// Helper function for getting the sizes of a jagged vector/buffer template std::vector::size_type> get_sizes( diff --git a/core/include/vecmem/utils/impl/copy.ipp b/core/include/vecmem/utils/impl/copy.ipp index 553db980..972270c4 100644 --- a/core/include/vecmem/utils/impl/copy.ipp +++ b/core/include/vecmem/utils/impl/copy.ipp @@ -9,10 +9,12 @@ // VecMem include(s). #include "vecmem/containers/jagged_vector.hpp" +#include "vecmem/memory/host_memory_resource.hpp" #include "vecmem/utils/debug.hpp" #include "vecmem/utils/type_traits.hpp" // System include(s). +#include #include namespace vecmem { @@ -194,7 +196,7 @@ data::jagged_vector_buffer> copy::to( setup(result); // Copy the payload of the inner vectors. - copy_views(data.m_size, data.m_ptr, result.host_ptr(), cptype); + copy_views_impl1(data.m_size, data.m_ptr, result.host_ptr(), cptype); // Return the newly created object. return result; @@ -214,7 +216,7 @@ data::jagged_vector_buffer> copy::to( setup(result); // Copy the payload of the inner vectors. 
- copy_views(data.m_size, data.host_ptr(), result.host_ptr(), cptype); + copy_views_impl1(data.m_size, data.host_ptr(), result.host_ptr(), cptype); // Return the newly created object. return result; @@ -235,7 +237,7 @@ void copy::operator()(const data::jagged_vector_view& from_view, assert(from_view.m_size == to_view.m_size); // Copy the payload of the inner vectors. - copy_views(from_view.m_size, from_view.m_ptr, to_view.m_ptr, cptype); + copy_views_impl1(from_view.m_size, from_view.m_ptr, to_view.m_ptr, cptype); } template @@ -253,7 +255,8 @@ void copy::operator()(const data::jagged_vector_view& from_view, assert(from_view.m_size == to_buffer.m_size); // Copy the payload of the inner vectors. - copy_views(from_view.m_size, from_view.m_ptr, to_buffer.host_ptr(), cptype); + copy_views_impl1(from_view.m_size, from_view.m_ptr, to_buffer.host_ptr(), + cptype); } template @@ -271,8 +274,8 @@ void copy::operator()(const data::jagged_vector_buffer& from_buffer, assert(from_buffer.m_size == to_view.m_size); // Copy the payload of the inner vectors. - copy_views(from_buffer.m_size, from_buffer.host_ptr(), to_view.m_ptr, - cptype); + copy_views_impl1(from_buffer.m_size, from_buffer.host_ptr(), to_view.m_ptr, + cptype); } template @@ -290,8 +293,8 @@ void copy::operator()(const data::jagged_vector_buffer& from_buffer, assert(from_buffer.m_size == to_buffer.m_size); // Copy the payload of the inner vectors. 
- copy_views(from_buffer.m_size, from_buffer.host_ptr(), to_buffer.host_ptr(), - cptype); + copy_views_impl1(from_buffer.m_size, from_buffer.host_ptr(), + to_buffer.host_ptr(), cptype); } template @@ -369,10 +372,78 @@ void copy::memset_impl(std::size_t size, data::vector_view* data, } template -void copy::copy_views(std::size_t size, - const data::vector_view* from_view, - data::vector_view* to_view, - type::copy_type cptype) { +void copy::copy_views_impl1(std::size_t size, + const data::vector_view* from_view, + data::vector_view* to_view, + type::copy_type cptype) { + + // The input and output types are allowed to be different, but only by + // const-ness. + static_assert(std::is_same::value || + details::is_same_nc::value, + "Can only use compatible types in the copy"); + + // Check if anything needs to be done. + if (size == 0) { + return; + } + + // Helper lambda checking that each view (from the 2nd on) starts where + // the previous one's capacity ends, i.e. the views are contiguous. + auto is_contiguous = [size](const auto* views) { + auto ptr = views[0].ptr(); + for (std::size_t i = 1; i < size; ++i) { + if ((ptr + views[i - 1].capacity()) != views[i].ptr()) { + return false; + } + ptr = views[i].ptr(); + } + return true; + }; + + /// Helper (host) memory resource + static host_memory_resource host_mr; + /// Helper (host) copy object + static copy host_copy; + + // Deal with different types of memory configurations. + if ((cptype == type::host_to_device) && + (is_contiguous(from_view) == false) && + (is_contiguous(to_view) == true)) { + // Create a contiguous buffer in host memory with the appropriate + // capacities. + std::vector sizes(size); + std::transform(from_view, from_view + size, sizes.begin(), + [](const auto& view) { return view.capacity(); }); + data::jagged_vector_buffer buffer(sizes, host_mr); + // Collect the data into this buffer with host-to-host memory copies. + host_copy.copy_views_impl2(size, from_view, buffer.host_ptr(), cptype); + // Now perform the host-to-device copy in one go. 
+ copy_views_impl2(size, buffer.host_ptr(), to_view, cptype); + } else if ((cptype == type::device_to_host) && + (is_contiguous(from_view) == true) && + (is_contiguous(to_view) == false)) { + // Create a contiguous buffer in host memory with the appropriate + // capacities. + std::vector sizes(size); + std::transform(from_view, from_view + size, sizes.begin(), + [](const auto& view) { return view.capacity(); }); + data::jagged_vector_buffer buffer(sizes, host_mr); + // Perform the device-to-host copy into this contiguous buffer. + copy_views_impl2(size, from_view, buffer.host_ptr(), cptype); + // Now fill the host views with host-to-host memory copies. + host_copy.copy_views_impl2(size, buffer.host_ptr(), to_view, cptype); + } else { + // Do the copy as best as we can with the existing views. + copy_views_impl2(size, from_view, to_view, cptype); + } +} + +template +void copy::copy_views_impl2(std::size_t size, + const data::vector_view* from_view, + data::vector_view* to_view, + type::copy_type cptype) { // The input and output types are allowed to be different, but only by // const-ness. From 8efbcc2e20eb981120db0e3906972bc773153b45 Mon Sep 17 00:00:00 2001 From: Attila Krasznahorkay Date: Mon, 30 May 2022 16:55:52 +0200 Subject: [PATCH 3/3] Added benchmarks for HtoD and DtoH jagged vector copies. While introducing SYCL benchmarks as well, including benchmarks for SYCL memory allocations. 
--- benchmarks/CMakeLists.txt | 5 + benchmarks/common/make_jagged_sizes.cpp | 35 +++++ benchmarks/common/make_jagged_sizes.hpp | 30 ++++ benchmarks/common/make_jagged_vector.cpp | 18 +-- benchmarks/common/make_jagged_vector.hpp | 8 +- benchmarks/core/benchmark_copy.cpp | 127 +++++++++++++++-- benchmarks/cuda/benchmark_copy.cpp | 140 +++++++++++++++++-- benchmarks/sycl/CMakeLists.txt | 18 +++ benchmarks/sycl/benchmark_copy.cpp | 166 +++++++++++++++++++++++ benchmarks/sycl/benchmark_sycl.cpp | 41 ++++++ 10 files changed, 550 insertions(+), 38 deletions(-) create mode 100644 benchmarks/common/make_jagged_sizes.cpp create mode 100644 benchmarks/common/make_jagged_sizes.hpp create mode 100644 benchmarks/sycl/CMakeLists.txt create mode 100644 benchmarks/sycl/benchmark_copy.cpp create mode 100644 benchmarks/sycl/benchmark_sycl.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index e981358d..20b1c554 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -25,6 +25,8 @@ endif() # Build a common, helper library. add_library( vecmem_benchmark_common STATIC + "common/make_jagged_sizes.hpp" + "common/make_jagged_sizes.cpp" "common/make_jagged_vector.hpp" "common/make_jagged_vector.cpp" ) target_link_libraries( vecmem_benchmark_common @@ -35,3 +37,6 @@ add_subdirectory(core) if(VECMEM_BUILD_CUDA_LIBRARY) add_subdirectory(cuda) endif() +if(VECMEM_BUILD_SYCL_LIBRARY) + add_subdirectory(sycl) +endif() diff --git a/benchmarks/common/make_jagged_sizes.cpp b/benchmarks/common/make_jagged_sizes.cpp new file mode 100644 index 00000000..16f87373 --- /dev/null +++ b/benchmarks/common/make_jagged_sizes.cpp @@ -0,0 +1,35 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Local include(s). +#include "make_jagged_sizes.hpp" + +// System include(s). 
+#include +#include + +namespace vecmem::benchmark { + +std::vector make_jagged_sizes(std::size_t outerSize, + std::size_t maxInnerSize) { + + // Set up a simple random number generator for the inner vector sizes. + std::default_random_engine eng; + eng.seed(outerSize + maxInnerSize); + std::uniform_int_distribution gen(0, maxInnerSize); + + // Generate the result vector. + std::vector result(outerSize); + std::generate(result.begin(), result.end(), + [&eng, &gen]() { return gen(eng); }); + + // Give it to the user. + return result; +} + +} // namespace vecmem::benchmark diff --git a/benchmarks/common/make_jagged_sizes.hpp b/benchmarks/common/make_jagged_sizes.hpp new file mode 100644 index 00000000..34a4065d --- /dev/null +++ b/benchmarks/common/make_jagged_sizes.hpp @@ -0,0 +1,30 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// System include(s). +#include +#include + +namespace vecmem::benchmark { + +/// Helper function for generating the sizes for a jagged vector (buffer) +/// +/// It implements a pretty simple thing, but since this is used in multiple +/// places, it made sense to put it into a central location. +/// +/// @param outerSize The fixed "outer size" of the jagged vector (buffer) +/// @param maxInnerSize The maximum for the random "inner sizes" of the +/// resulting vector (buffer) +/// @return A vector of sizes corresponding to the received parameters +/// +std::vector make_jagged_sizes(std::size_t outerSize, + std::size_t maxInnerSize); + +} // namespace vecmem::benchmark diff --git a/benchmarks/common/make_jagged_vector.cpp b/benchmarks/common/make_jagged_vector.cpp index 52d8252b..3d4c1e59 100644 --- a/benchmarks/common/make_jagged_vector.cpp +++ b/benchmarks/common/make_jagged_vector.cpp @@ -9,26 +9,16 @@ // Local include(s). #include "make_jagged_vector.hpp" -// System include(s). 
-#include - namespace vecmem::benchmark { -jagged_vector make_jagged_vector(std::size_t outerSize, - std::size_t maxInnerSize, +jagged_vector make_jagged_vector(const std::vector& sizes, memory_resource& mr) { // Create the result object. jagged_vector result(&mr); - result.reserve(outerSize); - - // Set up a simple random number generator for the inner vector sizes. - std::default_random_engine eng; - std::uniform_int_distribution gen(0, maxInnerSize); - - // Set up each of its inner vectors. - for (std::size_t i = 0; i < outerSize; ++i) { - result.push_back(jagged_vector::value_type(gen(eng), &mr)); + result.reserve(sizes.size()); + for (std::size_t size : sizes) { + result.push_back(jagged_vector::value_type(size, &mr)); } // Return the vector. diff --git a/benchmarks/common/make_jagged_vector.hpp b/benchmarks/common/make_jagged_vector.hpp index ad0df5c1..8ceaa1bd 100644 --- a/benchmarks/common/make_jagged_vector.hpp +++ b/benchmarks/common/make_jagged_vector.hpp @@ -14,6 +14,7 @@ // System include(s). #include +#include namespace vecmem::benchmark { @@ -22,14 +23,11 @@ namespace vecmem::benchmark { /// It creates a jagged vector with a fixed "outer size", and random sized /// "inner vectors" that would not be larger than some specified value. 
/// -/// @param outerSize The fixed "outer size" of the resulting vector -/// @param maxInnerSize The maximum for the random "inner sizes" of the -/// resulting vector +/// @param sizes The sizes of the vectors in the jagged vector /// @param mr The memory resource to use /// @return A jagged vector with the specifier properties /// -jagged_vector make_jagged_vector(std::size_t outerSize, - std::size_t maxInnerSize, +jagged_vector make_jagged_vector(const std::vector& sizes, memory_resource& mr); } // namespace vecmem::benchmark diff --git a/benchmarks/core/benchmark_copy.cpp b/benchmarks/core/benchmark_copy.cpp index a29aa82d..f544f702 100644 --- a/benchmarks/core/benchmark_copy.cpp +++ b/benchmarks/core/benchmark_copy.cpp @@ -11,12 +11,14 @@ #include // Common benchmark include(s). +#include "../common/make_jagged_sizes.hpp" #include "../common/make_jagged_vector.hpp" // Google benchmark include(s). #include // System include(s). +#include #include namespace vecmem::benchmark { @@ -26,23 +28,132 @@ static host_memory_resource host_mr; /// The copy object to use in the benchmark(s). static copy host_copy; -/// Function benchmarking the @c vecmem::copy jagged vector operations -void jaggedVectorHostCopy(::benchmark::State& state) { +/// Function benchmarking "unknown" host-to-device jagged vector copies +void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); // Create the "source vector". 
- jagged_vector source = - make_jagged_vector(state.range(0), state.range(1), host_mr); + jagged_vector source = make_jagged_vector(sizes, host_mr); const data::jagged_vector_data source_data = get_data(source); - // Create the "destination vector". - jagged_vector dest; + // Create the "destination buffer". + data::jagged_vector_buffer dest(sizes, host_mr); + host_copy.setup(dest); // Perform the copy benchmark. for (auto _ : state) { - dest.clear(); host_copy(source_data, dest); } } // Set up the benchmark. -BENCHMARK(jaggedVectorHostCopy)->Ranges({{10, 100000}, {50, 5000}}); +BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" host-to-device jagged vector copies +void jaggedVectorKnownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source vector". + jagged_vector source = make_jagged_vector(sizes, host_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination buffer". + data::jagged_vector_buffer dest(sizes, host_mr); + host_copy.setup(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + host_copy(source_data, dest, copy::type::host_to_device); + } +} +// Set up the benchmark. 
+BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "unknown" device-to-host jagged vector copies +void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, host_mr); + host_copy.setup(source); + // Create the "destination vector". + jagged_vector dest = make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + host_copy(source, dest_data); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" device-to-host jagged vector copies +void jaggedVectorKnownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, host_mr); + host_copy.setup(source); + // Create the "destination vector". 
+ jagged_vector dest = make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + host_copy(source, dest_data, copy::type::device_to_host); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); } // namespace vecmem::benchmark diff --git a/benchmarks/cuda/benchmark_copy.cpp b/benchmarks/cuda/benchmark_copy.cpp index 3beac4ac..5913ae93 100644 --- a/benchmarks/cuda/benchmark_copy.cpp +++ b/benchmarks/cuda/benchmark_copy.cpp @@ -7,42 +7,160 @@ */ // VecMem include(s). -#include +#include +#include #include // Common benchmark include(s). +#include "../common/make_jagged_sizes.hpp" #include "../common/make_jagged_vector.hpp" // Google benchmark include(s). #include // System include(s). +#include #include namespace vecmem::cuda::benchmark { -/// The (managed) memory resource to use in the benchmark(s). -static managed_memory_resource managed_mr; +/// The (host) memory resource to use in the benchmark(s). +static vecmem::host_memory_resource host_mr; +/// The (device) memory resource to use in the benchmark(s). +static device_memory_resource device_mr; /// The copy object to use in the benchmark(s). static copy cuda_copy; -/// Function benchmarking the @c vecmem::cuda::copy jagged vector operations -void jaggedVectorUnknownCopy(::benchmark::State& state) { +/// Function benchmarking "unknown" host-to-device jagged vector copies +void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. 
+ const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); // Create the "source vector". - jagged_vector source = vecmem::benchmark::make_jagged_vector( - state.range(0), state.range(1), managed_mr); + jagged_vector source = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); const data::jagged_vector_data source_data = get_data(source); - // Create the "destination vector". - jagged_vector dest; + // Create the "destination buffer". + data::jagged_vector_buffer dest(sizes, device_mr, &host_mr); + cuda_copy.setup(dest); // Perform the copy benchmark. for (auto _ : state) { - dest.clear(); cuda_copy(source_data, dest); } } // Set up the benchmark. -BENCHMARK(jaggedVectorUnknownCopy)->Ranges({{10, 100000}, {50, 5000}}); +BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" host-to-device jagged vector copies +void jaggedVectorKnownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source vector". + jagged_vector source = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination buffer". 
+ data::jagged_vector_buffer dest(sizes, device_mr, &host_mr); + cuda_copy.setup(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + cuda_copy(source_data, dest, copy::type::host_to_device); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "unknown" device-to-host jagged vector copies +void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, device_mr, &host_mr); + cuda_copy.setup(source); + // Create the "destination vector". + jagged_vector dest = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + cuda_copy(source, dest_data); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" device-to-host jagged vector copies +void jaggedVectorKnownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. 
+ const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, device_mr, &host_mr); + cuda_copy.setup(source); + // Create the "destination vector". + jagged_vector dest = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + cuda_copy(source, dest_data, copy::type::device_to_host); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); } // namespace vecmem::cuda::benchmark diff --git a/benchmarks/sycl/CMakeLists.txt b/benchmarks/sycl/CMakeLists.txt new file mode 100644 index 00000000..cb9d395b --- /dev/null +++ b/benchmarks/sycl/CMakeLists.txt @@ -0,0 +1,18 @@ +# VecMem project, part of the ACTS project (R&D line) +# +# (c) 2022 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +# Project include(s). +include( vecmem-compiler-options-cpp ) +include( vecmem-compiler-options-sycl ) + +# Set up the benchmark(s) for the SYCL library. 
+add_executable( vecmem_benchmark_sycl + "benchmark_sycl.cpp" + "benchmark_copy.cpp" ) +target_link_libraries( vecmem_benchmark_sycl + PRIVATE vecmem::sycl vecmem_benchmark_common + benchmark::benchmark benchmark::benchmark_main +) diff --git a/benchmarks/sycl/benchmark_copy.cpp b/benchmarks/sycl/benchmark_copy.cpp new file mode 100644 index 00000000..849fe5b5 --- /dev/null +++ b/benchmarks/sycl/benchmark_copy.cpp @@ -0,0 +1,166 @@ +/* + * VecMem project, part of the ACTS project (R&D line) + * + * (c) 2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// VecMem include(s). +#include +#include +#include + +// Common benchmark include(s). +#include "../common/make_jagged_sizes.hpp" +#include "../common/make_jagged_vector.hpp" + +// Google benchmark include(s). +#include + +// System include(s). +#include +#include + +namespace vecmem::sycl::benchmark { + +/// The (host) memory resource to use in the benchmark(s). +static host_memory_resource host_mr; +/// The (device) memory resource to use in the benchmark(s). +static device_memory_resource device_mr; +/// The copy object to use in the benchmark(s). +static copy sycl_copy; + +/// Function benchmarking "unknown" host-to-device jagged vector copies +void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source vector". 
+ jagged_vector source = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination buffer". + data::jagged_vector_buffer dest(sizes, device_mr, &host_mr); + sycl_copy.setup(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + sycl_copy(source_data, dest); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" host-to-device jagged vector copies +void jaggedVectorKnownHtoDCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source vector". + jagged_vector source = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + const data::jagged_vector_data source_data = get_data(source); + // Create the "destination buffer". + data::jagged_vector_buffer dest(sizes, device_mr, &host_mr); + sycl_copy.setup(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + sycl_copy(source_data, dest, copy::type::host_to_device); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "unknown" device-to-host jagged vector copies +void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. 
+ const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, device_mr, &host_mr); + sycl_copy.setup(source); + // Create the "destination vector". + jagged_vector dest = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. + for (auto _ : state) { + sycl_copy(source, dest_data); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); + +/// Function benchmarking "known" device-to-host jagged vector copies +void jaggedVectorKnownDtoHCopy(::benchmark::State& state) { + + // Generate the sizes of the jagged vector/buffer for the test. + const std::vector sizes = + vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1)); + + // Set custom "counters" for the benchmark. + const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(), + static_cast(0u)) * + sizeof(int); + state.counters["Bytes"] = static_cast(bytes); + state.counters["Rate"] = + ::benchmark::Counter(static_cast(bytes), + ::benchmark::Counter::kIsIterationInvariantRate, + ::benchmark::Counter::kIs1024); + + // Create the "source buffer". + data::jagged_vector_buffer source(sizes, device_mr, &host_mr); + sycl_copy.setup(source); + // Create the "destination vector". + jagged_vector dest = + vecmem::benchmark::make_jagged_vector(sizes, host_mr); + data::jagged_vector_data dest_data = get_data(dest); + + // Perform the copy benchmark. 
+ for (auto _ : state) { + sycl_copy(source, dest_data, copy::type::device_to_host); + } +} +// Set up the benchmark. +BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}}); + +} // namespace vecmem::sycl::benchmark diff --git a/benchmarks/sycl/benchmark_sycl.cpp b/benchmarks/sycl/benchmark_sycl.cpp new file mode 100644 index 00000000..846b57e5 --- /dev/null +++ b/benchmarks/sycl/benchmark_sycl.cpp @@ -0,0 +1,41 @@ +/** VecMem project, part of the ACTS project (R&D line) + * + * (c) 2021-2022 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// VecMem include(s). +#include +#include +#include + +// Google benchmark include(s). +#include + +static vecmem::sycl::device_memory_resource device_mr; +void BenchmarkSYCLDevice(benchmark::State& state) { + for (auto _ : state) { + void* p = device_mr.allocate(state.range(0)); + device_mr.deallocate(p, state.range(0)); + } +} +BENCHMARK(BenchmarkSYCLDevice)->RangeMultiplier(2)->Range(1, 2UL << 31); + +static vecmem::sycl::host_memory_resource host_mr; +void BenchmarkSYCLHost(benchmark::State& state) { + for (auto _ : state) { + void* p = host_mr.allocate(state.range(0)); + host_mr.deallocate(p, state.range(0)); + } +} +BENCHMARK(BenchmarkSYCLHost)->RangeMultiplier(2)->Range(1, 2UL << 31); + +static vecmem::sycl::shared_memory_resource shared_mr; +void BenchmarkSYCLShared(benchmark::State& state) { + for (auto _ : state) { + void* p = shared_mr.allocate(state.range(0)); + shared_mr.deallocate(p, state.range(0)); + } +} +BENCHMARK(BenchmarkSYCLShared)->RangeMultiplier(2)->Range(1, 2UL << 31);