From d9757a67a538880f67465ff79c99817c8897ea02 Mon Sep 17 00:00:00 2001
From: Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
Date: Mon, 30 May 2022 16:13:27 +0200
Subject: [PATCH 1/3] Introduced a set of benchmarks for jagged vector copying.

---
 benchmarks/CMakeLists.txt                |  7 ++++
 benchmarks/common/make_jagged_vector.cpp | 38 +++++++++++++++++++
 benchmarks/common/make_jagged_vector.hpp | 35 +++++++++++++++++
 benchmarks/core/CMakeLists.txt           |  5 ++-
 benchmarks/core/benchmark_copy.cpp       | 48 ++++++++++++++++++++++++
 benchmarks/cuda/CMakeLists.txt           |  5 ++-
 benchmarks/cuda/benchmark_copy.cpp       | 48 ++++++++++++++++++++++++
 7 files changed, 184 insertions(+), 2 deletions(-)
 create mode 100644 benchmarks/common/make_jagged_vector.cpp
 create mode 100644 benchmarks/common/make_jagged_vector.hpp
 create mode 100644 benchmarks/core/benchmark_copy.cpp
 create mode 100644 benchmarks/cuda/benchmark_copy.cpp
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 0ed4aced..e981358d 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -23,6 +23,13 @@ if(VECMEM_SETUP_GOOGLEBENCHMARK)
    endif()
 endif()
 
+# Build a common, helper library.
+add_library( vecmem_benchmark_common STATIC
+   "common/make_jagged_vector.hpp"
+   "common/make_jagged_vector.cpp" )
+target_link_libraries( vecmem_benchmark_common
+   PUBLIC vecmem::core )
+
 # Include the library specific tests.
 add_subdirectory(core)
 if(VECMEM_BUILD_CUDA_LIBRARY)
diff --git a/benchmarks/common/make_jagged_vector.cpp b/benchmarks/common/make_jagged_vector.cpp
new file mode 100644
index 00000000..52d8252b
--- /dev/null
+++ b/benchmarks/common/make_jagged_vector.cpp
@@ -0,0 +1,38 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// Local include(s).
+#include "make_jagged_vector.hpp"
+
+// System include(s).
+#include <random>
+
+namespace vecmem::benchmark {
+
+jagged_vector<int> make_jagged_vector(std::size_t outerSize,
+                                      std::size_t maxInnerSize,
+                                      memory_resource& mr) {
+
+    // Create the result object.
+    jagged_vector<int> result(&mr);
+    result.reserve(outerSize);
+
+    // Set up a simple random number generator for the inner vector sizes.
+    std::default_random_engine eng;
+    std::uniform_int_distribution<std::size_t> gen(0, maxInnerSize);
+
+    // Set up each of its inner vectors.
+    for (std::size_t i = 0; i < outerSize; ++i) {
+        result.push_back(jagged_vector<int>::value_type(gen(eng), &mr));
+    }
+
+    // Return the vector.
+    return result;
+}
+
+}  // namespace vecmem::benchmark
diff --git a/benchmarks/common/make_jagged_vector.hpp b/benchmarks/common/make_jagged_vector.hpp
new file mode 100644
index 00000000..ad0df5c1
--- /dev/null
+++ b/benchmarks/common/make_jagged_vector.hpp
@@ -0,0 +1,35 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// VecMem include(s).
+#include <vecmem/containers/jagged_vector.hpp>
+#include <vecmem/memory/memory_resource.hpp>
+
+// System include(s).
+#include <cstddef>
+
+namespace vecmem::benchmark {
+
+/// Function creating a jagged vector with some general size specifications
+///
+/// It creates a jagged vector with a fixed "outer size", and random sized
+/// "inner vectors" that would not be larger than some specified value.
+///
+/// @param outerSize The fixed "outer size" of the resulting vector
+/// @param maxInnerSize The maximum for the random "inner sizes" of the
+///                     resulting vector
+/// @param mr The memory resource to use
+/// @return A jagged vector with the specifier properties
+///
+jagged_vector<int> make_jagged_vector(std::size_t outerSize,
+                                      std::size_t maxInnerSize,
+                                      memory_resource& mr);
+
+}  // namespace vecmem::benchmark
diff --git a/benchmarks/core/CMakeLists.txt b/benchmarks/core/CMakeLists.txt
index 4dd526a4..dcf716c2 100644
--- a/benchmarks/core/CMakeLists.txt
+++ b/benchmarks/core/CMakeLists.txt
@@ -8,13 +8,16 @@
 include( vecmem-compiler-options-cpp )
 
 # Set up the benchmark(s) for the core library.
-add_executable(vecmem_benchmark_core "benchmark_core.cpp")
+add_executable( vecmem_benchmark_core
+    "benchmark_core.cpp"
+    "benchmark_copy.cpp" )
 
 target_link_libraries(
     vecmem_benchmark_core
 
     PRIVATE
     vecmem::core
+    vecmem_benchmark_common
     benchmark::benchmark
     benchmark::benchmark_main
 )
diff --git a/benchmarks/core/benchmark_copy.cpp b/benchmarks/core/benchmark_copy.cpp
new file mode 100644
index 00000000..a29aa82d
--- /dev/null
+++ b/benchmarks/core/benchmark_copy.cpp
@@ -0,0 +1,48 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// VecMem include(s).
+#include <vecmem/memory/host_memory_resource.hpp>
+#include <vecmem/utils/copy.hpp>
+
+// Common benchmark include(s).
+#include "../common/make_jagged_vector.hpp"
+
+// Google benchmark include(s).
+#include <benchmark/benchmark.h>
+
+// System include(s).
+#include <vector>
+
+namespace vecmem::benchmark {
+
+/// The (host) memory resource to use in the benchmark(s).
+static host_memory_resource host_mr;
+/// The copy object to use in the benchmark(s).
+static copy host_copy;
+
+/// Function benchmarking the @c vecmem::copy jagged vector operations
+void jaggedVectorHostCopy(::benchmark::State& state) {
+
+    // Create the "source vector".
+    jagged_vector<int> source =
+        make_jagged_vector(state.range(0), state.range(1), host_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Create the "destination vector".
+    jagged_vector<int> dest;
+
+    // Perform the copy benchmark.
+    for (auto _ : state) {
+        dest.clear();
+        host_copy(source_data, dest);
+    }
+}
+// Set up the benchmark.
+BENCHMARK(jaggedVectorHostCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+}  // namespace vecmem::benchmark
diff --git a/benchmarks/cuda/CMakeLists.txt b/benchmarks/cuda/CMakeLists.txt
index ef7fc9c6..0b74f5e1 100644
--- a/benchmarks/cuda/CMakeLists.txt
+++ b/benchmarks/cuda/CMakeLists.txt
@@ -9,13 +9,16 @@ include( vecmem-compiler-options-cpp )
 include( vecmem-compiler-options-cuda )
 
 # Set up the benchmark(s) for the CUDA library.
-add_executable(vecmem_benchmark_cuda "benchmark_cuda.cpp")
+add_executable( vecmem_benchmark_cuda
+    "benchmark_cuda.cpp"
+    "benchmark_copy.cpp" )
 
 target_link_libraries(
     vecmem_benchmark_cuda
 
     PRIVATE
     vecmem::cuda
+    vecmem_benchmark_common
     benchmark::benchmark
     benchmark::benchmark_main
 )
diff --git a/benchmarks/cuda/benchmark_copy.cpp b/benchmarks/cuda/benchmark_copy.cpp
new file mode 100644
index 00000000..3beac4ac
--- /dev/null
+++ b/benchmarks/cuda/benchmark_copy.cpp
@@ -0,0 +1,48 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// VecMem include(s).
+#include <vecmem/memory/cuda/managed_memory_resource.hpp>
+#include <vecmem/utils/cuda/copy.hpp>
+
+// Common benchmark include(s).
+#include "../common/make_jagged_vector.hpp"
+
+// Google benchmark include(s).
+#include <benchmark/benchmark.h>
+
+// System include(s).
+#include <vector>
+
+namespace vecmem::cuda::benchmark {
+
+/// The (managed) memory resource to use in the benchmark(s).
+static managed_memory_resource managed_mr;
+/// The copy object to use in the benchmark(s).
+static copy cuda_copy;
+
+/// Function benchmarking the @c vecmem::cuda::copy jagged vector operations
+void jaggedVectorUnknownCopy(::benchmark::State& state) {
+
+    // Create the "source vector".
+    jagged_vector<int> source = vecmem::benchmark::make_jagged_vector(
+        state.range(0), state.range(1), managed_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Create the "destination vector".
+    jagged_vector<int> dest;
+
+    // Perform the copy benchmark.
+    for (auto _ : state) {
+        dest.clear();
+        cuda_copy(source_data, dest);
+    }
+}
+// Set up the benchmark.
+BENCHMARK(jaggedVectorUnknownCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+}  // namespace vecmem::cuda::benchmark

From b6207e028ed073d8b67a4867da644ecee7b7cda1 Mon Sep 17 00:00:00 2001
From: Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
Date: Mon, 30 May 2022 16:47:16 +0200
Subject: [PATCH 2/3] Implemented HtoD and DtoH jagged vector copies
 differently.

Now if the user asks for HtoD or DtoH copies explicitly, vecmem::copy
coalesces/distributes all memory on the host with HtoH copies, while
only doing a single HtoD or DtoH copy.
---
 core/include/vecmem/utils/copy.hpp      | 10 ++-
 core/include/vecmem/utils/impl/copy.ipp | 95 +++++++++++++++++++++----
 2 files changed, 91 insertions(+), 14 deletions(-)

diff --git a/core/include/vecmem/utils/copy.hpp b/core/include/vecmem/utils/copy.hpp
index 4b8b5895..472d170d 100644
--- a/core/include/vecmem/utils/copy.hpp
+++ b/core/include/vecmem/utils/copy.hpp
@@ -189,8 +189,14 @@ class VECMEM_CORE_EXPORT copy {
                      int value);
     /// Helper function performing the copy of a jagged array/vector
     template <typename TYPE1, typename TYPE2>
-    void copy_views(std::size_t size, const data::vector_view<TYPE1>* from,
-                    data::vector_view<TYPE2>* to, type::copy_type cptype);
+    void copy_views_impl1(std::size_t size,
+                          const data::vector_view<TYPE1>* from,
+                          data::vector_view<TYPE2>* to, type::copy_type cptype);
+    /// Helper function performing the copy of a jagged array/vector
+    template <typename TYPE1, typename TYPE2>
+    void copy_views_impl2(std::size_t size,
+                          const data::vector_view<TYPE1>* from,
+                          data::vector_view<TYPE2>* to, type::copy_type cptype);
     /// Helper function for getting the sizes of a jagged vector/buffer
     template <typename TYPE>
     std::vector<typename data::vector_view<TYPE>::size_type> get_sizes(
diff --git a/core/include/vecmem/utils/impl/copy.ipp b/core/include/vecmem/utils/impl/copy.ipp
index 553db980..972270c4 100644
--- a/core/include/vecmem/utils/impl/copy.ipp
+++ b/core/include/vecmem/utils/impl/copy.ipp
@@ -9,10 +9,12 @@
 
 // VecMem include(s).
 #include "vecmem/containers/jagged_vector.hpp"
+#include "vecmem/memory/host_memory_resource.hpp"
 #include "vecmem/utils/debug.hpp"
 #include "vecmem/utils/type_traits.hpp"
 
 // System include(s).
+#include <algorithm>
 #include <cassert>
 
 namespace vecmem {
@@ -194,7 +196,7 @@ data::jagged_vector_buffer<std::remove_cv_t<TYPE>> copy::to(
     setup(result);
 
     // Copy the payload of the inner vectors.
-    copy_views(data.m_size, data.m_ptr, result.host_ptr(), cptype);
+    copy_views_impl1(data.m_size, data.m_ptr, result.host_ptr(), cptype);
 
     // Return the newly created object.
     return result;
@@ -214,7 +216,7 @@ data::jagged_vector_buffer<std::remove_cv_t<TYPE>> copy::to(
     setup(result);
 
     // Copy the payload of the inner vectors.
-    copy_views(data.m_size, data.host_ptr(), result.host_ptr(), cptype);
+    copy_views_impl1(data.m_size, data.host_ptr(), result.host_ptr(), cptype);
 
     // Return the newly created object.
     return result;
@@ -235,7 +237,7 @@ void copy::operator()(const data::jagged_vector_view<TYPE1>& from_view,
     assert(from_view.m_size == to_view.m_size);
 
     // Copy the payload of the inner vectors.
-    copy_views(from_view.m_size, from_view.m_ptr, to_view.m_ptr, cptype);
+    copy_views_impl1(from_view.m_size, from_view.m_ptr, to_view.m_ptr, cptype);
 }
 
 template <typename TYPE1, typename TYPE2>
@@ -253,7 +255,8 @@ void copy::operator()(const data::jagged_vector_view<TYPE1>& from_view,
     assert(from_view.m_size == to_buffer.m_size);
 
     // Copy the payload of the inner vectors.
-    copy_views(from_view.m_size, from_view.m_ptr, to_buffer.host_ptr(), cptype);
+    copy_views_impl1(from_view.m_size, from_view.m_ptr, to_buffer.host_ptr(),
+                     cptype);
 }
 
 template <typename TYPE1, typename TYPE2>
@@ -271,8 +274,8 @@ void copy::operator()(const data::jagged_vector_buffer<TYPE1>& from_buffer,
     assert(from_buffer.m_size == to_view.m_size);
 
     // Copy the payload of the inner vectors.
-    copy_views(from_buffer.m_size, from_buffer.host_ptr(), to_view.m_ptr,
-               cptype);
+    copy_views_impl1(from_buffer.m_size, from_buffer.host_ptr(), to_view.m_ptr,
+                     cptype);
 }
 
 template <typename TYPE1, typename TYPE2>
@@ -290,8 +293,8 @@ void copy::operator()(const data::jagged_vector_buffer<TYPE1>& from_buffer,
     assert(from_buffer.m_size == to_buffer.m_size);
 
     // Copy the payload of the inner vectors.
-    copy_views(from_buffer.m_size, from_buffer.host_ptr(), to_buffer.host_ptr(),
-               cptype);
+    copy_views_impl1(from_buffer.m_size, from_buffer.host_ptr(),
+                     to_buffer.host_ptr(), cptype);
 }
 
 template <typename TYPE1, typename TYPE2, typename ALLOC1, typename ALLOC2>
@@ -369,10 +372,78 @@ void copy::memset_impl(std::size_t size, data::vector_view<TYPE>* data,
 }
 
 template <typename TYPE1, typename TYPE2>
-void copy::copy_views(std::size_t size,
-                      const data::vector_view<TYPE1>* from_view,
-                      data::vector_view<TYPE2>* to_view,
-                      type::copy_type cptype) {
+void copy::copy_views_impl1(std::size_t size,
+                            const data::vector_view<TYPE1>* from_view,
+                            data::vector_view<TYPE2>* to_view,
+                            type::copy_type cptype) {
+
+    // The input and output types are allowed to be different, but only by
+    // const-ness.
+    static_assert(std::is_same<TYPE1, TYPE2>::value ||
+                      details::is_same_nc<TYPE1, TYPE2>::value,
+                  "Can only use compatible types in the copy");
+
+    // Check if anything needs to be done.
+    if (size == 0) {
+        return;
+    }
+
+    // Helper lambda for figuring out if a set of views is contiguous in
+    // memory. (Starts at 1: each view is compared with its predecessor.)
+    auto is_contiguous = [size](const auto* views) {
+        auto ptr = views[0].ptr();
+        for (std::size_t i = 1; i < size; ++i) {
+            if ((ptr + views[i - 1].capacity()) != views[i].ptr()) {
+                return false;
+            }
+            ptr = views[i].ptr();
+        }
+        return true;
+    };
+
+    /// Helper (host) memory resource
+    static host_memory_resource host_mr;
+    /// Helper (host) copy object
+    static copy host_copy;
+
+    // Deal with different types of memory configurations.
+    if ((cptype == type::host_to_device) &&
+        (is_contiguous(from_view) == false) &&
+        (is_contiguous(to_view) == true)) {
+        // Create a contiguous buffer in host memory with the appropriate
+        // capacities.
+        std::vector<std::size_t> sizes(size);
+        std::transform(from_view, from_view + size, sizes.begin(),
+                       [](const auto& view) { return view.capacity(); });
+        data::jagged_vector_buffer<TYPE2> buffer(sizes, host_mr);
+        // Collect the data into this buffer with host-to-host memory copies.
+        host_copy.copy_views_impl2(size, from_view, buffer.host_ptr(), cptype);
+        // Now perform the host-to-device copy in one go.
+        copy_views_impl2(size, buffer.host_ptr(), to_view, cptype);
+    } else if ((cptype == type::device_to_host) &&
+               (is_contiguous(from_view) == true) &&
+               (is_contiguous(to_view) == false)) {
+        // Create a contiguous buffer in host memory with the appropriate
+        // capacities.
+        std::vector<std::size_t> sizes(size);
+        std::transform(from_view, from_view + size, sizes.begin(),
+                       [](const auto& view) { return view.capacity(); });
+        data::jagged_vector_buffer<TYPE2> buffer(sizes, host_mr);
+        // Perform the device-to-host copy into this contiguous buffer.
+        copy_views_impl2(size, from_view, buffer.host_ptr(), cptype);
+        // Now fill the host views with host-to-host memory copies.
+        host_copy.copy_views_impl2(size, buffer.host_ptr(), to_view, cptype);
+    } else {
+        // Do the copy as best as we can with the existing views.
+        copy_views_impl2(size, from_view, to_view, cptype);
+    }
+}
+
+template <typename TYPE1, typename TYPE2>
+void copy::copy_views_impl2(std::size_t size,
+                            const data::vector_view<TYPE1>* from_view,
+                            data::vector_view<TYPE2>* to_view,
+                            type::copy_type cptype) {
 
     // The input and output types are allowed to be different, but only by
     // const-ness.

From 8efbcc2e20eb981120db0e3906972bc773153b45 Mon Sep 17 00:00:00 2001
From: Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
Date: Mon, 30 May 2022 16:55:52 +0200
Subject: [PATCH 3/3] Added benchmarks for HtoD and DtoH jagged vector copies.

Also introduced SYCL benchmarks, including benchmarks for SYCL memory
allocations.
---
 benchmarks/CMakeLists.txt                |   5 +
 benchmarks/common/make_jagged_sizes.cpp  |  35 +++++
 benchmarks/common/make_jagged_sizes.hpp  |  30 ++++
 benchmarks/common/make_jagged_vector.cpp |  18 +--
 benchmarks/common/make_jagged_vector.hpp |   8 +-
 benchmarks/core/benchmark_copy.cpp       | 127 +++++++++++++++--
 benchmarks/cuda/benchmark_copy.cpp       | 140 +++++++++++++++++--
 benchmarks/sycl/CMakeLists.txt           |  18 +++
 benchmarks/sycl/benchmark_copy.cpp       | 166 +++++++++++++++++++++++
 benchmarks/sycl/benchmark_sycl.cpp       |  41 ++++++
 10 files changed, 550 insertions(+), 38 deletions(-)
 create mode 100644 benchmarks/common/make_jagged_sizes.cpp
 create mode 100644 benchmarks/common/make_jagged_sizes.hpp
 create mode 100644 benchmarks/sycl/CMakeLists.txt
 create mode 100644 benchmarks/sycl/benchmark_copy.cpp
 create mode 100644 benchmarks/sycl/benchmark_sycl.cpp

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index e981358d..20b1c554 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -25,6 +25,8 @@ endif()
 
 # Build a common, helper library.
 add_library( vecmem_benchmark_common STATIC
+   "common/make_jagged_sizes.hpp"
+   "common/make_jagged_sizes.cpp"
    "common/make_jagged_vector.hpp"
    "common/make_jagged_vector.cpp" )
 target_link_libraries( vecmem_benchmark_common
@@ -35,3 +37,6 @@ add_subdirectory(core)
 if(VECMEM_BUILD_CUDA_LIBRARY)
    add_subdirectory(cuda)
 endif()
+if(VECMEM_BUILD_SYCL_LIBRARY)
+   add_subdirectory(sycl)
+endif()
diff --git a/benchmarks/common/make_jagged_sizes.cpp b/benchmarks/common/make_jagged_sizes.cpp
new file mode 100644
index 00000000..16f87373
--- /dev/null
+++ b/benchmarks/common/make_jagged_sizes.cpp
@@ -0,0 +1,35 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// Local include(s).
+#include "make_jagged_sizes.hpp"
+
+// System include(s).
+#include <algorithm>
+#include <random>
+
+namespace vecmem::benchmark {
+
+std::vector<std::size_t> make_jagged_sizes(std::size_t outerSize,
+                                           std::size_t maxInnerSize) {
+
+    // Random engine seeded from the two arguments, feeding [0, maxInnerSize].
+    std::default_random_engine engine;
+    engine.seed(outerSize + maxInnerSize);
+    std::uniform_int_distribution<std::size_t> dist(0, maxInnerSize);
+
+    // Draw one inner size per outer element, in order.
+    std::vector<std::size_t> sizes(outerSize);
+    for (std::size_t& innerSize : sizes) {
+        innerSize = dist(engine);
+    }
+
+    return sizes;
+}
+
+}  // namespace vecmem::benchmark
diff --git a/benchmarks/common/make_jagged_sizes.hpp b/benchmarks/common/make_jagged_sizes.hpp
new file mode 100644
index 00000000..34a4065d
--- /dev/null
+++ b/benchmarks/common/make_jagged_sizes.hpp
@@ -0,0 +1,30 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+#pragma once
+
+// System include(s).
+#include <cstddef>
+#include <vector>
+
+namespace vecmem::benchmark {
+
+/// Helper function for generating the sizes for a jagged vector (buffer)
+///
+/// It implements a pretty simple thing, but since this is used in multiple
+/// places, it made sense to put it into a central location.
+///
+/// @param outerSize The fixed "outer size" of the jagged vector (buffer)
+/// @param maxInnerSize The maximum for the random "inner sizes" of the
+///                     resulting vector (buffer)
+/// @return A vector of sizes corresponding to the received parameters
+///
+std::vector<std::size_t> make_jagged_sizes(std::size_t outerSize,
+                                           std::size_t maxInnerSize);
+
+}  // namespace vecmem::benchmark
diff --git a/benchmarks/common/make_jagged_vector.cpp b/benchmarks/common/make_jagged_vector.cpp
index 52d8252b..3d4c1e59 100644
--- a/benchmarks/common/make_jagged_vector.cpp
+++ b/benchmarks/common/make_jagged_vector.cpp
@@ -9,26 +9,16 @@
 // Local include(s).
 #include "make_jagged_vector.hpp"
 
-// System include(s).
-#include <random>
-
 namespace vecmem::benchmark {
 
-jagged_vector<int> make_jagged_vector(std::size_t outerSize,
-                                      std::size_t maxInnerSize,
+jagged_vector<int> make_jagged_vector(const std::vector<std::size_t>& sizes,
                                       memory_resource& mr) {
 
     // Create the result object.
     jagged_vector<int> result(&mr);
-    result.reserve(outerSize);
-
-    // Set up a simple random number generator for the inner vector sizes.
-    std::default_random_engine eng;
-    std::uniform_int_distribution<std::size_t> gen(0, maxInnerSize);
-
-    // Set up each of its inner vectors.
-    for (std::size_t i = 0; i < outerSize; ++i) {
-        result.push_back(jagged_vector<int>::value_type(gen(eng), &mr));
+    result.reserve(sizes.size());
+    for (std::size_t size : sizes) {
+        result.push_back(jagged_vector<int>::value_type(size, &mr));
     }
 
     // Return the vector.
diff --git a/benchmarks/common/make_jagged_vector.hpp b/benchmarks/common/make_jagged_vector.hpp
index ad0df5c1..8ceaa1bd 100644
--- a/benchmarks/common/make_jagged_vector.hpp
+++ b/benchmarks/common/make_jagged_vector.hpp
@@ -14,6 +14,7 @@
 
 // System include(s).
 #include <cstddef>
+#include <vector>
 
 namespace vecmem::benchmark {
 
@@ -22,14 +23,11 @@ namespace vecmem::benchmark {
 /// It creates a jagged vector with a fixed "outer size", and random sized
 /// "inner vectors" that would not be larger than some specified value.
 ///
-/// @param outerSize The fixed "outer size" of the resulting vector
-/// @param maxInnerSize The maximum for the random "inner sizes" of the
-///                     resulting vector
+/// @param sizes The sizes of the vectors in the jagged vector
 /// @param mr The memory resource to use
 /// @return A jagged vector with the specifier properties
 ///
-jagged_vector<int> make_jagged_vector(std::size_t outerSize,
-                                      std::size_t maxInnerSize,
+jagged_vector<int> make_jagged_vector(const std::vector<std::size_t>& sizes,
                                       memory_resource& mr);
 
 }  // namespace vecmem::benchmark
diff --git a/benchmarks/core/benchmark_copy.cpp b/benchmarks/core/benchmark_copy.cpp
index a29aa82d..f544f702 100644
--- a/benchmarks/core/benchmark_copy.cpp
+++ b/benchmarks/core/benchmark_copy.cpp
@@ -11,12 +11,14 @@
 #include <vecmem/utils/copy.hpp>
 
 // Common benchmark include(s).
+#include "../common/make_jagged_sizes.hpp"
 #include "../common/make_jagged_vector.hpp"
 
 // Google benchmark include(s).
 #include <benchmark/benchmark.h>
 
 // System include(s).
+#include <numeric>
 #include <vector>
 
 namespace vecmem::benchmark {
@@ -26,23 +28,132 @@ static host_memory_resource host_mr;
 /// The copy object to use in the benchmark(s).
 static copy host_copy;
 
-/// Function benchmarking the @c vecmem::copy jagged vector operations
-void jaggedVectorHostCopy(::benchmark::State& state) {
+/// Function benchmarking "unknown" host-to-device jagged vector copies
+void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) {
+
+    // Generate the sizes of the jagged vector/buffer for the test.
+    const std::vector<std::size_t> sizes =
+        make_jagged_sizes(state.range(0), state.range(1));
+
+    // Set custom "counters" for the benchmark.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
 
     // Create the "source vector".
-    jagged_vector<int> source =
-        make_jagged_vector(state.range(0), state.range(1), host_mr);
+    jagged_vector<int> source = make_jagged_vector(sizes, host_mr);
     const data::jagged_vector_data<int> source_data = get_data(source);
-    // Create the "destination vector".
-    jagged_vector<int> dest;
+    // Create the "destination buffer".
+    data::jagged_vector_buffer<int> dest(sizes, host_mr);
+    host_copy.setup(dest);
 
     // Perform the copy benchmark.
     for (auto _ : state) {
-        dest.clear();
         host_copy(source_data, dest);
     }
 }
 // Set up the benchmark.
-BENCHMARK(jaggedVectorHostCopy)->Ranges({{10, 100000}, {50, 5000}});
+BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Function benchmarking "known" host-to-device jagged vector copies
+void jaggedVectorKnownHtoDCopy(::benchmark::State& state) {
+
+    // Generate the sizes of the jagged vector/buffer for the test.
+    const std::vector<std::size_t> sizes =
+        make_jagged_sizes(state.range(0), state.range(1));
+
+    // Set custom "counters" for the benchmark.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Create the "source vector".
+    jagged_vector<int> source = make_jagged_vector(sizes, host_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Create the "destination buffer".
+    data::jagged_vector_buffer<int> dest(sizes, host_mr);
+    host_copy.setup(dest);
+
+    // Perform the copy benchmark.
+    for (auto _ : state) {
+        host_copy(source_data, dest, copy::type::host_to_device);
+    }
+}
+// Set up the benchmark.
+BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Function benchmarking "unknown" device-to-host jagged vector copies
+void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) {
+
+    // Generate the sizes of the jagged vector/buffer for the test.
+    const std::vector<std::size_t> sizes =
+        make_jagged_sizes(state.range(0), state.range(1));
+
+    // Set custom "counters" for the benchmark.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Create the "source buffer".
+    data::jagged_vector_buffer<int> source(sizes, host_mr);
+    host_copy.setup(source);
+    // Create the "destination vector".
+    jagged_vector<int> dest = make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Perform the copy benchmark.
+    for (auto _ : state) {
+        host_copy(source, dest_data);
+    }
+}
+// Set up the benchmark.
+BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Function benchmarking "known" device-to-host jagged vector copies
+void jaggedVectorKnownDtoHCopy(::benchmark::State& state) {
+
+    // Generate the sizes of the jagged vector/buffer for the test.
+    const std::vector<std::size_t> sizes =
+        make_jagged_sizes(state.range(0), state.range(1));
+
+    // Set custom "counters" for the benchmark.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Create the "source buffer".
+    data::jagged_vector_buffer<int> source(sizes, host_mr);
+    host_copy.setup(source);
+    // Create the "destination vector".
+    jagged_vector<int> dest = make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Perform the copy benchmark.
+    for (auto _ : state) {
+        host_copy(source, dest_data, copy::type::device_to_host);
+    }
+}
+// Set up the benchmark.
+BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
 
 }  // namespace vecmem::benchmark
diff --git a/benchmarks/cuda/benchmark_copy.cpp b/benchmarks/cuda/benchmark_copy.cpp
index 3beac4ac..5913ae93 100644
--- a/benchmarks/cuda/benchmark_copy.cpp
+++ b/benchmarks/cuda/benchmark_copy.cpp
@@ -7,42 +7,160 @@
  */
 
 // VecMem include(s).
-#include <vecmem/memory/cuda/managed_memory_resource.hpp>
+#include <vecmem/memory/cuda/device_memory_resource.hpp>
+#include <vecmem/memory/host_memory_resource.hpp>
 #include <vecmem/utils/cuda/copy.hpp>
 
 // Common benchmark include(s).
+#include "../common/make_jagged_sizes.hpp"
 #include "../common/make_jagged_vector.hpp"
 
 // Google benchmark include(s).
 #include <benchmark/benchmark.h>
 
 // System include(s).
+#include <numeric>
 #include <vector>
 
 namespace vecmem::cuda::benchmark {
 
-/// The (managed) memory resource to use in the benchmark(s).
-static managed_memory_resource managed_mr;
+/// Host memory resource shared by all of the benchmarks in this file.
+static vecmem::host_memory_resource host_mr;
+/// Device memory resource shared by all of the benchmarks in this file.
+static device_memory_resource device_mr;
 /// The copy object to use in the benchmark(s).
 static copy cuda_copy;
 
-/// Function benchmarking the @c vecmem::cuda::copy jagged vector operations
-void jaggedVectorUnknownCopy(::benchmark::State& state) {
+/// Benchmark host-to-device jagged vector copies with no copy type given
+void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
 
     // Create the "source vector".
-    jagged_vector<int> source = vecmem::benchmark::make_jagged_vector(
-        state.range(0), state.range(1), managed_mr);
+    jagged_vector<int> source =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
     const data::jagged_vector_data<int> source_data = get_data(source);
-    // Create the "destination vector".
-    jagged_vector<int> dest;
+    // Destination: device_mr buffer with the same sizes, set up by cuda_copy.
+    data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
+    cuda_copy.setup(dest);
 
     // Perform the copy benchmark.
     for (auto _ : state) {
-        dest.clear();
         cuda_copy(source_data, dest);
     }
 }
 // Set up the benchmark.
-BENCHMARK(jaggedVectorUnknownCopy)->Ranges({{10, 100000}, {50, 5000}});
+BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark host-to-device jagged vector copies with an explicit copy type
+void jaggedVectorKnownHtoDCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: jagged vector built on host_mr, plus the data object for it.
+    jagged_vector<int> source =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Destination: device_mr buffer with the same sizes, set up by cuda_copy.
+    data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
+    cuda_copy.setup(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        cuda_copy(source_data, dest, copy::type::host_to_device);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark device-to-host jagged vector copies with no copy type given
+void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: device_mr buffer (no payload written here), set up by cuda_copy.
+    data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
+    cuda_copy.setup(source);
+    // Destination: pre-sized jagged vector on host_mr, plus its data object.
+    jagged_vector<int> dest =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        cuda_copy(source, dest_data);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark device-to-host jagged vector copies with an explicit copy type
+void jaggedVectorKnownDtoHCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: device_mr buffer (no payload written here), set up by cuda_copy.
+    data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
+    cuda_copy.setup(source);
+    // Destination: pre-sized jagged vector on host_mr, plus its data object.
+    jagged_vector<int> dest =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        cuda_copy(source, dest_data, copy::type::device_to_host);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
 
 }  // namespace vecmem::cuda::benchmark
diff --git a/benchmarks/sycl/CMakeLists.txt b/benchmarks/sycl/CMakeLists.txt
new file mode 100644
index 00000000..cb9d395b
--- /dev/null
+++ b/benchmarks/sycl/CMakeLists.txt
@@ -0,0 +1,18 @@
+# VecMem project, part of the ACTS project (R&D line)
+#
+# (c) 2022 CERN for the benefit of the ACTS project
+#
+# Mozilla Public License Version 2.0
+
+# Project modules; judging by their names they set C++/SYCL compiler options.
+include( vecmem-compiler-options-cpp )
+include( vecmem-compiler-options-sycl )
+
+# Build the SYCL benchmark executable from the allocation and copy benchmarks.
+add_executable( vecmem_benchmark_sycl
+    "benchmark_sycl.cpp"
+    "benchmark_copy.cpp" )
+target_link_libraries( vecmem_benchmark_sycl
+    PRIVATE vecmem::sycl vecmem_benchmark_common
+            benchmark::benchmark benchmark::benchmark_main
+)
diff --git a/benchmarks/sycl/benchmark_copy.cpp b/benchmarks/sycl/benchmark_copy.cpp
new file mode 100644
index 00000000..849fe5b5
--- /dev/null
+++ b/benchmarks/sycl/benchmark_copy.cpp
@@ -0,0 +1,166 @@
+/*
+ * VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// VecMem include(s).
+#include <vecmem/memory/sycl/device_memory_resource.hpp>
+#include <vecmem/memory/sycl/host_memory_resource.hpp>
+#include <vecmem/utils/sycl/copy.hpp>
+
+// Common benchmark include(s).
+#include "../common/make_jagged_sizes.hpp"
+#include "../common/make_jagged_vector.hpp"
+
+// Google benchmark include(s).
+#include <benchmark/benchmark.h>
+
+// System include(s).
+#include <numeric>
+#include <vector>
+
+namespace vecmem::sycl::benchmark {
+
+/// Host memory resource shared by all of the benchmarks in this file.
+static host_memory_resource host_mr;
+/// Device memory resource shared by all of the benchmarks in this file.
+static device_memory_resource device_mr;
+/// Copy object shared by all of the benchmarks in this file.
+static copy sycl_copy;
+
+/// Benchmark host-to-device jagged vector copies with no copy type given
+void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: jagged vector built on host_mr, plus the data object for it.
+    jagged_vector<int> source =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Destination: device_mr buffer with the same sizes, set up by sycl_copy.
+    data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
+    sycl_copy.setup(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        sycl_copy(source_data, dest);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark host-to-device jagged vector copies with an explicit copy type
+void jaggedVectorKnownHtoDCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: jagged vector built on host_mr, plus the data object for it.
+    jagged_vector<int> source =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    const data::jagged_vector_data<int> source_data = get_data(source);
+    // Destination: device_mr buffer with the same sizes, set up by sycl_copy.
+    data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
+    sycl_copy.setup(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        sycl_copy(source_data, dest, copy::type::host_to_device);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark device-to-host jagged vector copies with no copy type given
+void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: device_mr buffer (no payload written here), set up by sycl_copy.
+    data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
+    sycl_copy.setup(source);
+    // Destination: pre-sized jagged vector on host_mr, plus its data object.
+    jagged_vector<int> dest =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        sycl_copy(source, dest_data);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+/// Benchmark device-to-host jagged vector copies with an explicit copy type
+void jaggedVectorKnownDtoHCopy(::benchmark::State& state) {
+
+    // Inner vector sizes, generated from the two benchmark arguments.
+    const std::vector<std::size_t> sizes =
+        vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));
+
+    // Report the payload size in bytes and a 1024-based transfer rate.
+    const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
+                                              static_cast<std::size_t>(0u)) *
+                              sizeof(int);
+    state.counters["Bytes"] = static_cast<double>(bytes);
+    state.counters["Rate"] =
+        ::benchmark::Counter(static_cast<double>(bytes),
+                             ::benchmark::Counter::kIsIterationInvariantRate,
+                             ::benchmark::Counter::kIs1024);
+
+    // Source: device_mr buffer (no payload written here), set up by sycl_copy.
+    data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
+    sycl_copy.setup(source);
+    // Destination: pre-sized jagged vector on host_mr, plus its data object.
+    jagged_vector<int> dest =
+        vecmem::benchmark::make_jagged_vector(sizes, host_mr);
+    data::jagged_vector_data<int> dest_data = get_data(dest);
+
+    // Timed region: one jagged vector copy per benchmark iteration.
+    for (auto _ : state) {
+        sycl_copy(source, dest_data, copy::type::device_to_host);
+    }
+}
+// Register the benchmark over a grid of (range(0), range(1)) arguments.
+BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});
+
+}  // namespace vecmem::sycl::benchmark
diff --git a/benchmarks/sycl/benchmark_sycl.cpp b/benchmarks/sycl/benchmark_sycl.cpp
new file mode 100644
index 00000000..846b57e5
--- /dev/null
+++ b/benchmarks/sycl/benchmark_sycl.cpp
@@ -0,0 +1,41 @@
+/** VecMem project, part of the ACTS project (R&D line)
+ *
+ * (c) 2021-2022 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// VecMem include(s).
+#include <vecmem/memory/sycl/device_memory_resource.hpp>
+#include <vecmem/memory/sycl/host_memory_resource.hpp>
+#include <vecmem/memory/sycl/shared_memory_resource.hpp>
+
+// Google benchmark include(s).
+#include <benchmark/benchmark.h>
+
+static vecmem::sycl::device_memory_resource device_mr;  // resource under test
+void BenchmarkSYCLDevice(benchmark::State& state) {
+    for (auto _ : state) {  // each iteration times one allocate/deallocate pair
+        void* p = device_mr.allocate(state.range(0));
+        device_mr.deallocate(p, state.range(0));
+    }
+}  // Registered for sizes 1..2^32; 1LL keeps the shift 64-bit everywhere.
+BENCHMARK(BenchmarkSYCLDevice)->RangeMultiplier(2)->Range(1, 1LL << 32);
+
+static vecmem::sycl::host_memory_resource host_mr;  // resource under test
+void BenchmarkSYCLHost(benchmark::State& state) {
+    for (auto _ : state) {  // each iteration times one allocate/deallocate pair
+        void* p = host_mr.allocate(state.range(0));
+        host_mr.deallocate(p, state.range(0));
+    }
+}  // Registered for sizes 1..2^32; 1LL keeps the shift 64-bit everywhere.
+BENCHMARK(BenchmarkSYCLHost)->RangeMultiplier(2)->Range(1, 1LL << 32);
+
+static vecmem::sycl::shared_memory_resource shared_mr;  // resource under test
+void BenchmarkSYCLShared(benchmark::State& state) {
+    for (auto _ : state) {  // each iteration times one allocate/deallocate pair
+        void* p = shared_mr.allocate(state.range(0));
+        shared_mr.deallocate(p, state.range(0));
+    }
+}  // Registered for sizes 1..2^32; 1LL keeps the shift 64-bit everywhere.
+BENCHMARK(BenchmarkSYCLShared)->RangeMultiplier(2)->Range(1, 1LL << 32);