Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref: benchmark toy detector config #714

Merged
merged 2 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions benchmarks/common/benchmarks/toy_detector_benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ class ToyDetectorBenchmark : public benchmark::Fixture {

static constexpr std::array<float, 2> phi_range{
-traccc::constant<float>::pi, traccc::constant<float>::pi};
static constexpr std::array<float, 2> theta_range{
0.f, traccc::constant<float>::pi};
static constexpr std::array<float, 2> eta_range{-3, 3};
static constexpr std::array<float, 2> mom_range{
10.f * traccc::unit<float>::GeV, 100.f * traccc::unit<float>::GeV};

Expand All @@ -82,6 +81,10 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
"the simulation data."
<< std::endl;

// Apply correct propagation config
apply_propagation_config(finding_cfg.propagation);
apply_propagation_config(fitting_cfg.propagation);

// Use deterministic random number generator for testing
using uniform_gen_t = detray::detail::random_numbers<
traccc::scalar, std::uniform_real_distribution<traccc::scalar>>;
Expand All @@ -101,7 +104,7 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
generator_type::configuration gen_cfg{};
gen_cfg.n_tracks(n_tracks);
gen_cfg.phi_range(phi_range);
gen_cfg.theta_range(theta_range);
gen_cfg.eta_range(eta_range);
gen_cfg.mom_range(mom_range);
generator_type generator(gen_cfg);

Expand All @@ -127,6 +130,8 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
detray::muon<traccc::scalar>(), n_events, det, field,
std::move(generator), std::move(smearer_writer_cfg), full_path);

// Same propagation configuration for sim and reco
apply_propagation_config(sim.get_config().propagation);
// Set constrained step size to 1 mm
sim.get_config().propagation.stepping.step_constraint =
1.f * detray::unit<float>::mm;
Expand Down Expand Up @@ -155,6 +160,15 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
return toy_cfg;
}

void apply_propagation_config(detray::propagation::config& cfg) const {
    // Navigation tuning shared by simulation, finding and fitting so that
    // all stages propagate through the toy detector identically.
    cfg.navigation.mask_tolerance_scalor = 0.05f;
    cfg.navigation.min_mask_tolerance = 1e-5f * detray::unit<float>::mm;
    cfg.navigation.max_mask_tolerance = 3.f * detray::unit<float>::mm;
    cfg.navigation.overstep_tolerance = -300.f * detray::unit<float>::um;
    cfg.navigation.search_window = {3, 3};
}

void SetUp(::benchmark::State& /*state*/) {

// Read events
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/cpu/toy_detector_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CPU)(benchmark::State& state) {
for (auto _ : state) {

// Iterate over events
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic)
for (unsigned int i_evt = 0; i_evt < n_events; i_evt++) {

auto& spacepoints_per_event = spacepoints[i_evt];
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/cuda/toy_detector_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
// VecMem include(s).
#include <vecmem/memory/cuda/device_memory_resource.hpp>
#include <vecmem/memory/cuda/host_memory_resource.hpp>
#include <vecmem/memory/cuda/managed_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>
#include <vecmem/utils/cuda/async_copy.hpp>
#include <vecmem/utils/cuda/copy.hpp>
Expand All @@ -56,7 +55,6 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
vecmem::cuda::host_memory_resource cuda_host_mr;
vecmem::cuda::device_memory_resource device_mr;
traccc::memory_resource mr{device_mr, &cuda_host_mr};
vecmem::cuda::managed_memory_resource mng_mr;

// Copy and stream
vecmem::copy host_copy;
Expand All @@ -65,9 +63,9 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
vecmem::cuda::async_copy async_copy{stream.cudaStream()};

// Read back detector file
host_detector_type det{mng_mr};
host_detector_type det{cuda_host_mr};
traccc::io::read_detector(
det, mng_mr, sim_dir + "toy_detector_geometry.json",
det, cuda_host_mr, sim_dir + "toy_detector_geometry.json",
sim_dir + "toy_detector_homogeneous_material.json",
sim_dir + "toy_detector_surface_grids.json");

Expand All @@ -84,8 +82,10 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
traccc::cuda::fitting_algorithm<device_fitter_type> device_fitting(
fitting_cfg, mr, async_copy, stream);

// Copy detector to device
auto det_buffer = detray::get_buffer(det, device_mr, copy);
// Detector view object
auto det_view = detray::get_data(det);
auto det_view = detray::get_data(det_buffer);

// D2H copy object
traccc::device::container_d2h_copy_alg<traccc::track_state_container_types>
Expand Down Expand Up @@ -147,7 +147,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
det_view, field, track_candidates_cuda_buffer);

// Create a temporary buffer that will receive the device memory.
auto size = track_states_cuda_buffer.headers.size();
/*auto size = track_states_cuda_buffer.headers.size();
std::vector<std::size_t> capacities(size, 0);
std::transform(track_states_cuda_buffer.items.host_ptr(),
track_states_cuda_buffer.items.host_ptr() + size,
Expand All @@ -156,7 +156,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {

// Copy the track states back to the host.
traccc::track_state_container_types::host track_states_host =
track_state_d2h(track_states_cuda_buffer);
track_state_d2h(track_states_cuda_buffer);*/
}
}

Expand Down
Loading