diff --git a/core/include/detray/propagator/propagator.hpp b/core/include/detray/propagator/propagator.hpp index 45b374079..58c0ed813 100644 --- a/core/include/detray/propagator/propagator.hpp +++ b/core/include/detray/propagator/propagator.hpp @@ -55,12 +55,6 @@ struct propagator { explicit constexpr propagator(const propagation::config &cfg) : m_cfg{cfg} {} - /// @returns the actor chain - DETRAY_HOST_DEVICE - constexpr const actor_chain_t &get_actor_chain() const { - return run_actors; - } - /// Propagation that state aggregates a stepping and a navigation state. It /// also keeps references to the actor states. struct state { diff --git a/tests/benchmarks/cpu/propagation.cpp b/tests/benchmarks/cpu/propagation.cpp index c783cc1ff..c63dd978f 100644 --- a/tests/benchmarks/cpu/propagation.cpp +++ b/tests/benchmarks/cpu/propagation.cpp @@ -40,6 +40,7 @@ int main(int argc, char** argv) { using test_algebra = typename toy_detector_t::algebra_type; using scalar = dscalar; using vector3 = dvector3D; + using free_track_parameters_t = free_track_parameters; using uniform_gen_t = detail::random_numbers>; @@ -97,6 +98,7 @@ int main(int argc, char** argv) { // Add additional tracks for warmup bench_cfg.n_warmup(static_cast( std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + bench_cfg.do_warmup(true); // // Prepare data diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt index a101f0854..b15c8356d 100644 --- a/tests/benchmarks/cuda/CMakeLists.txt +++ b/tests/benchmarks/cuda/CMakeLists.txt @@ -15,17 +15,14 @@ enable_language(CUDA) # Set the CUDA build flags. include(detray-compiler-options-cuda) -# Look for openMP, which is used for the CPU benchmark -find_package(OpenMP) - -# make unit tests for multiple algebras -# Currently vc and smatrix is not supported -set(algebras "array") +# Build benchmarks for multiple algebra plugins +# Currently vc and smatrix is not supported on device +set(algebra_plugins "array") if(DETRAY_EIGEN_PLUGIN) - list(APPEND algebras "eigen") + list(APPEND algebra_plugins "eigen") endif() -foreach(algebra ${algebras}) +foreach(algebra ${algebra_plugins}) detray_add_executable(benchmark_cuda_propagation_${algebra} "propagation.cpp" LINK_LIBRARIES detray::benchmark_cuda detray::core detray::algebra_${algebra} vecmem::cuda detray::test_utils @@ -40,11 +37,4 @@ foreach(algebra ${algebras}) detray_benchmark_cuda_propagation_${algebra} PRIVATE "-march=native" "-ftree-vectorize" ) - - if(OpenMP_CXX_FOUND) - target_link_libraries( - detray_benchmark_cuda_propagation_${algebra} - PRIVATE OpenMP::OpenMP_CXX - ) - endif() endforeach() diff --git a/tests/benchmarks/cuda/propagation.cpp b/tests/benchmarks/cuda/propagation.cpp index ef5721395..9ec9e004b 100644 --- a/tests/benchmarks/cuda/propagation.cpp +++ b/tests/benchmarks/cuda/propagation.cpp @@ -41,6 +41,7 @@ int main(int argc, char** argv) { using test_algebra = typename toy_detector_t::algebra_type; using scalar = dscalar; using vector3 = dvector3D; + using free_track_parameters_t = free_track_parameters; using uniform_gen_t = detail::random_numbers>; @@ -87,11 +88,12 @@ int main(int argc, char** argv) { n_tracks); // Specific configuration for the random track generation - trk_cfg.seed(42u); + trk_cfg.seed(detail::random_numbers::default_seed()); // Add additional tracks for warmup bench_cfg.n_warmup(static_cast( std::ceil(0.1f * static_cast(trk_cfg.n_tracks())))); + bench_cfg.do_warmup(true); // // Prepare data diff --git a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp index 94c78c950..d001a6cfd 100644 --- a/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp +++ b/tests/benchmarks/include/detray/benchmarks/cpu/propagation_benchmark.hpp @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2023-2024 CERN for the benefit of the ACTS project + * (c) 2023-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -52,12 +52,12 @@ struct host_propagation_bm : public benchmark_base { configuration &config() { return m_cfg; } /// Prepare data and run benchmark loop - inline void operator()(::benchmark::State &state, - dvector> *tracks, - const typename propagator_t::detector_type *det, - const bfield_t *bfield, - typename propagator_t::actor_chain_type::state_tuple - *input_actor_states) const { + inline void operator()( + ::benchmark::State &state, + const dvector> *tracks, + const typename propagator_t::detector_type *det, const bfield_t *bfield, + const typename propagator_t::actor_chain_type::state_tuple + *input_actor_states) const { using actor_chain_t = typename propagator_t::actor_chain_type; using actor_states_t = typename actor_chain_t::state_tuple; @@ -76,7 +76,8 @@ struct host_propagation_bm : public benchmark_base { // Call the host propagation auto run_propagation = [&p, det, bfield, input_actor_states]( - free_track_parameters &track) { + const free_track_parameters + &track) { // Fresh copy of actor states actor_states_t actor_states(*input_actor_states); // Tuple of references to pass to the propagator @@ -103,17 +104,23 @@ struct host_propagation_bm : public benchmark_base { // Warm-up if (m_cfg.benchmark().do_warmup()) { assert(n_warmup > 0); - auto stride{n_samples / n_warmup}; + int stride{n_samples / n_warmup}; stride = (stride == 0) ? 10 : stride; assert(stride > 0); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < n_samples; i += stride) { + // The track gets copied into the stepper state, so that the + // original track sample vector remains unchanged run_propagation((*tracks)[static_cast(i)]); } } // Run the benchmark + + // Calculate the propagation rate + // @see + // https://github.com/google/benchmark/blob/main/docs/user_guide.md#custom-counters std::size_t total_tracks = 0u; for (auto _ : state) { #pragma omp parallel for schedule(dynamic) diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu index 200d9c295..ba45520ff 100644 --- a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.cu @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -15,7 +15,7 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( propagation::config cfg, typename propagator_t::detector_type::view_type det_view, typename propagator_t::stepper_type::magnetic_field_type field_view, - typename propagator_t::actor_chain_type::state_tuple + const typename propagator_t::actor_chain_type::state_tuple *device_actor_state_ptr, vecmem::data::vector_view< free_track_parameters> @@ -30,8 +30,9 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( propagator, actor_chain_t>; - detector_device_t det(det_view); - vecmem::device_vector> tracks(tracks_view); + const detector_device_t det(det_view); + const vecmem::device_vector> tracks( + tracks_view); int gid = threadIdx.x + blockIdx.x * blockDim.x; if (gid >= tracks.size()) { @@ -46,6 +47,9 @@ __global__ void __launch_bounds__(256, 4) propagator_benchmark_kernel( auto actor_state_refs = actor_chain_t::setup_actor_states(actor_states); // Create the propagator state + + // The track gets copied into the stepper state, so that the + // original track sample vector remains unchanged typename propagator_device_t::state p_state(tracks.at(gid), field_view, det); diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp index f5ef5e199..459894608 100644 --- a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp +++ b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp @@ -1,6 +1,6 @@ /** Detray library, part of the ACTS project (R&D line) * - * (c) 2024 CERN for the benefit of the ACTS project + * (c) 2024-2025 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -85,6 +85,7 @@ void run_propagation_kernel( const int); /// Allocate actor state blueprint on device +/// @note This only works if each actor state in the tuple is essentially POD template typename propagator_t::actor_chain_type::state_tuple *setup_actor_states( typename propagator_t::actor_chain_type::state_tuple *); @@ -155,14 +156,18 @@ struct cuda_propagation_bm : public benchmark_base { setup_actor_states(input_actor_states); // Do a small warm up run - { + if (m_cfg.benchmark().do_warmup()) { auto warmup_track_buffer = detray::get_buffer( vecmem::get_data(*tracks), *dev_mr, cuda_cpy); + run_propagation_kernel( m_cfg.propagation(), det_view, *bfield, device_actor_state_ptr, warmup_track_buffer, math::min(n_warmup, n_samples)); } + // Calculate the propagation rate + // @see + // https://github.com/google/benchmark/blob/main/docs/user_guide.md#custom-counters std::size_t total_tracks = 0u; for (auto _ : state) { // Launch the propagator test for GPU device diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp index e3c4bffd8..1cb559205 100644 --- a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp +++ b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp @@ -171,19 +171,19 @@ inline void register_benchmark( std::cout << bench_name << "\n" << bench_cfg; - // Cpu benchmark if constexpr (std::is_invocable_v< decltype(prop_benchmark), ::benchmark::State &, dvector> *, const detector_t *, const bfield_bknd_t *, typename propagator_t::actor_chain_type::state_tuple *>) { + // Cpu benchmark ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark, &tracks, &det, &bfield, actor_states); //->MeasureProcessCPUTime(); } else { - + // Device benchmark ::benchmark::RegisterBenchmark(bench_name.c_str(), prop_benchmark, dev_mr, &tracks, &det, &bfield, actor_states); diff --git a/tests/tools/include/detray/options/propagation_options.hpp b/tests/tools/include/detray/options/propagation_options.hpp index 57bc8082d..ffa193332 100644 --- a/tests/tools/include/detray/options/propagation_options.hpp +++ b/tests/tools/include/detray/options/propagation_options.hpp @@ -42,7 +42,7 @@ void add_options( "mask_tolerance_scalor", boost::program_options::value()->default_value( cfg.mask_tolerance_scalor), - "Mask tolerance scaling")( + "Mask tolerance scale factor")( "overstep_tolerance", boost::program_options::value()->default_value( cfg.overstep_tolerance / unit::um), diff --git a/tests/tools/include/detray/options/track_generator_options.hpp b/tests/tools/include/detray/options/track_generator_options.hpp index 4ebabd97f..f085f23ed 100644 --- a/tests/tools/include/detray/options/track_generator_options.hpp +++ b/tests/tools/include/detray/options/track_generator_options.hpp @@ -53,10 +53,10 @@ void add_uniform_track_gen_options( "Coordintates for particle gun origin position [mm]")( "p_range", boost::program_options::value>()->multitoken(), - "Total momentum [range] of the test particle [GeV]")( + "Total momentum [range] of the test particles [GeV]")( "pT_range", boost::program_options::value>()->multitoken(), - "Transverse momentum [range] of the test particle [GeV]"); + "Transverse momentum [range] of the test particles [GeV]"); } /// Add options for detray event generation @@ -149,20 +149,20 @@ void add_rnd_track_gen_options( "Seed for the random number generator")( "theta_range", boost::program_options::value>()->multitoken(), - "Min, Max range of theta values for particle gun")( + "Min, Max range of theta values for particle gun. Interval in [0, pi)")( "eta_range", boost::program_options::value>()->multitoken(), "Min, Max range of eta values for particle gun")( "randomize_charge", "Randomly flip charge sign per track")( "origin", boost::program_options::value>()->multitoken(), - "Coordintates for particle gun origin position")( + "Coordintates for particle gun origin position [mm]")( "p_range", boost::program_options::value>()->multitoken(), - "Total momentum [range] of the test particle [GeV]")( + "Total momentum [range] of the test particles [GeV]")( "pT_range", boost::program_options::value>()->multitoken(), - "Transverse momentum [range] of the test particle [GeV]"); + "Transverse momentum [range] of the test particles [GeV]"); } /// Add options for detray event generation diff --git a/tests/tools/src/cpu/CMakeLists.txt b/tests/tools/src/cpu/CMakeLists.txt index ea630ea83..8c63a8416 100644 --- a/tests/tools/src/cpu/CMakeLists.txt +++ b/tests/tools/src/cpu/CMakeLists.txt @@ -58,19 +58,19 @@ if(DETRAY_BUILD_BENCHMARKS) # Build the propagation benchmark executable. macro(detray_add_propagation_benchmark algebra) - detray_add_executable(propagation_benchmark_${algebra} + detray_add_executable(propagation_benchmark_cpu_${algebra} "propagation_benchmark.cpp" - LINK_LIBRARIES Boost::program_options benchmark::benchmark benchmark::benchmark_main vecmem::core detray::core_${algebra} detray::benchmarks detray::benchmark_cpu detray::tools detray::detectors + LINK_LIBRARIES Boost::program_options benchmark::benchmark benchmark::benchmark_main vecmem::core detray::benchmark_cpu detray::core_${algebra} detray::tools detray::detectors ) target_compile_options( - detray_propagation_benchmark_${algebra} + detray_propagation_benchmark_cpu_${algebra} PRIVATE "-march=native" "-ftree-vectorize" ) if(OpenMP_CXX_FOUND) target_link_libraries( - detray_propagation_benchmark_${algebra} + detray_propagation_benchmark_cpu_${algebra} PRIVATE OpenMP::OpenMP_CXX ) endif() diff --git a/tests/tools/src/cpu/propagation_benchmark.cpp b/tests/tools/src/cpu/propagation_benchmark.cpp index 53468c068..3040d328b 100644 --- a/tests/tools/src/cpu/propagation_benchmark.cpp +++ b/tests/tools/src/cpu/propagation_benchmark.cpp @@ -26,7 +26,7 @@ #include "detray/test/utils/simulation/event_generator/track_generators.hpp" #include "detray/test/utils/types.hpp" -// Detray test include(s) +// Detray tools include(s) #include "detray/options/detector_io_options.hpp" #include "detray/options/parse_options.hpp" #include "detray/options/propagation_options.hpp" @@ -50,6 +50,7 @@ int main(int argc, char** argv) { using test_algebra = typename detector_t::algebra_type; using scalar = dscalar; using vector3 = dvector3D; + using free_track_parameters_t = free_track_parameters; using uniform_gen_t = detail::random_numbers>; @@ -60,7 +61,7 @@ int main(int argc, char** argv) { using stepper_t = rk_stepper; using empty_chain_t = actor_chain<>; using default_chain = - actor_chain, + actor_chain, pointwise_material_interactor, parameter_resetter>; @@ -81,9 +82,8 @@ int main(int argc, char** argv) { // Specific options for this test po::options_description desc("\ndetray propagation benchmark options"); - std::vector window; desc.add_options()("context", po::value(), - "Number of the geometry context")( + "Index of the geometry context")( "sort_tracks", "Sort track samples by theta angle"); // Configs to be filled @@ -96,10 +96,10 @@ int main(int argc, char** argv) { po::variables_map vm = detray::options::parse_options( desc, argc, argv, reader_cfg, trk_cfg, prop_cfg); - // General options + // Custom options bool do_sort{(vm.count("sort_tracks") != 0)}; - // The geometry context to be displayed + // The geometry context to be used detector_t::geometry_context gctx; if (vm.count("context")) { gctx = detector_t::geometry_context{vm["context"].as()}; diff --git a/tests/tools/src/cuda/CMakeLists.txt b/tests/tools/src/cuda/CMakeLists.txt index 271577341..f323db92e 100644 --- a/tests/tools/src/cuda/CMakeLists.txt +++ b/tests/tools/src/cuda/CMakeLists.txt @@ -26,3 +26,27 @@ detray_add_executable(material_validation_cuda LINK_LIBRARIES GTest::gtest GTest::gtest_main Boost::program_options detray::test_cuda detray::tools ) + +# Build benchmarks for multiple algebra plugins +# Currently vc and smatrix is not supported on device +set(algebra_plugins "array") +if(DETRAY_EIGEN_PLUGIN) + list(APPEND algebra_plugins "eigen") +endif() + +foreach(algebra ${algebra_plugins}) + detray_add_executable(propagation_benchmark_cuda_${algebra} + "propagation_benchmark_cuda.cpp" + LINK_LIBRARIES detray::benchmark_cuda detray::core_${algebra} vecmem::cuda detray::tools detray::test_utils + ) + + target_compile_definitions( + detray_propagation_benchmark_cuda_${algebra} + PRIVATE ${algebra}=${algebra} + ) + + target_compile_options( + detray_propagation_benchmark_cuda_${algebra} + PRIVATE "-march=native" "-ftree-vectorize" + ) +endforeach() diff --git a/tests/tools/src/cuda/propagation_benchmark_cuda.cpp b/tests/tools/src/cuda/propagation_benchmark_cuda.cpp new file mode 100644 index 000000000..c24550fb7 --- /dev/null +++ b/tests/tools/src/cuda/propagation_benchmark_cuda.cpp @@ -0,0 +1,163 @@ +/** Detray library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "detray/detectors/bfield.hpp" +#include "detray/navigation/navigator.hpp" +#include "detray/propagator/actor_chain.hpp" +#include "detray/propagator/actors/aborters.hpp" +#include "detray/propagator/actors/parameter_resetter.hpp" +#include "detray/propagator/actors/parameter_transporter.hpp" +#include "detray/propagator/actors/pointwise_material_interactor.hpp" +#include "detray/propagator/rk_stepper.hpp" +#include "detray/tracks/tracks.hpp" + +// Detray IO include(s) +#include "detray/io/frontend/detector_reader.hpp" + +// Detray benchmark include(s) +#include "detray/benchmarks/device/cuda/propagation_benchmark.hpp" + +// Detray test include(s). +#include "detray/test/utils/simulation/event_generator/track_generators.hpp" +#include "detray/test/utils/types.hpp" + +// Detray tools include(s) +#include "detray/options/detector_io_options.hpp" +#include "detray/options/parse_options.hpp" +#include "detray/options/propagation_options.hpp" +#include "detray/options/track_generator_options.hpp" + +// Vecmem include(s) +#include + +// System include(s) +#include +#include + +namespace po = boost::program_options; + +using namespace detray; + +int main(int argc, char** argv) { + + // Use the most general type to be able to read in all detector files + using detector_t = detray::detector; + using test_algebra = typename detector_t::algebra_type; + using scalar = dscalar; + using vector3 = dvector3D; + + using free_track_parameters_t = free_track_parameters; + using uniform_gen_t = + detail::random_numbers>; + using track_generator_t = + random_track_generator; + + using field_bknd_t = bfield::const_bknd_t; + + // Host and device memory resources + vecmem::host_memory_resource host_mr; + vecmem::cuda::device_memory_resource dev_mr; + + // Constant magnetic field + vector3 B{0.f, 0.f, 2.f * unit::T}; + + // Number of tracks in the different benchmark cases + std::vector n_tracks{8 * 8, 16 * 16, 32 * 32, 64 * 64, + 128 * 128, 256 * 256, 512 * 512}; + + // + // Configuration + // + + // Specific options for this test + po::options_description desc("\ndetray propagation benchmark options"); + + desc.add_options()("context", po::value(), + "Index of the geometry context")( + "sort_tracks", "Sort track samples by theta angle"); + + // Configs to be filled + detray::io::detector_reader_config reader_cfg{}; + track_generator_t::configuration trk_cfg{}; + propagation::config prop_cfg{}; + detray::benchmarks::benchmark_base::configuration bench_cfg{}; + + // Read options from commandline + po::variables_map vm = detray::options::parse_options( + desc, argc, argv, reader_cfg, trk_cfg, prop_cfg); + + // Custom options + bool do_sort{(vm.count("sort_tracks") != 0)}; + + // The geometry context to be used + detector_t::geometry_context gctx; + if (vm.count("context")) { + gctx = detector_t::geometry_context{vm["context"].as()}; + } + + // + // Prepare data + // + + // Read the detector geometry + reader_cfg.do_check(true); + + const auto [det, names] = + detray::io::read_detector(host_mr, reader_cfg); + const std::string& det_name = det.name(names); + + // Generate the track samples + auto track_samples = + detray::benchmarks::generate_track_samples( + &host_mr, n_tracks, trk_cfg, do_sort); + + // Create a constant b-field + auto bfield = bfield::create_const_field(B); + + // Build actor states + dtuple<> empty_state{}; + + parameter_transporter::state transporter_state{}; + pointwise_material_interactor::state interactor_state{}; + parameter_resetter::state resetter_state{}; + + auto actor_states = detail::make_tuple( + transporter_state, interactor_state, resetter_state); + + // + // Register benchmarks + // + + // Number of warmup tracks + const int n_max_tracks{*std::ranges::max_element(n_tracks)}; + bench_cfg.n_warmup( + static_cast(std::ceil(0.1f * static_cast(n_max_tracks)))); + + if (prop_cfg.stepping.do_covariance_transport) { + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + test::default_metadata, field_bknd_t, + detray::benchmarks::default_chain>>( + det_name + "_W_COV_TRANSPORT", bench_cfg, prop_cfg, det, bfield, + &actor_states, track_samples, n_tracks, &dev_mr); + } else { + detray::benchmarks::register_benchmark< + detray::benchmarks::cuda_propagation_bm, + detray::benchmarks::cuda_propagator_type< + test::default_metadata, field_bknd_t, + detray::benchmarks::empty_chain>>( + det_name, bench_cfg, prop_cfg, det, bfield, &empty_state, + track_samples, n_tracks, &dev_mr); + } + + // Run benchmarks + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +}