From fa237b90ce8346104e2d892634a10c1cb63658e5 Mon Sep 17 00:00:00 2001 From: Pavel Kumbrasev Date: Wed, 31 Jul 2024 11:13:45 +0100 Subject: [PATCH] Improve parallel_for_each scalability (#1445) --------- Signed-off-by: pavelkumbrasev --- include/oneapi/tbb/parallel_for_each.h | 44 ++++++++++++++------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index ab0b345388..85c0269196 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -118,14 +118,17 @@ struct feeder_item_task: public task { using feeder_type = feeder_impl; template - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface& wait_vertex) : item(std::forward(input_item)), my_feeder(feeder), - my_allocator(alloc) - {} + my_allocator(alloc), + m_wait_tree_vertex(r1::get_thread_reference_vertex(&wait_vertex)) + { + m_wait_tree_vertex->reserve(); + } void finalize(const execution_data& ed) { - my_feeder.my_wait_context.release(); + m_wait_tree_vertex->release(); my_allocator.delete_object(this, ed); } @@ -160,6 +163,7 @@ struct feeder_item_task: public task { Item item; feeder_type& my_feeder; small_object_allocator my_allocator; + wait_tree_vertex_interface* m_wait_tree_vertex; }; // class feeder_item_task /** Implements new task adding procedure. @@ -170,9 +174,8 @@ class feeder_impl : public feeder { void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task; small_object_allocator alloc; - auto task = alloc.new_object(item, *this, alloc); + auto task = alloc.new_object(item, *this, alloc, my_wait_context); - my_wait_context.reserve(); spawn(*task, my_execution_context); } @@ -187,20 +190,19 @@ class feeder_impl : public feeder { void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task; small_object_allocator alloc{}; - auto task = alloc.new_object(std::move(item), *this, alloc); + auto task = alloc.new_object(std::move(item), *this, alloc, my_wait_context); - my_wait_context.reserve(); spawn(*task, my_execution_context); } public: - feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + feeder_impl(const Body& body, wait_context_vertex& w_context, task_group_context &context) : my_body(body), my_wait_context(w_context) , my_execution_context(context) {} const Body& my_body; - wait_context& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; }; // class feeder_impl @@ -263,7 +265,7 @@ struct input_block_handling_task : public task { using iteration_task_iterator_type = typename input_iteration_task_iterator_helper::type; using iteration_task = for_each_iteration_task; - input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + input_block_handling_task(wait_context_vertex& root_wait_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), my_execution_context(e_context), my_allocator(alloc) @@ -312,7 +314,7 @@ struct input_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class input_block_handling_task @@ -326,7 +328,7 @@ struct forward_block_handling_task : public task { using iteration_task = for_each_iteration_task; forward_block_handling_task(Iterator first, std::size_t size, - wait_context& w_context, task_group_context& e_context, + wait_context_vertex& w_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) : my_size(size), my_wait_context(0), my_root_wait_context(w_context), @@ -373,7 +375,7 @@ struct forward_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class forward_block_handling_task @@ -456,7 +458,7 @@ using feeder_is_required = tbb::detail::void_t struct feeder_holder { - feeder_holder( wait_context&, task_group_context&, const Body& ) {} + feeder_holder( wait_context_vertex&, task_group_context&, const Body& ) {} feeder_impl* feeder_ptr() { return nullptr; } }; // class feeder_holder @@ -464,7 +466,7 @@ struct feeder_holder { template class feeder_holder> { public: - feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + feeder_holder( wait_context_vertex& w_context, task_group_context& context, const Body& body ) : my_feeder(body, w_context, context) {} feeder_impl* feeder_ptr() { return &my_feeder; } @@ -475,7 +477,7 @@ class feeder_holder class for_each_root_task_base : public task { public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_vertex& w_context, task_group_context& e_context) : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) { @@ -489,7 +491,7 @@ class for_each_root_task_base : public task { protected: Iterator my_first; Iterator my_last; - wait_context& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; const Body& my_body; feeder_holder my_feeder_holder; @@ -624,11 +626,11 @@ void run_parallel_for_each( Iterator first, Iterator last, const Body& body, tas { if (!(first == last)) { using ItemType = get_item_type::value_type>; - wait_context w_context(0); + wait_context_vertex w_context(0); for_each_root_task root_task(first, last, body, w_context, context); - execute_and_wait(root_task, context, w_context, context); + execute_and_wait(root_task, context, w_context.get_context(), context); } }