From a864f28645ed04034881541ccf9d32e79a0439bc Mon Sep 17 00:00:00 2001 From: pavelkumbrasev Date: Fri, 12 Jul 2024 11:01:47 +0100 Subject: [PATCH 1/4] Improve scalability for parallel_for_each feeder Signed-off-by: pavelkumbrasev --- include/oneapi/tbb/parallel_for_each.h | 42 ++++++++++++++------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index ab0b345388..90be98f275 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -118,14 +118,17 @@ struct feeder_item_task: public task { using feeder_type = feeder_impl; template - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_node_interface* node) : item(std::forward(input_item)), my_feeder(feeder), - my_allocator(alloc) - {} + my_allocator(alloc), + m_wait_tree_node(node) + { + m_wait_tree_node->reserve(); + } void finalize(const execution_data& ed) { - my_feeder.my_wait_context.release(); + m_wait_tree_node->release(); my_allocator.delete_object(this, ed); } @@ -160,6 +163,7 @@ struct feeder_item_task: public task { Item item; feeder_type& my_feeder; small_object_allocator my_allocator; + wait_tree_node_interface* m_wait_tree_node; }; // class feeder_item_task /** Implements new task adding procedure. @@ -170,9 +174,8 @@ class feeder_impl : public feeder { void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task; small_object_allocator alloc; - auto task = alloc.new_object(item, *this, alloc); + auto task = alloc.new_object(item, *this, alloc, r1::get_thread_reference_node(&my_wait_context)); - my_wait_context.reserve(); spawn(*task, my_execution_context); } @@ -187,20 +190,19 @@ class feeder_impl : public feeder { void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task; small_object_allocator alloc{}; - auto task = alloc.new_object(std::move(item), *this, alloc); + auto task = alloc.new_object(std::move(item), *this, alloc, r1::get_thread_reference_node(&my_wait_context)); - my_wait_context.reserve(); spawn(*task, my_execution_context); } public: - feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + feeder_impl(const Body& body, wait_context_node& w_context, task_group_context &context) : my_body(body), my_wait_context(w_context) , my_execution_context(context) {} const Body& my_body; - wait_context& my_wait_context; + wait_context_node& my_wait_context; task_group_context& my_execution_context; }; // class feeder_impl @@ -263,7 +265,7 @@ struct input_block_handling_task : public task { using iteration_task_iterator_type = typename input_iteration_task_iterator_helper::type; using iteration_task = for_each_iteration_task; - input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + input_block_handling_task(wait_context_node& root_wait_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), my_execution_context(e_context), my_allocator(alloc) @@ -312,7 +314,7 @@ struct input_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_node& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class input_block_handling_task @@ -326,7 +328,7 @@ struct forward_block_handling_task : public task { using iteration_task = for_each_iteration_task; forward_block_handling_task(Iterator first, std::size_t size, - wait_context& w_context, task_group_context& e_context, + wait_context_node& w_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) : my_size(size), my_wait_context(0), my_root_wait_context(w_context), @@ -373,7 +375,7 @@ struct forward_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_node& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class forward_block_handling_task @@ -456,7 +458,7 @@ using feeder_is_required = tbb::detail::void_t struct feeder_holder { - feeder_holder( wait_context&, task_group_context&, const Body& ) {} + feeder_holder( wait_context_node&, task_group_context&, const Body& ) {} feeder_impl* feeder_ptr() { return nullptr; } }; // class feeder_holder @@ -464,7 +466,7 @@ struct feeder_holder { template class feeder_holder> { public: - feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + feeder_holder( wait_context_node& w_context, task_group_context& context, const Body& body ) : my_feeder(body, w_context, context) {} feeder_impl* feeder_ptr() { return &my_feeder; } @@ -475,7 +477,7 @@ class feeder_holder class for_each_root_task_base : public task { public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_node& w_context, task_group_context& e_context) : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) { @@ -489,7 +491,7 @@ class for_each_root_task_base : public task { protected: Iterator my_first; Iterator my_last; - wait_context& my_wait_context; + wait_context_node& my_wait_context; task_group_context& my_execution_context; const Body& my_body; feeder_holder my_feeder_holder; @@ -624,11 +626,11 @@ void run_parallel_for_each( Iterator first, Iterator last, const Body& body, tas { if (!(first == last)) { using ItemType = get_item_type::value_type>; - wait_context w_context(0); + wait_context_node w_context(0); for_each_root_task root_task(first, last, body, w_context, context); - execute_and_wait(root_task, context, w_context, context); + execute_and_wait(root_task, context, w_context.get_context(), context); } } From b18bdcd6f4733ae51a72661c216028c9a9d736ed Mon Sep 17 00:00:00 2001 From: pavelkumbrasev Date: Fri, 12 Jul 2024 11:06:30 +0100 Subject: [PATCH 2/4] Fix var names Signed-off-by: pavelkumbrasev --- include/oneapi/tbb/parallel_for_each.h | 36 +++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index 90be98f275..8acca46f58 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -118,17 +118,17 @@ struct feeder_item_task: public task { using feeder_type = feeder_impl; template - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_node_interface* node) : + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface* node) : item(std::forward(input_item)), my_feeder(feeder), my_allocator(alloc), - m_wait_tree_node(node) + m_wait_tree_vertex(node) { - m_wait_tree_node->reserve(); + m_wait_tree_vertex->reserve(); } void finalize(const execution_data& ed) { - m_wait_tree_node->release(); + m_wait_tree_vertex->release(); my_allocator.delete_object(this, ed); } @@ -163,7 +163,7 @@ struct feeder_item_task: public task { Item item; feeder_type& my_feeder; small_object_allocator my_allocator; - wait_tree_node_interface* m_wait_tree_node; + wait_tree_vertex_interface* m_wait_tree_vertex; }; // class feeder_item_task /** Implements new task adding procedure. @@ -174,7 +174,7 @@ class feeder_impl : public feeder { void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task; small_object_allocator alloc; - auto task = alloc.new_object(item, *this, alloc, r1::get_thread_reference_node(&my_wait_context)); + auto task = alloc.new_object(item, *this, alloc, r1::get_thread_reference_vertex(&my_wait_context)); spawn(*task, my_execution_context); } @@ -190,19 +190,19 @@ class feeder_impl : public feeder { void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task; small_object_allocator alloc{}; - auto task = alloc.new_object(std::move(item), *this, alloc, r1::get_thread_reference_node(&my_wait_context)); + auto task = alloc.new_object(std::move(item), *this, alloc, r1::get_thread_reference_vertex(&my_wait_context)); spawn(*task, my_execution_context); } public: - feeder_impl(const Body& body, wait_context_node& w_context, task_group_context &context) + feeder_impl(const Body& body, wait_context_vertex& w_context, task_group_context &context) : my_body(body), my_wait_context(w_context) , my_execution_context(context) {} const Body& my_body; - wait_context_node& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; }; // class feeder_impl @@ -265,7 +265,7 @@ struct input_block_handling_task : public task { using iteration_task_iterator_type = typename input_iteration_task_iterator_helper::type; using iteration_task = for_each_iteration_task; - input_block_handling_task(wait_context_node& root_wait_context, task_group_context& e_context, + input_block_handling_task(wait_context_vertex& root_wait_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), my_execution_context(e_context), my_allocator(alloc) @@ -314,7 +314,7 @@ struct input_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context_node& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class input_block_handling_task @@ -328,7 +328,7 @@ struct forward_block_handling_task : public task { using iteration_task = for_each_iteration_task; forward_block_handling_task(Iterator first, std::size_t size, - wait_context_node& w_context, task_group_context& e_context, + wait_context_vertex& w_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) : my_size(size), my_wait_context(0), my_root_wait_context(w_context), @@ -375,7 +375,7 @@ struct forward_block_handling_task : public task { aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context_node& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class forward_block_handling_task @@ -458,7 +458,7 @@ using feeder_is_required = tbb::detail::void_t struct feeder_holder { - feeder_holder( wait_context_node&, task_group_context&, const Body& ) {} + feeder_holder( wait_context_vertex&, task_group_context&, const Body& ) {} feeder_impl* feeder_ptr() { return nullptr; } }; // class feeder_holder @@ -466,7 +466,7 @@ struct feeder_holder { template class feeder_holder> { public: - feeder_holder( wait_context_node& w_context, task_group_context& context, const Body& body ) + feeder_holder( wait_context_vertex& w_context, task_group_context& context, const Body& body ) : my_feeder(body, w_context, context) {} feeder_impl* feeder_ptr() { return &my_feeder; } @@ -477,7 +477,7 @@ class feeder_holder class for_each_root_task_base : public task { public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_node& w_context, task_group_context& e_context) + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_vertex& w_context, task_group_context& e_context) : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) { @@ -491,7 +491,7 @@ class for_each_root_task_base : public task { protected: Iterator my_first; Iterator my_last; - wait_context_node& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; const Body& my_body; feeder_holder my_feeder_holder; @@ -626,7 +626,7 @@ void run_parallel_for_each( Iterator first, Iterator last, const Body& body, tas { if (!(first == last)) { using ItemType = get_item_type::value_type>; - wait_context_node w_context(0); + wait_context_vertex w_context(0); for_each_root_task root_task(first, last, body, w_context, context); From e6371bfbe7593b59dbc80b5fb302a70878ee094b Mon Sep 17 00:00:00 2001 From: pavelkumbrasev Date: Fri, 12 Jul 2024 11:10:16 +0100 Subject: [PATCH 3/4] Update copyright Signed-off-by: pavelkumbrasev --- include/oneapi/tbb/parallel_for_each.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index 8acca46f58..49867907bb 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From e5888ce06a7975d6fed53023de02e4ae782fbada Mon Sep 17 00:00:00 2001 From: pavelkumbrasev Date: Mon, 15 Jul 2024 11:00:53 +0100 Subject: [PATCH 4/4] Apply review comment Signed-off-by: pavelkumbrasev --- include/oneapi/tbb/parallel_for_each.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/tbb/parallel_for_each.h b/include/oneapi/tbb/parallel_for_each.h index 49867907bb..85c0269196 100644 --- a/include/oneapi/tbb/parallel_for_each.h +++ b/include/oneapi/tbb/parallel_for_each.h @@ -118,11 +118,11 @@ struct feeder_item_task: public task { using feeder_type = feeder_impl; template - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface* node) : + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface& wait_vertex) : item(std::forward(input_item)), my_feeder(feeder), my_allocator(alloc), - m_wait_tree_vertex(node) + m_wait_tree_vertex(r1::get_thread_reference_vertex(&wait_vertex)) { m_wait_tree_vertex->reserve(); } @@ -174,7 +174,7 @@ class feeder_impl : public feeder { void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task; small_object_allocator alloc; - auto task = alloc.new_object(item, *this, alloc, r1::get_thread_reference_vertex(&my_wait_context)); + auto task = alloc.new_object(item, *this, alloc, my_wait_context); spawn(*task, my_execution_context); } @@ -190,7 +190,7 @@ class feeder_impl : public feeder { void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task; small_object_allocator alloc{}; - auto task = alloc.new_object(std::move(item), *this, alloc, r1::get_thread_reference_vertex(&my_wait_context)); + auto task = alloc.new_object(std::move(item), *this, alloc, my_wait_context); spawn(*task, my_execution_context); }