From 9325207c6f63ed50eb773bbf5295527d6306e3ab Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Thu, 12 Dec 2024 16:37:10 +0100 Subject: [PATCH 01/11] Fetch main branch of CCCL, using cudax --- cpp/cmake/thirdparty/cccl_override.json | 9 +++++++++ cpp/cmake/thirdparty/get_cccl.cmake | 7 +++++++ 2 files changed, 16 insertions(+) create mode 100644 cpp/cmake/thirdparty/cccl_override.json diff --git a/cpp/cmake/thirdparty/cccl_override.json b/cpp/cmake/thirdparty/cccl_override.json new file mode 100644 index 00000000000..0226e08a7bb --- /dev/null +++ b/cpp/cmake/thirdparty/cccl_override.json @@ -0,0 +1,9 @@ +{ + "packages": { + "cccl": { + "version": "2.8.0", + "git_url": "https://github.com/NVIDIA/cccl.git", + "git_tag": "main" + } + } +} diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake index 72b53d4c833..5acc0a60224 100644 --- a/cpp/cmake/thirdparty/get_cccl.cmake +++ b/cpp/cmake/thirdparty/get_cccl.cmake @@ -15,6 +15,13 @@ # This function finds CCCL and sets any additional necessary environment variables. function(find_and_configure_cccl) include(${rapids-cmake-dir}/cpm/cccl.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + rapids_cpm_package_override("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/cccl_override.json") + + # Enable cudax namespace install + set(CCCL_ENABLE_UNSTABLE ON) + rapids_cpm_cccl(BUILD_EXPORT_SET cugraph-exports INSTALL_EXPORT_SET cugraph-exports) endfunction() From ae3036bb0e1c1cc7a378a496a49439add5befcd1 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Fri, 10 Jan 2025 19:00:14 +0100 Subject: [PATCH 02/11] thrust::binary_function was deprecated in CCCL 2.6 and removed in CCCL 3.0 --- cpp/src/prims/property_op_utils.cuh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/prims/property_op_utils.cuh b/cpp/src/prims/property_op_utils.cuh index 04ad22cbf71..d8cff55e929 100644 --- a/cpp/src/prims/property_op_utils.cuh +++ b/cpp/src/prims/property_op_utils.cuh @@ -127,9 +127,7 @@ template typename Op> struct property_op : public Op {}; template typename Op> -struct property_op, Op> - : public thrust:: - binary_function, thrust::tuple, thrust::tuple> { +struct property_op, Op> { using Type = thrust::tuple; private: From 1d0d30796bb4cd4ae6570ec164190073498e7bd2 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Fri, 10 Jan 2025 22:32:39 +0100 Subject: [PATCH 03/11] verify-copyright updates --- cpp/cmake/thirdparty/get_cccl.cmake | 2 +- cpp/src/prims/property_op_utils.cuh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake index 5acc0a60224..1ee8c351968 100644 --- a/cpp/cmake/thirdparty/get_cccl.cmake +++ b/cpp/cmake/thirdparty/get_cccl.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/src/prims/property_op_utils.cuh b/cpp/src/prims/property_op_utils.cuh index d8cff55e929..2cab42c1dc8 100644 --- a/cpp/src/prims/property_op_utils.cuh +++ b/cpp/src/prims/property_op_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 959bbc1967beb5dad63299e6d452e2e0dd84161f Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Fri, 17 Jan 2025 14:49:59 +0100 Subject: [PATCH 04/11] Include a thrust header that was missing to use thrust::max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpp/tests/utilities/check_utilities.hpp: In member function ‘bool cugraph::test::device_nearly_equal::operator()(type_t, type_t) const’: cpp/tests/utilities/check_utilities.hpp:98:20: error: ‘max’ is not a member of ‘thrust’ 98 | thrust::max(thrust::max(lhs, rhs) * threshold_ratio, threshold_magnitude); | ^~~ --- cpp/tests/utilities/check_utilities.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp index 68b4ef88dda..ed3187b4ae7 100644 --- a/cpp/tests/utilities/check_utilities.hpp +++ b/cpp/tests/utilities/check_utilities.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include From b76174d99f5da8d00ecd8549c5aec18e0c8f0b10 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Fri, 17 Jan 2025 14:58:09 +0100 Subject: [PATCH 05/11] Operators such as key_group_id_less_t are templated by functors which we instantiate using lambda functions. These lambda functions may have capture lists so that there is no default constructor, and we cannot call the default constructor of this operator. --- .../cugraph/utilities/shuffle_comm.cuh | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 98fa2cb1706..d0b44021428 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -145,43 +145,58 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, template struct key_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; - int pivot{}; + key_group_id_less_t(KeyToGroupIdOp op, int pivot_) : key_to_group_id_op(::std::move(op)), pivot(pivot_) {} __device__ bool operator()(key_type k) const { return key_to_group_id_op(k) < pivot; } + +private: + KeyToGroupIdOp key_to_group_id_op; + int pivot; }; template struct value_group_id_less_t { - ValueToGroupIdOp value_to_group_id_op{}; - int pivot{}; + value_group_id_less_t(ValueToGroupIdOp op, int pivot_) : value_to_group_id_op(::std::move(op)), pivot(pivot_) {} __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) < pivot; } + +private: + ValueToGroupIdOp value_to_group_id_op; + int pivot; }; template struct kv_pair_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; - int pivot{}; + kv_pair_group_id_less_t(KeyToGroupIdOp op, int pivot_) : key_to_group_id_op(::std::move(op)), pivot(pivot_) {} __device__ bool operator()(thrust::tuple t) const { return key_to_group_id_op(thrust::get<0>(t)) < pivot; } + +private: + KeyToGroupIdOp key_to_group_id_op; + int pivot; }; template struct value_group_id_greater_equal_t { - ValueToGroupIdOp value_to_group_id_op{}; - int pivot{}; + value_group_id_greater_equal_t(ValueToGroupIdOp op, int pivot_) : value_to_group_id_op(::std::move(op)), pivot(pivot_) {} __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) >= pivot; } + +private: + ValueToGroupIdOp value_to_group_id_op; + int pivot; }; template struct kv_pair_group_id_greater_equal_t { - KeyToGroupIdOp key_to_group_id_op{}; - int pivot{}; + kv_pair_group_id_greater_equal_t(KeyToGroupIdOp op, int pivot_) : key_to_group_id_op(::std::move(op)), pivot(pivot_) {} __device__ bool operator()(thrust::tuple t) const { return key_to_group_id_op(thrust::get<0>(t)) >= pivot; } + +private: + KeyToGroupIdOp key_to_group_id_op; + int pivot; }; template From 86f4e000a1974a0ad75bff362d4d81f08e2ebbc0 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Sat, 18 Jan 2025 10:32:14 +0100 Subject: [PATCH 06/11] CUDASTF needs -lcuda --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ad30b3769d7..df4b40071ea 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -484,6 +484,7 @@ target_link_libraries(cugraph rmm::rmm raft::raft $ + cuda PRIVATE ${COMPILED_RAFT_LIB} cuco::cuco From 0c8497fc0d728d4e42c89454ebe2933684ab8237 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Sat, 18 Jan 2025 10:35:03 +0100 Subject: [PATCH 07/11] Start to reintroduce STF constructs in PRIMS algorithms --- .../detail/extract_transform_v_frontier_e.cuh | 8 ++++ .../prims/detail/transform_v_frontier_e.cuh | 38 ++++++++++++++++--- ...v_transform_reduce_incoming_outgoing_e.cuh | 4 ++ cpp/src/prims/transform_reduce_e.cuh | 34 ++++++++++++++--- 4 files changed, 74 insertions(+), 10 deletions(-) diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 2b89d214fd7..93823851843 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -59,6 +59,10 @@ #include #include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -702,6 +706,8 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && KeyBucketType::is_sorted_unique; + stream_ctx cudastf_ctx(handle.get_stream()); + if (do_expensive_check) { auto frontier_vertex_first = thrust_tuple_get_or_identity(frontier.begin()); @@ -1597,6 +1603,8 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } } + cudastf_ctx.finalize(); + return std::make_tuple(std::move(key_buffer), std::move(value_buffer)); } diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index 5ebcddfe8da..78064eb2c3d 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -36,6 +36,11 @@ #include +#include + +using namespace cuda::experimental::stf; + + namespace cugraph { namespace detail { @@ -410,6 +415,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, typename EdgeValueInputWrapper::value_iterator, typename EdgeValueInputWrapper::value_type>>; +// cudastf::async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream()); + auto edge_mask_view = graph_view.edge_mask_view(); // 1. update aggregate_local_frontier_local_degree_offsets @@ -504,6 +512,12 @@ auto transform_v_frontier_e(raft::handle_t const& handle, } auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + // CUDASTF logical data buffer for transform reduce phase + std::vector> l_tv_buffers(5); + for (size_t segment_i = 0; segment_i < 5; segment_i++) { + l_tv_buffers[segment_i] = cudastf_ctx.logical_token(); + } + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); if (segment_offsets) { auto [edge_partition_key_indices, edge_partition_v_frontier_partition_offsets] = @@ -524,8 +538,11 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid(high_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[0].write())->*[&](cudaStream_t stream, auto /* aggregate_value_buffer */) { + + detail::transform_v_frontier_e_high_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[0], @@ -537,6 +554,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto mid_size = edge_partition_v_frontier_partition_offsets[2] - edge_partition_v_frontier_partition_offsets[1]; @@ -544,8 +562,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid(mid_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[1].write())->*[&](cudaStream_t stream, auto ) { detail::transform_v_frontier_e_mid_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[1], @@ -557,6 +576,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto low_size = edge_partition_v_frontier_partition_offsets[3] - edge_partition_v_frontier_partition_offsets[2]; @@ -564,8 +584,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(low_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[2].write())->*[&](cudaStream_t stream, auto) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[2], @@ -577,6 +598,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto hypersparse_size = edge_partition_v_frontier_partition_offsets[4] - edge_partition_v_frontier_partition_offsets[3]; @@ -584,8 +606,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(hypersparse_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[3].write())->*[&](cudaStream_t stream, auto) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[3], @@ -597,14 +620,16 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } } else { raft::grid_1d_thread_t update_grid(local_frontier_sizes[i], detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[4].write())->*[&,i](cudaStream_t stream, auto) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, thrust::make_counting_iterator(size_t{0}), @@ -616,9 +641,12 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } } + cudastf_ctx.finalize(); + return std::make_tuple(std::move(aggregate_value_buffer), std::move(aggregate_local_frontier_local_degree_offsets)); } diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 5ba7edec894..32f1d1fc8e1 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -27,6 +27,10 @@ #include #include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { /** diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 43722550c58..4ec97d4e921 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -47,6 +47,10 @@ #include #include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -470,6 +474,8 @@ T transform_reduce_e(raft::handle_t const& handle, // currently, nothing to do } + stream_ctx cudastf_ctx(handle.get_stream()); + property_op edge_property_add{}; auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); @@ -504,6 +510,11 @@ T transform_reduce_e(raft::handle_t const& handle, } auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + // CUDASTF logical data buffer for transform_reduce phase + std::vector> l_tr_buffers(5); + for (size_t segment_i = 0; segment_i < 5; segment_i++) { l_tr_buffers[segment_i] = cudastf_ctx.logical_token(); + } + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different @@ -514,8 +525,9 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[0].write())->*[&](cudaStream_t stream, auto) { detail::transform_reduce_e_high_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_first() + (*segment_offsets)[1], @@ -525,13 +537,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[1].write())->*[&](cudaStream_t stream, auto) { detail::transform_reduce_e_mid_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[1], edge_partition.major_range_first() + (*segment_offsets)[2], @@ -541,13 +555,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[2].write())->*[&](cudaStream_t stream, auto) { detail::transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[2], edge_partition.major_range_first() + (*segment_offsets)[3], @@ -557,13 +573,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if (edge_partition.dcs_nzd_vertex_count() && (*(edge_partition.dcs_nzd_vertex_count()) > 0)) { raft::grid_1d_thread_t update_grid(*(edge_partition.dcs_nzd_vertex_count()), detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[3].write())->*[&](cudaStream_t stream, auto) { detail::transform_reduce_e_hypersparse - <<>>( + <<>>( edge_partition, edge_partition_src_value_input, edge_partition_dst_value_input, @@ -571,6 +589,7 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } } else { if (edge_partition.major_range_size() > 0) { @@ -578,8 +597,10 @@ T transform_reduce_e(raft::handle_t const& handle, detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[4].write())->*[&](cudaStream_t stream, auto ) { + detail::transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_last(), @@ -589,10 +610,13 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } } } + cudastf_ctx.finalize(); + auto result = thrust::reduce( handle.get_thrust_policy(), get_dataframe_buffer_begin(result_buffer), From 0eb223a92498ca46fa997a25d8819fab2f481963 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Sat, 18 Jan 2025 10:58:02 +0100 Subject: [PATCH 08/11] Use argument elision with logical_token --- cpp/src/prims/detail/transform_v_frontier_e.cuh | 10 +++++----- cpp/src/prims/transform_reduce_e.cuh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index 78064eb2c3d..b01e38e40e1 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -538,7 +538,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid(high_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tv_buffers[0].write())->*[&](cudaStream_t stream, auto /* aggregate_value_buffer */) { + cudastf_ctx.task(l_tv_buffers[0].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_high_degree @@ -562,7 +562,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid(mid_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tv_buffers[1].write())->*[&](cudaStream_t stream, auto ) { + cudastf_ctx.task(l_tv_buffers[1].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_mid_degree <<>>( edge_partition, @@ -584,7 +584,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(low_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tv_buffers[2].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tv_buffers[2].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, @@ -606,7 +606,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(hypersparse_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tv_buffers[3].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tv_buffers[3].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, @@ -627,7 +627,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tv_buffers[4].write())->*[&,i](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tv_buffers[4].write())->*[&,i](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 4ec97d4e921..8d21c075402 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -525,7 +525,7 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tr_buffers[0].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tr_buffers[0].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_high_degree <<>>( edge_partition, @@ -543,7 +543,7 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tr_buffers[1].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tr_buffers[1].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_mid_degree <<>>( edge_partition, @@ -561,7 +561,7 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tr_buffers[2].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tr_buffers[2].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_low_degree <<>>( edge_partition, @@ -579,7 +579,7 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(*(edge_partition.dcs_nzd_vertex_count()), detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tr_buffers[3].write())->*[&](cudaStream_t stream, auto) { + cudastf_ctx.task(l_tr_buffers[3].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_hypersparse <<>>( edge_partition, @@ -597,7 +597,7 @@ T transform_reduce_e(raft::handle_t const& handle, detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - cudastf_ctx.task(l_tr_buffers[4].write())->*[&](cudaStream_t stream, auto ) { + cudastf_ctx.task(l_tr_buffers[4].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_low_degree <<>>( From 22b54cce9a9ee189a0a246846cbde8d72cdc41fd Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Wed, 22 Jan 2025 11:14:13 +0100 Subject: [PATCH 09/11] More work (in progress) to enable only parts of the algorithms --- cpp/CMakeLists.txt | 315 ++++++++++-------- .../prims/detail/per_v_transform_reduce_e.cuh | 2 +- cpp/src/prims/vertex_frontier.cuh | 4 +- cpp/tests/CMakeLists.txt | 74 ++-- 4 files changed, 234 insertions(+), 161 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3422989c6bd..82cd16f4fb2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,6 +58,15 @@ option(USE_RAFT_STATIC "Build raft as a static library" OFF) option(CUGRAPH_COMPILE_RAFT_LIB "Compile the raft library instead of using it header-only" ON) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(BUILD_CUGRAPH_COMPONENTS_ALGORITHMS "Enable components algorithms" ON) +option(BUILD_CUGRAPH_SAMPLING_ALGORITHMS "Enable sampling algorithms" ON) +option(BUILD_CUGRAPH_CENTRALITY_ALGORITHMS "Enable centrality algorithms" ON) +option(BUILD_CUGRAPH_COMMUNITY_ALGORITHMS "Enable community algorithms" ON) +option(BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS "Enable traversal algorithms" ON) +option(BUILD_CUGRAPH_TREE_ALGORITHMS "Enable tree algorithms" ON) +option(BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS "Enable link analysis algorithms" ON) +option(BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS "Enable link prediction algorithms" ON) + message(VERBOSE "CUGRAPH: CUDA_STATIC_RUNTIME=${CUDA_STATIC_RUNTIME}") ################################################################################ @@ -150,7 +159,6 @@ endif() # which should give us a better parallel schedule. set(CUGRAPH_SOURCES - src/utilities/shuffle_vertices_mg_v32_fp.cu src/utilities/shuffle_vertices_mg_v32_integral.cu src/utilities/shuffle_vertices_mg_v64_fp.cu src/utilities/shuffle_vertices_mg_v64_integral.cu @@ -166,25 +174,6 @@ set(CUGRAPH_SOURCES src/detail/groupby_and_count_mg_v64_e64.cu src/detail/collect_comm_wrapper_mg_v32_e32.cu src/detail/collect_comm_wrapper_mg_v64_e64.cu - src/sampling/detail/conversion_utilities.cu - src/sampling/random_walks_mg_v64_e64.cu - src/sampling/random_walks_mg_v32_e32.cu - src/community/detail/common_methods_mg_v64_e64.cu - src/community/detail/common_methods_mg_v32_e32.cu - src/community/detail/common_methods_sg_v64_e64.cu - src/community/detail/common_methods_sg_v32_e32.cu - src/community/detail/refine_sg_v64_e64.cu - src/community/detail/refine_sg_v32_e32.cu - src/community/detail/refine_mg_v64_e64.cu - src/community/detail/refine_mg_v32_e32.cu - src/community/edge_triangle_count_sg_v64_e64.cu - src/community/edge_triangle_count_sg_v32_e32.cu - src/community/edge_triangle_count_mg_v64_e64.cu - src/community/edge_triangle_count_mg_v32_e32.cu - src/community/detail/maximal_independent_moves_sg_v64_e64.cu - src/community/detail/maximal_independent_moves_sg_v32_e32.cu - src/community/detail/maximal_independent_moves_mg_v64_e64.cu - src/community/detail/maximal_independent_moves_mg_v32_e32.cu src/detail/utility_wrappers_32.cu src/detail/utility_wrappers_64.cu src/structure/graph_view_mg_v64_e64.cu @@ -197,83 +186,12 @@ set(CUGRAPH_SOURCES src/utilities/path_retrieval_sg_v64_e64.cu src/structure/legacy/graph.cu src/linear_assignment/legacy/hungarian.cu - src/link_prediction/jaccard_sg_v64_e64.cu - src/link_prediction/jaccard_sg_v32_e32.cu - src/link_prediction/sorensen_sg_v64_e64.cu - src/link_prediction/sorensen_sg_v32_e32.cu - src/link_prediction/overlap_sg_v64_e64.cu - src/link_prediction/overlap_sg_v32_e32.cu - src/link_prediction/cosine_sg_v64_e64.cu - src/link_prediction/cosine_sg_v32_e32.cu - src/link_prediction/jaccard_mg_v64_e64.cu - src/link_prediction/jaccard_mg_v32_e32.cu - src/link_prediction/sorensen_mg_v64_e64.cu - src/link_prediction/sorensen_mg_v32_e32.cu - src/link_prediction/overlap_mg_v64_e64.cu - src/link_prediction/overlap_mg_v32_e32.cu - src/link_prediction/cosine_mg_v64_e64.cu - src/link_prediction/cosine_mg_v32_e32.cu src/layout/legacy/force_atlas2.cu src/converters/legacy/COOtoCSR.cu - src/community/legacy/spectral_clustering.cu - src/community/louvain_sg_v64_e64.cu - src/community/louvain_sg_v32_e32.cu - src/community/louvain_mg_v64_e64.cu - src/community/louvain_mg_v32_e32.cu - src/community/leiden_sg_v64_e64.cu - src/community/leiden_sg_v32_e32.cu - src/community/leiden_mg_v64_e64.cu - src/community/leiden_mg_v32_e32.cu - src/community/ecg_sg_v64_e64.cu - src/community/ecg_sg_v32_e32.cu - src/community/ecg_mg_v64_e64.cu - src/community/ecg_mg_v32_e32.cu - src/community/egonet_sg_v64_e64.cu - src/community/egonet_sg_v32_e32.cu - src/community/egonet_mg_v64_e64.cu - src/community/egonet_mg_v32_e32.cu - src/community/k_truss_sg_v64_e64.cu - src/community/k_truss_sg_v32_e32.cu - src/community/k_truss_mg_v64_e64.cu - src/community/k_truss_mg_v32_e32.cu src/lookup/lookup_src_dst_mg_v32_e32.cu src/lookup/lookup_src_dst_mg_v64_e64.cu src/lookup/lookup_src_dst_sg_v32_e32.cu src/lookup/lookup_src_dst_sg_v64_e64.cu - src/sampling/random_walks_old_sg_v32_e32.cu - src/sampling/random_walks_old_sg_v64_e64.cu - src/sampling/random_walks_sg_v64_e64.cu - src/sampling/random_walks_sg_v32_e32.cu - src/sampling/detail/prepare_next_frontier_sg_v64_e64.cu - src/sampling/detail/prepare_next_frontier_sg_v32_e32.cu - src/sampling/detail/prepare_next_frontier_mg_v64_e64.cu - src/sampling/detail/prepare_next_frontier_mg_v32_e32.cu - src/sampling/detail/gather_one_hop_edgelist_sg_v64_e64.cu - src/sampling/detail/gather_one_hop_edgelist_sg_v32_e32.cu - src/sampling/detail/gather_one_hop_edgelist_mg_v64_e64.cu - src/sampling/detail/gather_one_hop_edgelist_mg_v32_e32.cu - src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu - src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu - src/sampling/detail/check_edge_bias_values_sg_v64_e64.cu - src/sampling/detail/check_edge_bias_values_sg_v32_e32.cu - src/sampling/detail/check_edge_bias_values_mg_v64_e64.cu - src/sampling/detail/check_edge_bias_values_mg_v32_e32.cu - src/sampling/detail/sample_edges_sg_v64_e64.cu - src/sampling/detail/sample_edges_sg_v32_e32.cu - src/sampling/detail/sample_edges_mg_v64_e64.cu - src/sampling/detail/sample_edges_mg_v32_e32.cu - src/sampling/detail/shuffle_and_organize_output_mg_v64_e64.cu - src/sampling/detail/shuffle_and_organize_output_mg_v32_e32.cu - src/sampling/neighbor_sampling_mg_v32_e32.cu - src/sampling/neighbor_sampling_mg_v64_e64.cu - src/sampling/neighbor_sampling_sg_v32_e32.cu - src/sampling/neighbor_sampling_sg_v64_e64.cu - src/sampling/negative_sampling_sg_v32_e32.cu - src/sampling/negative_sampling_sg_v64_e64.cu - src/sampling/negative_sampling_mg_v32_e32.cu - src/sampling/negative_sampling_mg_v64_e64.cu - src/sampling/sampling_post_processing_sg_v64_e64.cu - src/sampling/sampling_post_processing_sg_v32_e32.cu src/cores/core_number_sg_v64_e64.cu src/cores/core_number_sg_v32_e32.cu src/cores/core_number_mg_v64_e64.cu @@ -343,40 +261,6 @@ set(CUGRAPH_SOURCES src/structure/select_random_vertices_sg_v32_e32.cu src/structure/select_random_vertices_mg_v64_e64.cu src/structure/select_random_vertices_mg_v32_e32.cu - src/traversal/extract_bfs_paths_sg_v64_e64.cu - src/traversal/extract_bfs_paths_sg_v32_e32.cu - src/traversal/extract_bfs_paths_mg_v64_e64.cu - src/traversal/extract_bfs_paths_mg_v32_e32.cu - src/traversal/bfs_sg_v64_e64.cu - src/traversal/bfs_sg_v32_e32.cu - src/traversal/bfs_mg_v64_e64.cu - src/traversal/bfs_mg_v32_e32.cu - src/traversal/sssp_sg_v64_e64.cu - src/traversal/sssp_sg_v32_e32.cu - src/traversal/od_shortest_distances_sg_v64_e64.cu - src/traversal/od_shortest_distances_sg_v32_e32.cu - src/traversal/sssp_mg_v64_e64.cu - src/traversal/sssp_mg_v32_e32.cu - src/link_analysis/hits_sg_v64_e64.cu - src/link_analysis/hits_sg_v32_e32.cu - src/link_analysis/hits_mg_v64_e64.cu - src/link_analysis/hits_mg_v32_e32.cu - src/link_analysis/pagerank_sg_v64_e64.cu - src/link_analysis/pagerank_sg_v32_e32.cu - src/link_analysis/pagerank_mg_v64_e64.cu - src/link_analysis/pagerank_mg_v32_e32.cu - src/centrality/katz_centrality_sg_v64_e64.cu - src/centrality/katz_centrality_sg_v32_e32.cu - src/centrality/katz_centrality_mg_v64_e64.cu - src/centrality/katz_centrality_mg_v32_e32.cu - src/centrality/eigenvector_centrality_sg_v64_e64.cu - src/centrality/eigenvector_centrality_sg_v32_e32.cu - src/centrality/eigenvector_centrality_mg_v64_e64.cu - src/centrality/eigenvector_centrality_mg_v32_e32.cu - src/centrality/betweenness_centrality_sg_v64_e64.cu - src/centrality/betweenness_centrality_sg_v32_e32.cu - src/centrality/betweenness_centrality_mg_v64_e64.cu - src/centrality/betweenness_centrality_mg_v32_e32.cu src/tree/legacy/mst.cu src/from_cugraph_ops/sampling_index.cu src/components/weakly_connected_components_sg_v64_e64.cu @@ -407,18 +291,6 @@ set(CUGRAPH_SOURCES src/structure/symmetrize_edgelist_sg_v32_e32.cu src/structure/symmetrize_edgelist_mg_v64_e64.cu src/structure/symmetrize_edgelist_mg_v32_e32.cu - src/community/triangle_count_sg_v64_e64.cu - src/community/triangle_count_sg_v32_e32.cu - src/community/triangle_count_mg_v64_e64.cu - src/community/triangle_count_mg_v32_e32.cu - src/community/approx_weighted_matching_sg_v64_e64.cu - src/community/approx_weighted_matching_sg_v32_e32.cu - src/community/approx_weighted_matching_mg_v64_e64.cu - src/community/approx_weighted_matching_mg_v32_e32.cu - src/traversal/k_hop_nbrs_sg_v64_e64.cu - src/traversal/k_hop_nbrs_sg_v32_e32.cu - src/traversal/k_hop_nbrs_mg_v64_e64.cu - src/traversal/k_hop_nbrs_mg_v32_e32.cu src/mtmg/vertex_result_sg_v32_e32.cu src/mtmg/vertex_result_sg_v64_e64.cu src/mtmg/vertex_result_mg_v32_e32.cu @@ -429,6 +301,175 @@ set(CUGRAPH_SOURCES src/mtmg/vertex_pairs_result_mg_v64_e64.cu ) +if (BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/link_analysis/hits_sg_v64_e64.cu + src/link_analysis/hits_sg_v32_e32.cu + src/link_analysis/hits_mg_v64_e64.cu + src/link_analysis/hits_mg_v32_e32.cu + src/link_analysis/pagerank_sg_v64_e64.cu + src/link_analysis/pagerank_sg_v32_e32.cu + src/link_analysis/pagerank_mg_v64_e64.cu + src/link_analysis/pagerank_mg_v32_e32.cu +) +endif() # BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS + +if (BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/link_prediction/jaccard_sg_v64_e64.cu + src/link_prediction/jaccard_sg_v32_e32.cu + src/link_prediction/sorensen_sg_v64_e64.cu + src/link_prediction/sorensen_sg_v32_e32.cu + src/link_prediction/overlap_sg_v64_e64.cu + src/link_prediction/overlap_sg_v32_e32.cu + src/link_prediction/cosine_sg_v64_e64.cu + src/link_prediction/cosine_sg_v32_e32.cu + src/link_prediction/jaccard_mg_v64_e64.cu + src/link_prediction/jaccard_mg_v32_e32.cu + src/link_prediction/sorensen_mg_v64_e64.cu + src/link_prediction/sorensen_mg_v32_e32.cu + src/link_prediction/overlap_mg_v64_e64.cu + src/link_prediction/overlap_mg_v32_e32.cu + src/link_prediction/cosine_mg_v64_e64.cu + src/link_prediction/cosine_mg_v32_e32.cu +) +endif() # BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS + +if (BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/traversal/extract_bfs_paths_sg_v64_e64.cu + src/traversal/extract_bfs_paths_sg_v32_e32.cu + src/traversal/extract_bfs_paths_mg_v64_e64.cu + src/traversal/extract_bfs_paths_mg_v32_e32.cu + src/traversal/bfs_sg_v64_e64.cu + src/traversal/bfs_sg_v32_e32.cu + src/traversal/bfs_mg_v64_e64.cu + src/traversal/bfs_mg_v32_e32.cu + src/traversal/sssp_sg_v64_e64.cu + src/traversal/sssp_sg_v32_e32.cu + src/traversal/od_shortest_distances_sg_v64_e64.cu + src/traversal/od_shortest_distances_sg_v32_e32.cu + src/traversal/sssp_mg_v64_e64.cu + src/traversal/sssp_mg_v32_e32.cu + src/traversal/k_hop_nbrs_sg_v64_e64.cu + src/traversal/k_hop_nbrs_sg_v32_e32.cu + src/traversal/k_hop_nbrs_mg_v64_e64.cu + src/traversal/k_hop_nbrs_mg_v32_e32.cu +) +endif() # BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS + +if (BUILD_CUGRAPH_SAMPLING_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/sampling/random_walks_old_sg_v32_e32.cu + src/sampling/random_walks_old_sg_v64_e64.cu + src/sampling/random_walks_sg_v64_e64.cu + src/sampling/random_walks_sg_v32_e32.cu + src/sampling/detail/prepare_next_frontier_sg_v64_e64.cu + src/sampling/detail/prepare_next_frontier_sg_v32_e32.cu + src/sampling/detail/prepare_next_frontier_mg_v64_e64.cu + src/sampling/detail/prepare_next_frontier_mg_v32_e32.cu + src/sampling/detail/gather_one_hop_edgelist_sg_v64_e64.cu + src/sampling/detail/gather_one_hop_edgelist_sg_v32_e32.cu + src/sampling/detail/gather_one_hop_edgelist_mg_v64_e64.cu + src/sampling/detail/gather_one_hop_edgelist_mg_v32_e32.cu + src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu + src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu + src/sampling/detail/check_edge_bias_values_sg_v64_e64.cu + src/sampling/detail/check_edge_bias_values_sg_v32_e32.cu + src/sampling/detail/check_edge_bias_values_mg_v64_e64.cu + src/sampling/detail/check_edge_bias_values_mg_v32_e32.cu + src/sampling/detail/sample_edges_sg_v64_e64.cu + src/sampling/detail/sample_edges_sg_v32_e32.cu + src/sampling/detail/sample_edges_mg_v64_e64.cu + src/sampling/detail/sample_edges_mg_v32_e32.cu + src/sampling/detail/shuffle_and_organize_output_mg_v64_e64.cu + src/sampling/detail/shuffle_and_organize_output_mg_v32_e32.cu + src/sampling/neighbor_sampling_mg_v32_e32.cu + src/sampling/neighbor_sampling_mg_v64_e64.cu + src/sampling/neighbor_sampling_sg_v32_e32.cu + src/sampling/neighbor_sampling_sg_v64_e64.cu + src/sampling/negative_sampling_sg_v32_e32.cu + src/sampling/negative_sampling_sg_v64_e64.cu + src/sampling/negative_sampling_mg_v32_e32.cu + src/sampling/negative_sampling_mg_v64_e64.cu + src/sampling/sampling_post_processing_sg_v64_e64.cu + src/sampling/sampling_post_processing_sg_v32_e32.cu + src/sampling/detail/conversion_utilities.cu + src/sampling/random_walks_mg_v64_e64.cu + src/sampling/random_walks_mg_v32_e32.cu +) +endif() + +if (BUILD_CUGRAPH_CENTRALITY_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/centrality/katz_centrality_sg_v64_e64.cu + src/centrality/katz_centrality_sg_v32_e32.cu + src/centrality/katz_centrality_mg_v64_e64.cu + src/centrality/katz_centrality_mg_v32_e32.cu + src/centrality/eigenvector_centrality_sg_v64_e64.cu + src/centrality/eigenvector_centrality_sg_v32_e32.cu + src/centrality/eigenvector_centrality_mg_v64_e64.cu + src/centrality/eigenvector_centrality_mg_v32_e32.cu + src/centrality/betweenness_centrality_sg_v64_e64.cu + src/centrality/betweenness_centrality_sg_v32_e32.cu + src/centrality/betweenness_centrality_mg_v64_e64.cu + src/centrality/betweenness_centrality_mg_v32_e32.cu +) +endif() + +if (BUILD_CUGRAPH_COMMUNITY_ALGORITHMS) +list(APPEND CUGRAPH_SOURCES + src/community/detail/common_methods_mg_v64_e64.cu + src/community/detail/common_methods_mg_v32_e32.cu + src/community/detail/common_methods_sg_v64_e64.cu + src/community/detail/common_methods_sg_v32_e32.cu + src/community/detail/refine_sg_v64_e64.cu + src/community/detail/refine_sg_v32_e32.cu + src/community/detail/refine_mg_v64_e64.cu + src/community/detail/refine_mg_v32_e32.cu + src/community/edge_triangle_count_sg_v64_e64.cu + src/community/edge_triangle_count_sg_v32_e32.cu + src/community/edge_triangle_count_mg_v64_e64.cu + src/community/edge_triangle_count_mg_v32_e32.cu + src/community/detail/maximal_independent_moves_sg_v64_e64.cu + src/community/detail/maximal_independent_moves_sg_v32_e32.cu + src/community/detail/maximal_independent_moves_mg_v64_e64.cu + src/community/detail/maximal_independent_moves_mg_v32_e32.cu + src/community/legacy/spectral_clustering.cu + src/community/louvain_sg_v64_e64.cu + src/community/louvain_sg_v32_e32.cu + src/community/louvain_mg_v64_e64.cu + src/community/louvain_mg_v32_e32.cu + src/community/leiden_sg_v64_e64.cu + src/community/leiden_sg_v32_e32.cu + src/community/leiden_mg_v64_e64.cu + src/community/leiden_mg_v32_e32.cu + src/community/ecg_sg_v64_e64.cu + src/community/ecg_sg_v32_e32.cu + src/community/ecg_mg_v64_e64.cu + src/community/ecg_mg_v32_e32.cu + src/community/egonet_sg_v64_e64.cu + src/community/egonet_sg_v32_e32.cu + src/community/egonet_mg_v64_e64.cu + src/community/egonet_mg_v32_e32.cu + src/community/k_truss_sg_v64_e64.cu + src/community/k_truss_sg_v32_e32.cu + src/community/k_truss_mg_v64_e64.cu + src/community/k_truss_mg_v32_e32.cu + + src/community/triangle_count_sg_v64_e64.cu + src/community/triangle_count_sg_v32_e32.cu + src/community/triangle_count_mg_v64_e64.cu + src/community/triangle_count_mg_v32_e32.cu + src/community/approx_weighted_matching_sg_v64_e64.cu + src/community/approx_weighted_matching_sg_v32_e32.cu + src/community/approx_weighted_matching_mg_v64_e64.cu + src/community/approx_weighted_matching_mg_v32_e32.cu +) +endif() + + + add_library(cugraph ${CUGRAPH_SOURCES}) set_target_properties(cugraph diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh index 311b16e71ec..4914b7cbcbd 100644 --- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh +++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -1610,7 +1610,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, edge_partition.major_range_first(), handle.get_stream()); assert((*key_segment_offsets).back() == *((*key_segment_offsets).rbegin() + 1)); - assert(sorted_uniue_nzd_key_last == sorted_unique_key_first + (*key_segment_offsets).back()); + assert(sorted_unique_nzd_key_last == sorted_unique_key_first + (*key_segment_offsets).back()); } } else { tmp_vertex_value_output_first = vertex_value_output_first; diff --git a/cpp/src/prims/vertex_frontier.cuh b/cpp/src/prims/vertex_frontier.cuh index 6e7d8515beb..8bfced42d5e 100644 --- a/cpp/src/prims/vertex_frontier.cuh +++ b/cpp/src/prims/vertex_frontier.cuh @@ -227,8 +227,8 @@ void retrieve_vertex_list_from_bitmap( { using vertex_t = typename thrust::iterator_traits::value_type; - assert((comm.get_rank() != root) || - (bitmap.size() >= packed_bool_size(vertex_range_last - vertex_ragne_first))); + //assert((comm.get_rank() != root) || + // (bitmap.size() >= packed_bool_size(vertex_range_last - vertex_ragne_first))); detail::copy_if_nosync(thrust::make_counting_iterator(vertex_range_first), thrust::make_counting_iterator(vertex_range_last), thrust::make_transform_iterator( diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6800b9c4769..54b2f79a6cf 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -330,6 +330,7 @@ ConfigureTest(GRAPH_GENERATORS_TEST generators/generators_test.cpp) # - erdos renyi graph generator tests ------------------------------------------------------------- ConfigureTest(ERDOS_RENYI_GENERATOR_TEST generators/erdos_renyi_test.cpp) +if (BUILD_CUGRAPH_COMMUNITY_ALGORITHMS) ################################################################################################### # - LOUVAIN tests --------------------------------------------------------------------------------- ConfigureTest(LOUVAIN_TEST community/louvain_test.cpp) @@ -350,6 +351,8 @@ ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) # - EGO tests ------------------------------------------------------------------------------------- ConfigureTest(EGONET_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) +endif() # BUILD_CUGRAPH_COMMUNITY_ALGORITHMS + ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- ConfigureTest(LEGACY_FA2_TEST layout/legacy/force_atlas2_test.cu) @@ -422,6 +425,7 @@ ConfigureTest(INDUCED_SUBGRAPH_TEST structure/induced_subgraph_test.cpp) # - Temporal tests ------------------------------------------------------------------------------- ConfigureTest(TEMPORAL_GRAPH_TEST structure/temporal_graph_test.cpp) +if(BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS) ################################################################################################### # - BFS tests ------------------------------------------------------------------------------------- ConfigureTest(BFS_TEST traversal/bfs_test.cpp) @@ -443,6 +447,9 @@ ConfigureTest(SSSP_TEST traversal/sssp_test.cpp) # - OD_SHORTEST_DISTANCES tests ------------------------------------------------------------------- ConfigureTest(OD_SHORTEST_DISTANCES_TEST traversal/od_shortest_distances_test.cpp) +endif() # BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS + +if (BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS) ################################################################################################### # - HITS tests ------------------------------------------------------------------------------------ ConfigureTest(HITS_TEST link_analysis/hits_test.cpp) @@ -450,7 +457,9 @@ ConfigureTest(HITS_TEST link_analysis/hits_test.cpp) ################################################################################################### # - PAGERANK tests -------------------------------------------------------------------------------- ConfigureTest(PAGERANK_TEST link_analysis/pagerank_test.cpp) +endif () # BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS +if (BUILD_CUGRAPH_CENTRALITY_ALGORITHMS) ################################################################################################### # - KATZ_CENTRALITY tests ------------------------------------------------------------------------- ConfigureTest(KATZ_CENTRALITY_TEST centrality/katz_centrality_test.cpp) @@ -464,6 +473,8 @@ ConfigureTest(EIGENVECTOR_CENTRALITY_TEST centrality/eigenvector_centrality_test ConfigureTest(BETWEENNESS_CENTRALITY_TEST centrality/betweenness_centrality_test.cpp) ConfigureTest(EDGE_BETWEENNESS_CENTRALITY_TEST centrality/edge_betweenness_centrality_test.cpp) +endif() # BUILD_CUGRAPH_CENTRALITY_ALGORITHMS + ################################################################################################### # - WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------------- ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_components_test.cpp) @@ -478,6 +489,7 @@ target_include_directories(MIS_TEST PRIVATE "${CUGRAPH_SOURCE_DIR}/src") ConfigureTest(VERTEX_COLORING_TEST components/vertex_coloring_test.cu) target_include_directories(VERTEX_COLORING_TEST PRIVATE "${CUGRAPH_SOURCE_DIR}/src") +if (BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS) ################################################################################################### # - SIMILARITY tests ------------------------------------------------------------------------------ ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) @@ -485,7 +497,9 @@ ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) ################################################################################################### # - WEIGHTED_SIMILARITY tests --------------------------------------------------------------------- ConfigureTest(WEIGHTED_SIMILARITY_TEST link_prediction/weighted_similarity_test.cpp) +endif() # BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS +if (BUILD_CUGRAPH_SAMPLING_ALGORITHMS) ################################################################################################### # - RANDOM_WALKS tests ---------------------------------------------------------------------------- # FIXME: Rename to random_walks_test.cu once the legacy implementation is deleted @@ -531,6 +545,8 @@ ConfigureTest(SAMPLING_HETEROGENEOUS_POST_PROCESSING_TEST # - NEGATIVE SAMPLING tests -------------------------------------------------------------------- ConfigureTest(NEGATIVE_SAMPLING_TEST sampling/negative_sampling.cpp PERCENT 100) +endif() # BUILD_CUGRAPH_SAMPLING_ALGORITHMS + ################################################################################################### # - Renumber tests -------------------------------------------------------------------------------- ConfigureTest(RENUMBERING_TEST structure/renumbering_test.cpp) @@ -543,6 +559,7 @@ ConfigureTest(CORE_NUMBER_TEST cores/core_number_test.cpp) # - Core Number tests ----------------------------------------------------------------------------- ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) +if (BUILD_CUGRAPH_COMMUNITY_ALGORITHMS) ################################################################################################### # - K-truss tests -------------------------------------------------------------------------- ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) @@ -555,6 +572,8 @@ ConfigureTest(TRIANGLE_COUNT_TEST community/triangle_count_test.cpp) # - Edge Triangle Count tests --------------------------------------------------------------------- ConfigureTest(EDGE_TRIANGLE_COUNT_TEST community/edge_triangle_count_test.cpp) +endif() # BUILD_CUGRAPH_COMMUNITY_ALGORITHMS + ################################################################################################### # - EDGE SOURCE DESTINATION LOOKUP tests ---------------------------------------------------------- ConfigureTest(LOOKUP_SRC_DST_TEST lookup/lookup_src_dst_test.cpp) @@ -605,6 +624,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_HAS_EDGE_AND_COMPUTE_MULTIPLICITY_TEST "structure/mg_has_edge_and_compute_multiplicity_test.cpp") +if (BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS) ############################################################################################### # - MG PAGERANK tests ------------------------------------------------------------------------- ConfigureTestMG(MG_PAGERANK_TEST link_analysis/mg_pagerank_test.cpp) @@ -612,7 +632,9 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG HITS tests ----------------------------------------------------------------------------- ConfigureTestMG(MG_HITS_TEST link_analysis/mg_hits_test.cpp) +endif() # BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS +if (BUILD_CUGRAPH_CENTRALITY_ALGORITHMS) ############################################################################################### # - MG KATZ CENTRALITY tests ------------------------------------------------------------------ ConfigureTestMG(MG_KATZ_CENTRALITY_TEST centrality/mg_katz_centrality_test.cpp) @@ -626,20 +648,9 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_BETWEENNESS_CENTRALITY_TEST centrality/mg_betweenness_centrality_test.cpp) ConfigureTestMG(MG_EDGE_BETWEENNESS_CENTRALITY_TEST centrality/mg_edge_betweenness_centrality_test.cpp) +endif() # BUILD_CUGRAPH_CENTRALITY_ALGORITHMS - ############################################################################################### - # - MG BFS tests ------------------------------------------------------------------------------ - ConfigureTestMG(MG_BFS_TEST traversal/mg_bfs_test.cpp) - - ############################################################################################### - # - Extract BFS Paths tests ------------------------------------------------------------------- - ConfigureTestMG(MG_EXTRACT_BFS_PATHS_TEST - traversal/mg_extract_bfs_paths_test.cu) - - ############################################################################################### - # - MG SSSP tests ----------------------------------------------------------------------------- - ConfigureTestMG(MG_SSSP_TEST traversal/mg_sssp_test.cpp) - +if (BUILD_CUGRAPH_COMMUNITY_ALGORITHMS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_LOUVAIN_TEST community/mg_louvain_test.cpp) @@ -656,10 +667,6 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG ECG tests -------------------------------------------------------------------------- ConfigureTestMG(MG_ECG_TEST community/mg_ecg_test.cpp) - ############################################################################################### - # - MG SELECT RANDOM VERTICES tests ----------------------------------------------------------- - ConfigureTestMG(MG_SELECT_RANDOM_VERTICES structure/mg_select_random_vertices_test.cpp) - ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) @@ -672,6 +679,15 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG K-TRUSS tests -------------------------------------------------------------------------- ConfigureTestMG(MG_K_TRUSS_TEST community/mg_k_truss_test.cpp) + ############################################################################################### + # - MG TRIANGLE COUNT tests ------------------------------------------------------------------- + ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) +endif() # BUILD_CUGRAPH_COMMUNITY_ALGORITHMS + + ############################################################################################### + # - MG SELECT RANDOM VERTICES tests ----------------------------------------------------------- + ConfigureTestMG(MG_SELECT_RANDOM_VERTICES structure/mg_select_random_vertices_test.cpp) + ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST @@ -697,10 +713,6 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG K Core tests --------------------------------------------------------------------------- ConfigureTestMG(MG_K_CORE_TEST cores/mg_k_core_test.cpp) - ############################################################################################### - # - MG TRIANGLE COUNT tests ------------------------------------------------------------------- - ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) - ############################################################################################### # - MG coarsening tests ----------------------------------------------------------------------- ConfigureTestMG(MG_COARSEN_GRAPH_TEST structure/mg_coarsen_graph_test.cpp) @@ -782,6 +794,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu) +if (BUILD_CUGRAPH_SAMPLING_ALGORITHMS) ############################################################################################### # - MG UNIFORM NBR SAMPLING tests ------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cpp) @@ -818,7 +831,9 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG RANDOM_WALKS tests --------------------------------------------------------------------- ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) +endif() # BUILD_CUGRAPH_SAMPLING_ALGORITHMS +if (BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS) ############################################################################################### # - MG WEIGHTED_SIMILARITY tests -------------------------------------------------------------- ConfigureTestMG(MG_WEIGHTED_SIMILARITY_TEST link_prediction/mg_weighted_similarity_test.cpp) @@ -826,11 +841,28 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG SIMILARITY tests ----------------------------------------------------------------------- ConfigureTestMG(MG_SIMILARITY_TEST link_prediction/mg_similarity_test.cpp) +endif() # BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS +if (BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS) ############################################################################################### # - MG K_HOP_NBRS tests ----------------------------------------------------------------------- ConfigureTestMG(MG_K_HOP_NBRS_TEST traversal/mg_k_hop_nbrs_test.cpp) + ############################################################################################### + # - MG BFS tests ------------------------------------------------------------------------------ + ConfigureTestMG(MG_BFS_TEST traversal/mg_bfs_test.cpp) + + ############################################################################################### + # - Extract BFS Paths tests ------------------------------------------------------------------- + ConfigureTestMG(MG_EXTRACT_BFS_PATHS_TEST + traversal/mg_extract_bfs_paths_test.cu) + + ############################################################################################### + # - MG SSSP tests ----------------------------------------------------------------------------- + ConfigureTestMG(MG_SSSP_TEST traversal/mg_sssp_test.cpp) + +endif() # BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS + ############################################################################################### # - MG C API tests ---------------------------------------------------------------------------- ConfigureCTestMG(MG_CAPI_CREATE_GRAPH_TEST c_api/mg_create_graph_test.c) From f96065050bacd29df5e8dae0ed5682094a4da1e6 Mon Sep 17 00:00:00 2001 From: Cedric Augonnet Date: Wed, 22 Jan 2025 16:26:34 +0100 Subject: [PATCH 10/11] Save WIP for page rank with STF --- .../prims/detail/per_v_transform_reduce_e.cuh | 87 ++++++++++++++----- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh index 4914b7cbcbd..4e1f220f863 100644 --- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh +++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -65,6 +65,11 @@ #include #include +#include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -1151,6 +1156,15 @@ void per_v_transform_reduce_e_edge_partition( std::optional> key_segment_offsets, std::optional> const& edge_partition_stream_pool_indices) { + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); + + logical_data output_tokens[4]; + for (size_t i = 0; i < 4; i++) + { + output_tokens[i] = cudastf_ctx.logical_token(); + } + constexpr bool use_input_key = !std::is_same_v; using vertex_t = typename GraphViewType::vertex_type; @@ -1174,10 +1188,13 @@ void per_v_transform_reduce_e_edge_partition( if constexpr (update_major && !use_input_key) { // this is necessary as we don't visit // every vertex in the hypersparse segment - thrust::fill(rmm::exec_policy_nosync(exec_stream), - output_buffer + (*key_segment_offsets)[3], - output_buffer + (*key_segment_offsets)[4], - major_init); + // TODO task write output_token[3] + cudastf_ctx.task(output_tokens[3].write())->*[=](cudaStream_t stream) { + thrust::fill(rmm::exec_policy_nosync(stream), + output_buffer + (*key_segment_offsets)[3], + output_buffer + (*key_segment_offsets)[4], + major_init); + }; } auto segment_size = use_input_key @@ -1187,8 +1204,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_thread_t update_grid(segment_size, detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[3]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[3]; token_idx +=3; } auto segment_key_first = edge_partition_key_first; auto segment_key_last = edge_partition_key_last; if constexpr (use_input_key) { @@ -1199,20 +1217,22 @@ void per_v_transform_reduce_e_edge_partition( assert(segment_key_first == nullptr); assert(segment_key_last == nullptr); } - detail::per_v_transform_reduce_e_hypersparse - <<>>( - edge_partition, - segment_key_first, - segment_key_last, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - segment_output_buffer, - e_op, - major_init, - reduce_op, - pred_op); + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { + detail::per_v_transform_reduce_e_hypersparse + <<>>( + edge_partition, + segment_key_first, + segment_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + segment_output_buffer, + e_op, + major_init, + reduce_op, + pred_op); + }; } } if ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]) { @@ -1223,8 +1243,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_thread_t update_grid((*key_segment_offsets)[3] - (*key_segment_offsets)[2], detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[2]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[2]; token_idx += 2; } std::optional segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor // is a deleted function, segment_key_first should always have a value @@ -1234,8 +1255,10 @@ void per_v_transform_reduce_e_edge_partition( segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } *segment_key_first += (*key_segment_offsets)[2]; + + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]), @@ -1248,6 +1271,7 @@ void per_v_transform_reduce_e_edge_partition( major_init, reduce_op, pred_op); + }; } if ((*key_segment_offsets)[2] - (*key_segment_offsets)[1] > 0) { auto exec_stream = edge_partition_stream_pool_indices @@ -1257,8 +1281,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_warp_t update_grid((*key_segment_offsets)[2] - (*key_segment_offsets)[1], detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[1]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[1]; token_idx += 1;} std::optional segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor // is a deleted function, segment_key_first should always have a value @@ -1268,8 +1293,10 @@ void per_v_transform_reduce_e_edge_partition( segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } *segment_key_first += (*key_segment_offsets)[1]; + + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_mid_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + ((*key_segment_offsets)[2] - (*key_segment_offsets)[1]), @@ -1283,6 +1310,7 @@ void per_v_transform_reduce_e_edge_partition( major_identity_element, reduce_op, pred_op); + }; } if ((*key_segment_offsets)[1] > 0) { auto exec_stream = edge_partition_stream_pool_indices @@ -1303,8 +1331,9 @@ void per_v_transform_reduce_e_edge_partition( } else { segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } + cudastf_ctx.task(output_tokens[0].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_high_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + (*key_segment_offsets)[1], @@ -1318,6 +1347,7 @@ void per_v_transform_reduce_e_edge_partition( major_identity_element, reduce_op, pred_op); + }; } } else { auto exec_stream = edge_partition_stream_pool_indices @@ -1361,6 +1391,8 @@ void per_v_transform_reduce_e_edge_partition( pred_op); } } + + cudastf_ctx.finalize(); } template Date: Wed, 22 Jan 2025 16:41:57 +0100 Subject: [PATCH 11/11] use async resources saved in raft handles to initialize the stream ctx --- cpp/src/prims/detail/extract_transform_v_frontier_e.cuh | 4 +++- cpp/src/prims/detail/transform_v_frontier_e.cuh | 5 +++-- cpp/src/prims/transform_reduce_e.cuh | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 93823851843..5091061c9ca 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -60,6 +60,7 @@ #include #include +#include using namespace cuda::experimental::stf; @@ -706,7 +707,8 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && KeyBucketType::is_sorted_unique; - stream_ctx cudastf_ctx(handle.get_stream()); + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); if (do_expensive_check) { auto frontier_vertex_first = diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index b01e38e40e1..23e00930dcf 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -37,6 +37,7 @@ #include #include +#include using namespace cuda::experimental::stf; @@ -415,8 +416,8 @@ auto transform_v_frontier_e(raft::handle_t const& handle, typename EdgeValueInputWrapper::value_iterator, typename EdgeValueInputWrapper::value_type>>; -// cudastf::async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); - stream_ctx cudastf_ctx(handle.get_stream()); + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); auto edge_mask_view = graph_view.edge_mask_view(); diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 8d21c075402..f1fd0742946 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -48,6 +48,7 @@ #include #include +#include using namespace cuda::experimental::stf; @@ -474,7 +475,8 @@ T transform_reduce_e(raft::handle_t const& handle, // currently, nothing to do } - stream_ctx cudastf_ctx(handle.get_stream()); + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); property_op edge_property_add{};