diff --git a/cpp/src/groupby/hash/compute_aggregations.cuh b/cpp/src/groupby/hash/compute_aggregations.cuh index 9c9a4c97bff..df8fcf4690f 100644 --- a/cpp/src/groupby/hash/compute_aggregations.cuh +++ b/cpp/src/groupby/hash/compute_aggregations.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,13 +67,17 @@ rmm::device_uvector compute_aggregations( auto const grid_size = max_occupancy_grid_size>(num_rows); auto const available_shmem_size = get_available_shared_memory_size(grid_size); - auto const has_sufficient_shmem = - available_shmem_size > (compute_shmem_offsets_size(flattened_values.num_columns()) * 2); - auto const has_dictionary_request = std::any_of( - requests.begin(), requests.end(), [](cudf::groupby::aggregation_request const& request) { - return cudf::is_dictionary(request.values.type()); + auto const offsets_buffer_size = compute_shmem_offsets_size(flattened_values.num_columns()) * 2; + auto const data_buffer_size = available_shmem_size - offsets_buffer_size; + auto const is_shared_memory_compatible = std::all_of( + requests.begin(), requests.end(), [&](cudf::groupby::aggregation_request const& request) { + if (cudf::is_dictionary(request.values.type())) { return false; } + // Ensure there is enough buffer space to store local aggregations up to the max cardinality + // for shared memory aggregations + auto const size = cudf::type_dispatcher(request.values.type(), + size_of_functor{}); + return static_cast(data_buffer_size) >= (size * GROUPBY_CARDINALITY_THRESHOLD); }); - auto const is_shared_memory_compatible = !has_dictionary_request and has_sufficient_shmem; // Performs naive global memory aggregations when the workload is not compatible with shared // memory, such as when aggregating dictionary columns or when there is insufficient dynamic diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu index f0361ccced2..ae7584da483 100644 --- a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,15 +35,6 @@ namespace cudf::groupby::detail::hash { namespace { -/// Functor used by type dispatcher returning the size of the underlying C++ type -struct size_of_functor { - template - __device__ constexpr cudf::size_type operator()() - { - return sizeof(T); - } -}; - /// Shared memory data alignment CUDF_HOST_DEVICE cudf::size_type constexpr ALIGNMENT = 8; diff --git a/cpp/src/groupby/hash/single_pass_functors.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh index 048c9252773..c02087072a0 100644 --- a/cpp/src/groupby/hash/single_pass_functors.cuh +++ b/cpp/src/groupby/hash/single_pass_functors.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,15 @@ #include namespace cudf::groupby::detail::hash { +/// Functor used by type dispatcher returning the size of the underlying C++ type +struct size_of_functor { + template + CUDF_HOST_DEVICE constexpr cudf::size_type operator()() + { + return sizeof(T); + } +}; + // TODO: TO BE REMOVED issue tracked via #17171 template __device__ constexpr bool is_supported()