diff --git a/include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp b/include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp index 42201f5d..722ad076 100644 --- a/include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp +++ b/include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp @@ -19,10 +19,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { constexpr int32_t reserve{1000000}; template - using PointsView = typename PointsAlpaka::PointsAlpakaView; - - template - ALPAKA_FN_ACC void getCoords(float* coords, PointsView* d_points, uint32_t i) { + ALPAKA_FN_ACC void getCoords(float* coords, PointsAlpakaView* d_points, uint32_t i) { for (auto dim = 0; dim < Ndim; ++dim) { coords[dim] = d_points->coords[i + dim * d_points->n]; } @@ -55,7 +52,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { struct KernelFillTiles { template ALPAKA_FN_ACC void operator()(const TAcc& acc, - PointsView* points, + PointsAlpakaView* points, TilesAlpaka* tiles, uint32_t n_points) const { for (auto index : alpaka::uniformElements(acc, n_points)) { @@ -72,7 +69,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { VecArray& base_vec, const VecArray, Ndim>& search_box, TilesAlpaka* tiles, - PointsView* dev_points, + PointsAlpakaView* dev_points, const KernelType& kernel, /* const VecArray, Ndim>& domains, */ const float* coords_i, @@ -128,7 +125,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { template ALPAKA_FN_ACC void operator()(const TAcc& acc, TilesAlpaka* dev_tiles, - PointsView* dev_points, + PointsAlpakaView* dev_points, const KernelType& kernel, /* const VecArray, Ndim>& domains, */ float dc, @@ -175,7 +172,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { VecArray& base_vec, const VecArray, Ndim>& s_box, TilesAlpaka* tiles, - PointsView* dev_points, + PointsAlpakaView* dev_points, /* const VecArray, Ndim>& domains, */ const float* coords_i, float rho_i, @@ -242,7 +239,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { template ALPAKA_FN_ACC void operator()(const TAcc& acc, TilesAlpaka* dev_tiles, - PointsView* dev_points, + PointsAlpakaView* dev_points, /* const VecArray, Ndim>& domains, */ float dm, float, @@ -294,7 +291,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { ALPAKA_FN_ACC void operator()(const TAcc& acc, VecArray* seeds, VecArray* followers, - PointsView* dev_points, + PointsAlpakaView* dev_points, float dm, float d_c, float rho_c, @@ -329,7 +326,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { ALPAKA_FN_ACC void operator()(const TAcc& acc, VecArray* seeds, VecArray* followers, - PointsView* dev_points) const { + PointsAlpakaView* dev_points) const { const auto& seeds_0{*seeds}; const auto n_seeds{seeds_0.size()}; for (auto idx_cls : alpaka::uniformElements(acc, n_seeds)) { diff --git a/include/CLUEstering/CLUEstering.hpp b/include/CLUEstering/CLUEstering.hpp index 54d69c3b..977797a2 100644 --- a/include/CLUEstering/CLUEstering.hpp +++ b/include/CLUEstering/CLUEstering.hpp @@ -35,11 +35,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { template void make_clusters(PointsSoA& h_points, - PointsAlpaka& d_points, const KernelType& kernel, Queue queue_, std::size_t block_size); + template + void make_clusters(PointsSoA& h_points, + PointsAlpaka& dev_points, + const KernelType& kernel, + Queue queue_, + std::size_t block_size); + + std::map> getClusters(const PointsSoA& h_points); + private: float dc_; float rhoc_; @@ -54,11 +62,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { std::optional>> d_seeds; std::optional[]>> d_followers; + std::optional> d_points; // Private methods void init_device(Queue queue_); void setup(const PointsSoA& h_points, - PointsAlpaka& d_points, + PointsAlpaka& dev_points, Queue queue_, std::size_t block_size); @@ -105,7 +114,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { template void CLUEAlgoAlpaka::setup(const PointsSoA& h_points, - PointsAlpaka& d_points, + PointsAlpaka& dev_points, Queue queue_, std::size_t block_size) { // calculate the number of tiles and their size @@ -133,7 +142,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { const auto copyExtent = (Ndim + 1) * h_points.nPoints(); alpaka::memcpy(queue_, - d_points.input_buffer, + dev_points.input_buffer, clue::make_host_view(h_points.coords(), copyExtent), copyExtent); alpaka::memset(queue_, *d_seeds, 0x00); @@ -151,11 +160,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { template template void CLUEAlgoAlpaka::make_clusters(PointsSoA& h_points, - PointsAlpaka& d_points, const KernelType& kernel, Queue queue_, std::size_t block_size) { - setup(h_points, d_points, queue_, block_size); + d_points = PointsAlpaka(queue_, h_points.nPoints()); + auto& dev_points = *d_points; + make_clusters(h_points, dev_points, kernel, queue_, block_size); + } + + template + template + void CLUEAlgoAlpaka::make_clusters(PointsSoA& h_points, + PointsAlpaka& dev_points, + const KernelType& kernel, + Queue queue_, + std::size_t block_size) { + setup(h_points, dev_points, queue_, block_size); const auto nPoints = h_points.nPoints(); @@ -164,13 +184,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { alpaka::enqueue( queue_, alpaka::createTaskKernel( - working_div, KernelFillTiles{}, d_points.view(), m_tiles, nPoints)); + working_div, KernelFillTiles{}, dev_points.view(), m_tiles, nPoints)); alpaka::enqueue(queue_, alpaka::createTaskKernel(working_div, KernelCalculateLocalDensity{}, m_tiles, - d_points.view(), + dev_points.view(), kernel, /* m_domains.data(), */ dc_, @@ -179,7 +199,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { alpaka::createTaskKernel(working_div, KernelCalculateNearestHigher{}, m_tiles, - d_points.view(), + dev_points.view(), /* m_domains.data(), */ dm_, dc_, @@ -189,7 +209,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { KernelFindClusters{}, m_seeds, m_followers, - d_points.view(), + dev_points.view(), dm_, dc_, rhoc_, @@ -204,7 +224,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { KernelAssignClusters{}, m_seeds, m_followers, - d_points.view())); + dev_points.view())); // Wait for all the operations in the queue to finish alpaka::wait(queue_); @@ -213,23 +233,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { #ifdef DEBUG alpaka::memcpy(queue_, clue::make_host_view(h_points.debugInfo().rho.data(), nPoints), - clue::make_device_view(device, d_points.view()->rho, nPoints)); + clue::make_device_view(device, dev_points.view()->rho, nPoints)); alpaka::memcpy(queue_, clue::make_host_view(h_points.debugInfo().rho.data(), nPoints), - clue::make_device_view(device, d_points.view()->delta, nPoints)); + clue::make_device_view(device, dev_points.view()->delta, nPoints)); alpaka::memcpy( queue_, clue::make_host_view(h_points.debugInfo().nearestHigher.data(), nPoints), - clue::make_device_view(device, d_points.view()->nearest_higher, nPoints)); + clue::make_device_view(device, dev_points.view()->nearest_higher, nPoints)); #endif alpaka::memcpy(queue_, clue::make_host_view(h_points.clusterIndexes(), 2 * nPoints), clue::make_device_view( - device, d_points.result_buffer.data() + nPoints, 2 * nPoints), + device, dev_points.result_buffer.data() + nPoints, 2 * nPoints), 2 * nPoints); // Wait for all the operations in the queue to finish alpaka::wait(queue_); } + + template + std::map> CLUEAlgoAlpaka::getClusters( + const PointsSoA& h_points) { + // cluster all points with same clusterId + std::map> clusters; + for (size_t i = 0; i < h_points.nPoints(); i++) { + clusters[h_points.clusterIndexes()[i]].push_back(i); + } + return clusters; + } + } // namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE diff --git a/include/CLUEstering/DataFormats/alpaka/PointsAlpaka.hpp b/include/CLUEstering/DataFormats/alpaka/PointsAlpaka.hpp index b0bdef7f..7398ba8f 100644 --- a/include/CLUEstering/DataFormats/alpaka/PointsAlpaka.hpp +++ b/include/CLUEstering/DataFormats/alpaka/PointsAlpaka.hpp @@ -10,6 +10,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { + class PointsAlpakaView { + public: + float* coords; + float* weight; + float* rho; + float* delta; + int* nearest_higher; + int* cluster_index; + int* is_seed; + int n; + }; + template class PointsAlpaka { public: @@ -40,18 +52,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE { clue::device_buffer input_buffer; clue::device_buffer result_buffer; - class PointsAlpakaView { - public: - float* coords; - float* weight; - float* rho; - float* delta; - int* nearest_higher; - int* cluster_index; - int* is_seed; - int n; - }; - PointsAlpakaView* view() { return view_dev.data(); } private: