Skip to content

Commit

Permalink
Replace outlier-delta-factor parameter with dm (cms-patatrack#56)
Browse files Browse the repository at this point in the history
* Substitute outlier delta factor with `dm` parameter

* Update parameters in tests

* Set dm equal to dc by default

* Update package version

* Update parameters in test

* Fix typo

* Update readme
  • Loading branch information
sbaldu authored Sep 9, 2024
1 parent 7410770 commit 09ff2e6
Show file tree
Hide file tree
Showing 21 changed files with 129 additions and 129 deletions.
36 changes: 21 additions & 15 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,14 @@ class clusterer:
Points with a density lower than rhoc can't be seeds, can only be followers
or outliers.
outlier : float
Multiplicative increment of dc_ for getting the region over which the followers of a
point are searched.
While dc_ determines the size of the search box in which the neighbors of a point are
searched when calculating its local density, when looking for followers while trying
to find potential seeds the size of the search box is given by dm = dc_ * outlier.
dm : float
Spatial parameter, analogous to dc_, that sets the size of the region over
which the followers of a point are searched.
While dc_ determines the size of the search box in which the neighbors
of a point are searched when calculating its local density, dm determines
the size of the search box used when looking for followers while trying
to find potential seeds. If dm is not provided (None), it is set equal to dc_.
ppbin : int
Average number of points to be found in each tile.
kernel : Algo.kernel
Expand All @@ -243,10 +244,12 @@ class clusterer:
Execution time of the algorithm, expressed in nanoseconds.
"""

def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
def __init__(self, dc_: float, rhoc_: float, dm_: [float, None] = None, ppbin: int = 10):
self.dc_ = dc_
self.rhoc = rhoc_
self.outlier = outlier_
self.dm = dm_
if dm_ is None:
self.dm = dc_
self.ppbin = ppbin

# Initialize attributes
Expand All @@ -261,10 +264,13 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
self.elapsed_time = 0.

def set_params(self, dc: float, rhoc: float,
outlier: float, ppbin: int = 128) -> None:
dm: [float, None], ppbin: int = 128) -> None:
self.dc_ = dc
self.rhoc = rhoc
self.outlier = outlier
if dm is not None:
self.dm = dm
else:
self.dm = dc
self.ppbin = ppbin

def _read_array(self, input_data: Union[list, np.ndarray]) -> None:
Expand Down Expand Up @@ -673,12 +679,12 @@ def run_clue(self,
data = self._partial_dimension_dataset(dimensions)
start = time.time_ns()
if backend == "cpu serial":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.dm, self.ppbin,
data, self.clust_data.weight, self.kernel,
self.clust_data.n_dim, block_size, device_id)
elif backend == "cpu tbb":
if tbb_found:
cluster_id_is_seed = cpu_tbb.mainRun(self.dc_, self.rhoc, self.outlier,
cluster_id_is_seed = cpu_tbb.mainRun(self.dc_, self.rhoc, self.dm,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
Expand All @@ -687,7 +693,7 @@ def run_clue(self,

elif backend == "gpu cuda":
if cuda_found:
cluster_id_is_seed = gpu_cuda.mainRun(self.dc_, self.rhoc, self.outlier,
cluster_id_is_seed = gpu_cuda.mainRun(self.dc_, self.rhoc, self.dm,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
Expand All @@ -696,7 +702,7 @@ def run_clue(self,

elif backend == "gpu hip":
if hip_found:
cluster_id_is_seed = gpu_hip.mainRun(self.dc_, self.rhoc, self.outlier,
cluster_id_is_seed = gpu_hip.mainRun(self.dc_, self.rhoc, self.dm,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
Expand Down
8 changes: 4 additions & 4 deletions CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ namespace alpaka {
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task)
-> void {
ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue,
HostOnlyTask task) -> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
cudaStreamAddCallback(alpaka::getNativeHandle(queue),
Expand All @@ -58,8 +58,8 @@ namespace alpaka {
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task)
-> void {
ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue,
HostOnlyTask task) -> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
hipStreamAddCallback(alpaka::getNativeHandle(queue),
Expand Down
22 changes: 11 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace alpaka_serial_sync {
template <typename Kernel>
std::vector<std::vector<int>> mainRun(float dc,
float rhoc,
float outlier,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
Expand All @@ -43,34 +43,34 @@ namespace alpaka_serial_sync {
switch (Ndim) {
[[unlikely]] case (1):
return run<1, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (2):
return run<2, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (3):
return run<3, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (4):
return run<4, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (5):
return run<5, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (6):
return run<6, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (7):
return run<7, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (8):
return run<8, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (9):
return run<9, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (10):
return run<10, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] default:
std::cout << "This library only works up to 10 dimensions\n";
return {};
Expand Down
22 changes: 11 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace alpaka_tbb_async {
template <typename Kernel>
std::vector<std::vector<int>> mainRun(float dc,
float rhoc,
float outlier,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
Expand All @@ -43,34 +43,34 @@ namespace alpaka_tbb_async {
switch (Ndim) {
[[unlikely]] case (1):
return run<1, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (2):
return run<2, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (3):
return run<3, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (4):
return run<4, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (5):
return run<5, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (6):
return run<6, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (7):
return run<7, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (8):
return run<8, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (9):
return run<9, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (10):
return run<10, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] default:
std::cout << "This library only works up to 10 dimensions\n";
return {};
Expand Down
22 changes: 11 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace alpaka_cuda_async {
template <typename Kernel>
std::vector<std::vector<int>> mainRun(float dc,
float rhoc,
float outlier,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
Expand All @@ -44,34 +44,34 @@ namespace alpaka_cuda_async {
switch (Ndim) {
[[unlikely]] case (1):
return run<1, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (2):
return run<2, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (3):
return run<3, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (4):
return run<4, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (5):
return run<5, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (6):
return run<6, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (7):
return run<7, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (8):
return run<8, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (9):
return run<9, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (10):
return run<10, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] default:
std::cout << "This library only works up to 10 dimensions\n";
return {};
Expand Down
22 changes: 11 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_gpu_hip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace alpaka_rocm_async {
template <typename Kernel>
std::vector<std::vector<int>> mainRun(float dc,
float rhoc,
float outlier,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
Expand All @@ -42,34 +42,34 @@ namespace alpaka_rocm_async {
switch (Ndim) {
[[unlikely]] case (1):
return run<1, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (2):
return run<2, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[likely]] case (3):
return run<3, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (4):
return run<4, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (5):
return run<5, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (6):
return run<6, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (7):
return run<7, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (8):
return run<8, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (9):
return run<9, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] case (10):
return run<10, Kernel>(
dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
[[unlikely]] default:
std::cout << "This library only works up to 10 dimensions\n";
return {};
Expand Down
14 changes: 5 additions & 9 deletions CLUEstering/alpaka/CLUE/CLUEAlgoAlpaka.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
class CLUEAlgoAlpaka {
public:
CLUEAlgoAlpaka() = delete;
explicit CLUEAlgoAlpaka(
float dc, float rhoc, float outlierDeltaFactor, int pPBin, Queue queue_)
: dc_{dc},
rhoc_{rhoc},
outlierDeltaFactor_{outlierDeltaFactor},
pointsPerTile_{pPBin} {
explicit CLUEAlgoAlpaka(float dc, float rhoc, float dm, int pPBin, Queue queue_)
: dc_{dc}, rhoc_{rhoc}, dm_{dm}, pointsPerTile_{pPBin} {
init_device(queue_);
}

Expand All @@ -51,7 +47,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
private:
float dc_;
float rhoc_;
float outlierDeltaFactor_;
float dm_;
// average number of points found in a tile
int pointsPerTile_;

Expand Down Expand Up @@ -207,7 +203,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
m_tiles,
d_points.view(),
/* m_domains.data(), */
outlierDeltaFactor_,
dm_,
dc_,
h_points.n));

Expand All @@ -217,7 +213,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
m_seeds,
m_followers,
d_points.view(),
outlierDeltaFactor_,
dm_,
dc_,
rhoc_,
h_points.n));
Expand Down
Loading

0 comments on commit 09ff2e6

Please sign in to comment.